From 0b9c54382a54d0dddf6672ab08968c18377b94a6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 30 Mar 2009 18:47:09 +0200 Subject: DRBD: lru_cache The lru_cache is a fixed size cache of equal sized objects. It allows its users to do arbitrary transactions in case an element in the cache needs to be replaced. Its replacement policy is LRU. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/lru_cache.c | 397 +++++++++++++++++++++++++++++++++++++++++ drivers/block/drbd/lru_cache.h | 116 ++++++++++++ 2 files changed, 513 insertions(+) create mode 100644 drivers/block/drbd/lru_cache.c create mode 100644 drivers/block/drbd/lru_cache.h diff --git a/drivers/block/drbd/lru_cache.c b/drivers/block/drbd/lru_cache.c new file mode 100644 index 000000000000..33fad4d3e358 --- /dev/null +++ b/drivers/block/drbd/lru_cache.c @@ -0,0 +1,397 @@ +/* + lru_cache.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2003-2008, Philipp Reisner . + Copyright (C) 2003-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#include +#include +#include /* for memset */ +#include /* for seq_printf */ +#include "lru_cache.h" + +/* this is developers aid only! */ +#define PARANOIA_ENTRY() BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)) +#define PARANOIA_LEAVE() do { clear_bit(__LC_PARANOIA, &lc->flags); smp_mb__after_clear_bit(); } while (0) +#define RETURN(x...) do { PARANOIA_LEAVE(); return x ; } while (0) + +static inline size_t size_of_lc(unsigned int e_count, size_t e_size) +{ + return sizeof(struct lru_cache) + + e_count * (e_size + sizeof(struct hlist_head)); +} + +static inline void lc_init(struct lru_cache *lc, + const size_t bytes, const char *name, + const unsigned int e_count, const size_t e_size, + void *private_p) +{ + struct lc_element *e; + unsigned int i; + + memset(lc, 0, bytes); + INIT_LIST_HEAD(&lc->in_use); + INIT_LIST_HEAD(&lc->lru); + INIT_LIST_HEAD(&lc->free); + lc->element_size = e_size; + lc->nr_elements = e_count; + lc->new_number = -1; + lc->lc_private = private_p; + lc->name = name; + for (i = 0; i < e_count; i++) { + e = lc_entry(lc, i); + e->lc_number = LC_FREE; + list_add(&e->list, &lc->free); + /* memset(,0,) did the rest of init for us */ + } +} + +/** + * lc_alloc: allocates memory for @e_count objects of @e_size bytes plus the + * struct lru_cache, and the hash table slots. + * returns pointer to a newly initialized lru_cache object with said parameters. 
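 *
 * A minimal caller-side sketch of the allocate/use/free cycle (illustrative
 * only; the cache name and slot count here are made up):
 */

static struct lru_cache *example_lc_setup(void)
{
	/* 61 slots; lc_alloc() clamps e_size up to sizeof(struct lc_element)
	 * and vmalloc()s one block for struct, hash slots and elements */
	struct lru_cache *lc = lc_alloc("example", 61,
					sizeof(struct lc_element), NULL);

	if (!lc)
		return NULL;
	/* ... lc_get()/lc_put() transactions; lc_reset() starts over ... */
	/* when done: lc_free(lc); */
	return lc;
}

/*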
+ */ +struct lru_cache *lc_alloc(const char *name, unsigned int e_count, + size_t e_size, void *private_p) +{ + struct lru_cache *lc; + size_t bytes; + + BUG_ON(!e_count); + e_size = max(sizeof(struct lc_element), e_size); + bytes = size_of_lc(e_count, e_size); + lc = vmalloc(bytes); + if (lc) + lc_init(lc, bytes, name, e_count, e_size, private_p); + return lc; +} + +/** + * lc_free: Frees memory allocated by lc_alloc. + * @lc: The lru_cache object + */ +void lc_free(struct lru_cache *lc) +{ + vfree(lc); +} + +/** + * lc_reset: does a full reset for @lc and the hash table slots. + * It is roughly the equivalent of re-allocating a fresh lru_cache object, + * basically a short cut to lc_free(lc); lc = lc_alloc(...); + */ +void lc_reset(struct lru_cache *lc) +{ + lc_init(lc, size_of_lc(lc->nr_elements, lc->element_size), lc->name, + lc->nr_elements, lc->element_size, lc->lc_private); +} + +size_t lc_printf_stats(struct seq_file *seq, struct lru_cache *lc) +{ + /* NOTE: + * total calls to lc_get are + * (starving + hits + misses) + * misses include "dirty" count (update from an other thread in + * progress) and "changed", when this in fact lead to an successful + * update of the cache. + */ + return seq_printf(seq, "\t%s: used:%u/%u " + "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n", + lc->name, lc->used, lc->nr_elements, + lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed); +} + +static unsigned int lc_hash_fn(struct lru_cache *lc, unsigned int enr) +{ + return enr % lc->nr_elements; +} + + +/** + * lc_find: Returns the pointer to an element, if the element is present + * in the hash table. In case it is not this function returns NULL. + * @lc: The lru_cache object + * @enr: element number + */ +struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) +{ + struct hlist_node *n; + struct lc_element *e; + + BUG_ON(!lc); + BUG_ON(!lc->nr_elements); + hlist_for_each_entry(e, n, lc->slot + lc_hash_fn(lc, enr), colision) { + if (e->lc_number == enr) + return e; + } + return NULL; +} + +static struct lc_element *lc_evict(struct lru_cache *lc) +{ + struct list_head *n; + struct lc_element *e; + + if (list_empty(&lc->lru)) + return NULL; + + n = lc->lru.prev; + e = list_entry(n, struct lc_element, list); + + list_del(&e->list); + hlist_del(&e->colision); + return e; +} + +/** + * lc_del: Removes an element from the cache (and therefore adds the + * element's storage to the free list) + * + * @lc: The lru_cache object + * @e: The element to remove + */ +void lc_del(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_ENTRY(); + BUG_ON(e->refcnt); + list_del(&e->list); + hlist_del_init(&e->colision); + e->lc_number = LC_FREE; + e->refcnt = 0; + list_add(&e->list, &lc->free); + RETURN(); +} + +static struct lc_element *lc_get_unused_element(struct lru_cache *lc) +{ + struct list_head *n; + + if (list_empty(&lc->free)) + return lc_evict(lc); + + n = lc->free.next; + list_del(n); + return list_entry(n, struct lc_element, list); +} + +static int lc_unused_element_available(struct lru_cache *lc) +{ + if (!list_empty(&lc->free)) + return 1; /* something on the free list */ + if (!list_empty(&lc->lru)) + return 1; /* something to evict */ + + return 0; +} + + +/** + * lc_get: Finds an element in the cache, increases its usage count, + * "touches" and returns it. + * In case the requested number is not present, it needs to be added to the + * cache. Therefore it is possible that an other element becomes eviced from + * the cache. 
In either case, the user is notified so he is able to e.g. keep + * a persistent log of the cache changes, and therefore the objects in use. + * + * Return values: + * NULL if the requested element number was not in the cache, and no unused + * element could be recycled + * pointer to the element with the REQUESTED element number + * In this case, it can be used right away + * + * pointer to an UNUSED element with some different element number. + * In this case, the cache is marked dirty, and the returned element + * pointer is removed from the lru list and hash collision chains. + * The user now should do whatever houskeeping is necessary. Then he + * needs to call lc_element_changed(lc,element_pointer), to finish the + * change. + * + * NOTE: The user needs to check the lc_number on EACH use, so he recognizes + * any cache set change. + * + * @lc: The lru_cache object + * @enr: element number + */ +struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) +{ + struct lc_element *e; + + BUG_ON(!lc); + BUG_ON(!lc->nr_elements); + + PARANOIA_ENTRY(); + if (lc->flags & LC_STARVING) { + ++lc->starving; + RETURN(NULL); + } + + e = lc_find(lc, enr); + if (e) { + ++lc->hits; + if (e->refcnt++ == 0) + lc->used++; + list_move(&e->list, &lc->in_use); /* Not evictable... */ + RETURN(e); + } + + ++lc->misses; + + /* In case there is nothing available and we can not kick out + * the LRU element, we have to wait ... + */ + if (!lc_unused_element_available(lc)) { + __set_bit(__LC_STARVING, &lc->flags); + RETURN(NULL); + } + + /* it was not present in the cache, find an unused element, + * which then is replaced. + * we need to update the cache; serialize on lc->flags & LC_DIRTY + */ + if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { + ++lc->dirty; + RETURN(NULL); + } + + e = lc_get_unused_element(lc); + BUG_ON(!e); + + clear_bit(__LC_STARVING, &lc->flags); + BUG_ON(++e->refcnt != 1); + lc->used++; + + lc->changing_element = e; + lc->new_number = enr; + + RETURN(e); +} + +/* similar to lc_get, + * but only gets a new reference on an existing element. + * you either get the requested element, or NULL. + */ +struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) +{ + struct lc_element *e; + + BUG_ON(!lc); + BUG_ON(!lc->nr_elements); + + PARANOIA_ENTRY(); + if (lc->flags & LC_STARVING) { + ++lc->starving; + RETURN(NULL); + } + + e = lc_find(lc, enr); + if (e) { + ++lc->hits; + if (e->refcnt++ == 0) + lc->used++; + list_move(&e->list, &lc->in_use); /* Not evictable... */ + } + RETURN(e); +} + +void lc_changed(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_ENTRY(); + BUG_ON(e != lc->changing_element); + ++lc->changed; + e->lc_number = lc->new_number; + list_add(&e->list, &lc->in_use); + hlist_add_head(&e->colision, + lc->slot + lc_hash_fn(lc, lc->new_number)); + lc->changing_element = NULL; + lc->new_number = -1; + clear_bit(__LC_DIRTY, &lc->flags); + smp_mb__after_clear_bit(); + PARANOIA_LEAVE(); +} + + +unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) +{ + BUG_ON(!lc); + BUG_ON(!lc->nr_elements); + BUG_ON(!e); + + PARANOIA_ENTRY(); + BUG_ON(e->refcnt == 0); + BUG_ON(e == lc->changing_element); + if (--e->refcnt == 0) { + /* move it to the front of LRU. */ + list_move(&e->list, &lc->lru); + lc->used--; + clear_bit(__LC_STARVING, &lc->flags); + smp_mb__after_clear_bit(); + } + RETURN(e->refcnt); +} + + +/** + * lc_set: Sets an element in the cache. You might use this function to + * setup the cache. It is expected that the elements are properly initialized. 
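 */

/*
 * A compact sketch of the lc_get()/lc_changed()/lc_put() protocol
 * documented above (illustrative only; the caller is assumed to hold
 * whatever lock serializes access to the cache, and to persist the
 * change before calling lc_changed()):
 */
static struct lc_element *example_lc_transaction(struct lru_cache *lc,
						 unsigned int enr)
{
	struct lc_element *e = lc_get(lc, enr);

	if (!e)
		return NULL;	/* starving, or another change in flight */
	if (e->lc_number != enr) {
		/* miss: an unused element was recycled for us;
		 * write the change to the journal here, then commit: */
		lc_changed(lc, e);
	}
	/* ... use it, checking e->lc_number on each use ... */
	/* drop the reference again with lc_put(lc, e) when done */
	return e;
}

/**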
+ * @lc: The lru_cache object + * @enr: element number + * @index: The elements' position in the cache + */ +void lc_set(struct lru_cache *lc, unsigned int enr, int index) +{ + struct lc_element *e; + + if (index < 0 || index >= lc->nr_elements) + return; + + e = lc_entry(lc, index); + e->lc_number = enr; + + hlist_del_init(&e->colision); + hlist_add_head(&e->colision, lc->slot + lc_hash_fn(lc, enr)); + list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru); +} + +/** + * lc_dump: Dump a complete LRU cache to seq in textual form. + */ +void lc_dump(struct lru_cache *lc, struct seq_file *seq, char *utext, + void (*detail) (struct seq_file *, struct lc_element *)) +{ + unsigned int nr_elements = lc->nr_elements; + struct lc_element *e; + int i; + + seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext); + for (i = 0; i < nr_elements; i++) { + e = lc_entry(lc, i); + if (e->lc_number == LC_FREE) { + seq_printf(seq, "\t%2d: FREE\n", i); + } else { + seq_printf(seq, "\t%2d: %4u %4u ", i, + e->lc_number, + e->refcnt); + detail(seq, e); + } + } +} + diff --git a/drivers/block/drbd/lru_cache.h b/drivers/block/drbd/lru_cache.h new file mode 100644 index 000000000000..6c095df24c04 --- /dev/null +++ b/drivers/block/drbd/lru_cache.h @@ -0,0 +1,116 @@ +/* + lru_cache.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2003-2008, Philipp Reisner . + Copyright (C) 2003-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#ifndef LRU_CACHE_H +#define LRU_CACHE_H + +#include + +struct lc_element { + struct hlist_node colision; + struct list_head list; /* LRU list or free list */ + unsigned int refcnt; + unsigned int lc_number; +}; + +struct lru_cache { + struct list_head lru; + struct list_head free; + struct list_head in_use; + size_t element_size; + unsigned int nr_elements; + unsigned int new_number; + + unsigned int used; + unsigned long flags; + unsigned long hits, misses, starving, dirty, changed; + struct lc_element *changing_element; /* just for paranoia */ + + void *lc_private; + const char *name; + + struct hlist_head slot[0]; + /* hash colision chains here, then element storage. 
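 * The whole object lives in a single vmalloc()ed block (see size_of_lc()):
 *   struct lru_cache | nr_elements hash slots | nr_elements * element_size;
 * the lc_e_base()/lc_entry() macros below compute into that element storage.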
*/ +}; + + +/* flag-bits for lru_cache */ +enum { + __LC_PARANOIA, + __LC_DIRTY, + __LC_STARVING, +}; +#define LC_PARANOIA (1<<__LC_PARANOIA) +#define LC_DIRTY (1<<__LC_DIRTY) +#define LC_STARVING (1<<__LC_STARVING) + +extern struct lru_cache *lc_alloc(const char *name, unsigned int e_count, + size_t e_size, void *private_p); +extern void lc_reset(struct lru_cache *lc); +extern void lc_free(struct lru_cache *lc); +extern void lc_set(struct lru_cache *lc, unsigned int enr, int index); +extern void lc_del(struct lru_cache *lc, struct lc_element *element); + +extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr); +extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr); +extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr); +extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e); +extern void lc_changed(struct lru_cache *lc, struct lc_element *e); + +struct seq_file; +extern size_t lc_printf_stats(struct seq_file *seq, struct lru_cache *lc); + +void lc_dump(struct lru_cache *lc, struct seq_file *seq, char *utext, + void (*detail) (struct seq_file *, struct lc_element *)); + +/* This can be used to stop lc_get from changing the set of active elements. + * Note that the reference counts and order on the lru list may still change. + * returns true if we aquired the lock. + */ +static inline int lc_try_lock(struct lru_cache *lc) +{ + return !test_and_set_bit(__LC_DIRTY, &lc->flags); +} + +static inline void lc_unlock(struct lru_cache *lc) +{ + clear_bit(__LC_DIRTY, &lc->flags); + smp_mb__after_clear_bit(); +} + +static inline int lc_is_used(struct lru_cache *lc, unsigned int enr) +{ + struct lc_element *e = lc_find(lc, enr); + return e && e->refcnt; +} + +#define LC_FREE (-1U) + +#define lc_e_base(lc) ((char *)((lc)->slot + (lc)->nr_elements)) +#define lc_entry(lc, i) ((struct lc_element *) \ + (lc_e_base(lc) + (i)*(lc)->element_size)) +#define lc_index_of(lc, e) (((char *)(e) - lc_e_base(lc))/(lc)->element_size) + +#endif -- cgit v1.2.3 From e71028ebf72df033165c3898c4e3d617a0355ec4 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 30 Mar 2009 18:47:10 +0200 Subject: DRBD: activity_log Within DRBD the activity log is used to track extents (4MB each) in which IO happens (or happened recently). It is based on the LRU cache. Each change of the activity log causes a meta data update (single sector write). The size of the activity log is configured by the user, and is a tradeoff between minimizing updates to the meta data and the resync time after the crash of a primary node. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 1473 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 1473 insertions(+) create mode 100644 drivers/block/drbd/drbd_actlog.c diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c new file mode 100644 index 000000000000..f5c0614cf386 --- /dev/null +++ b/drivers/block/drbd/drbd_actlog.c @@ -0,0 +1,1473 @@ +/* + drbd_actlog.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2003-2008, Philipp Reisner . + Copyright (C) 2003-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. 
+ + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#include +#include +#include "drbd_int.h" +#include "drbd_wrappers.h" + +/* I do not believe that all storage medias can guarantee atomic + * 512 byte write operations. When the journal is read, only + * transactions with correct xor_sums are considered. + * sizeof() = 512 byte */ +struct __attribute__((packed)) al_transaction { + u32 magic; + u32 tr_number; + struct __attribute__((packed)) { + u32 pos; + u32 extent; } updates[1 + AL_EXTENTS_PT]; + u32 xor_sum; +}; + +struct update_odbm_work { + struct drbd_work w; + unsigned int enr; +}; + +struct update_al_work { + struct drbd_work w; + struct lc_element *al_ext; + struct completion event; + unsigned int enr; + /* if old_enr != LC_FREE, write corresponding bitmap sector, too */ + unsigned int old_enr; +}; + +struct drbd_atodb_wait { + atomic_t count; + struct completion io_done; + struct drbd_conf *mdev; + int error; +}; + + +int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int); + +STATIC int _drbd_md_sync_page_io(struct drbd_conf *mdev, + struct drbd_backing_dev *bdev, + struct page *page, sector_t sector, + int rw, int size) +{ + struct bio *bio; + struct drbd_md_io md_io; + int ok; + + md_io.mdev = mdev; + init_completion(&md_io.event); + md_io.error = 0; + + if (rw == WRITE && !test_bit(MD_NO_BARRIER, &mdev->flags)) + rw |= (1<bi_bdev = bdev->md_bdev; + bio->bi_sector = sector; + ok = (bio_add_page(bio, page, size, 0) == size); + if (!ok) + goto out; + bio->bi_private = &md_io; + bio->bi_end_io = drbd_md_io_complete; + bio->bi_rw = rw; + + dump_internal_bio("Md", mdev, bio, 0); + + if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) + bio_endio(bio, -EIO); + else + submit_bio(rw, bio); + wait_for_completion(&md_io.event); + ok = bio_flagged(bio, BIO_UPTODATE) && md_io.error == 0; + + /* check for unsupported barrier op. + * would rather check on EOPNOTSUPP, but that is not reliable. + * don't try again for ANY return value != 0 */ + if (unlikely(bio_barrier(bio) && !ok)) { + /* Try again with no barrier */ + drbd_WARN("Barriers not supported on meta data device - disabling\n"); + set_bit(MD_NO_BARRIER, &mdev->flags); + rw &= ~(1 << BIO_RW_BARRIER); + bio_put(bio); + goto retry; + } + out: + bio_put(bio); + return ok; +} + +int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, + sector_t sector, int rw) +{ + int hardsect, mask, ok; + int offset = 0; + struct page *iop = mdev->md_io_page; + + D_ASSERT(mutex_is_locked(&mdev->md_io_mutex)); + + BUG_ON(!bdev->md_bdev); + + hardsect = drbd_get_hardsect(bdev->md_bdev); + if (hardsect == 0) + hardsect = MD_HARDSECT; + + /* in case hardsect != 512 [ s390 only? 
] */ + if (hardsect != MD_HARDSECT) { + mask = (hardsect / MD_HARDSECT) - 1; + D_ASSERT(mask == 1 || mask == 3 || mask == 7); + D_ASSERT(hardsect == (mask+1) * MD_HARDSECT); + offset = sector & mask; + sector = sector & ~mask; + iop = mdev->md_io_tmpp; + + if (rw == WRITE) { + void *p = page_address(mdev->md_io_page); + void *hp = page_address(mdev->md_io_tmpp); + + ok = _drbd_md_sync_page_io(mdev, bdev, iop, + sector, READ, hardsect); + + if (unlikely(!ok)) { + ERR("drbd_md_sync_page_io(,%llus," + "READ [hardsect!=512]) failed!\n", + (unsigned long long)sector); + return 0; + } + + memcpy(hp + offset*MD_HARDSECT , p, MD_HARDSECT); + } + } + + if (sector < drbd_md_first_sector(bdev) || + sector > drbd_md_last_sector(bdev)) + ALERT("%s [%d]:%s(,%llus,%s) out of range md access!\n", + current->comm, current->pid, __func__, + (unsigned long long)sector, rw ? "WRITE" : "READ"); + + ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, hardsect); + if (unlikely(!ok)) { + ERR("drbd_md_sync_page_io(,%llus,%s) failed!\n", + (unsigned long long)sector, rw ? "WRITE" : "READ"); + return 0; + } + + if (hardsect != MD_HARDSECT && rw == READ) { + void *p = page_address(mdev->md_io_page); + void *hp = page_address(mdev->md_io_tmpp); + + memcpy(p, hp + offset*MD_HARDSECT, MD_HARDSECT); + } + + return ok; +} + +static inline +struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) +{ + struct lc_element *al_ext; + struct bm_extent *bm_ext; + unsigned long al_flags = 0; + + spin_lock_irq(&mdev->al_lock); + bm_ext = (struct bm_extent *) + lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT); + if (unlikely(bm_ext != NULL)) { + if (test_bit(BME_NO_WRITES, &bm_ext->flags)) { + spin_unlock_irq(&mdev->al_lock); + return NULL; + } + } + al_ext = lc_get(mdev->act_log, enr); + al_flags = mdev->act_log->flags; + spin_unlock_irq(&mdev->al_lock); + + /* + if (!al_ext) { + if (al_flags & LC_STARVING) + drbd_WARN("Have to wait for LRU element (AL too small?)\n"); + if (al_flags & LC_DIRTY) + drbd_WARN("Ongoing AL update (AL device too slow?)\n"); + } + */ + + return al_ext; +} + +void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) +{ + unsigned int enr = (sector >> (AL_EXTENT_SIZE_B-9)); + struct lc_element *al_ext; + struct update_al_work al_work; + + D_ASSERT(atomic_read(&mdev->local_cnt) > 0); + + MTRACE(TraceTypeALExts, TraceLvlMetrics, + INFO("al_begin_io( sec=%llus (al_enr=%u) (rs_enr=%d) )\n", + (unsigned long long) sector, enr, + (int)BM_SECT_TO_EXT(sector)); + ); + + wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr))); + + if (al_ext->lc_number != enr) { + /* drbd_al_write_transaction(mdev,al_ext,enr); + generic_make_request() are serialized on the + current->bio_tail list now. Therefore we have + to deligate writing something to AL to the + worker thread. 
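	 * The handshake below: queue the update at the front of the
	 * worker queue, block on the completion, then commit the new
	 * extent number via lc_changed() under al_lock.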
*/ + init_completion(&al_work.event); + al_work.al_ext = al_ext; + al_work.enr = enr; + al_work.old_enr = al_ext->lc_number; + al_work.w.cb = w_al_write_transaction; + drbd_queue_work_front(&mdev->data.work, &al_work.w); + wait_for_completion(&al_work.event); + + mdev->al_writ_cnt++; + + spin_lock_irq(&mdev->al_lock); + lc_changed(mdev->act_log, al_ext); + spin_unlock_irq(&mdev->al_lock); + wake_up(&mdev->al_wait); + } +} + +void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector) +{ + unsigned int enr = (sector >> (AL_EXTENT_SIZE_B-9)); + struct lc_element *extent; + unsigned long flags; + + MTRACE(TraceTypeALExts, TraceLvlMetrics, + INFO("al_complete_io( sec=%llus (al_enr=%u) (rs_enr=%d) )\n", + (unsigned long long) sector, enr, + (int)BM_SECT_TO_EXT(sector)); + ); + + spin_lock_irqsave(&mdev->al_lock, flags); + + extent = lc_find(mdev->act_log, enr); + + if (!extent) { + spin_unlock_irqrestore(&mdev->al_lock, flags); + ERR("al_complete_io() called on inactive extent %u\n", enr); + return; + } + + if (lc_put(mdev->act_log, extent) == 0) + wake_up(&mdev->al_wait); + + spin_unlock_irqrestore(&mdev->al_lock, flags); +} + +int +w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) +{ + struct update_al_work *aw = (struct update_al_work *)w; + struct lc_element *updated = aw->al_ext; + const unsigned int new_enr = aw->enr; + const unsigned int evicted = aw->old_enr; + + struct al_transaction *buffer; + sector_t sector; + int i, n, mx; + unsigned int extent_nr; + u32 xor_sum = 0; + + if (!inc_local(mdev)) { + ERR("inc_local() failed in w_al_write_transaction\n"); + complete(&((struct update_al_work *)w)->event); + return 1; + } + /* do we have to do a bitmap write, first? + * TODO reduce maximum latency: + * submit both bios, then wait for both, + * instead of doing two synchronous sector writes. */ + if (mdev->state.conn < Connected && evicted != LC_FREE) + drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT); + + mutex_lock(&mdev->md_io_mutex); /* protects md_io_page, al_tr_cycle, ... 
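	 * The 512 byte record built below carries the freshly updated
	 * slot in updates[0] plus a cyclic window of AL_EXTENTS_PT
	 * further slots (al_tr_cycle remembers where that window
	 * continues next time); xor_sum is the XOR of all extent
	 * numbers in the record, LC_FREE fillers included, and is
	 * re-computed by drbd_al_read_tr() to validate the record.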
*/ + buffer = (struct al_transaction *)page_address(mdev->md_io_page); + + buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC); + buffer->tr_number = cpu_to_be32(mdev->al_tr_number); + + n = lc_index_of(mdev->act_log, updated); + + buffer->updates[0].pos = cpu_to_be32(n); + buffer->updates[0].extent = cpu_to_be32(new_enr); + + xor_sum ^= new_enr; + + mx = min_t(int, AL_EXTENTS_PT, + mdev->act_log->nr_elements - mdev->al_tr_cycle); + for (i = 0; i < mx; i++) { + extent_nr = lc_entry(mdev->act_log, + mdev->al_tr_cycle+i)->lc_number; + buffer->updates[i+1].pos = cpu_to_be32(mdev->al_tr_cycle+i); + buffer->updates[i+1].extent = cpu_to_be32(extent_nr); + xor_sum ^= extent_nr; + } + for (; i < AL_EXTENTS_PT; i++) { + buffer->updates[i+1].pos = __constant_cpu_to_be32(-1); + buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE); + xor_sum ^= LC_FREE; + } + mdev->al_tr_cycle += AL_EXTENTS_PT; + if (mdev->al_tr_cycle >= mdev->act_log->nr_elements) + mdev->al_tr_cycle = 0; + + buffer->xor_sum = cpu_to_be32(xor_sum); + + sector = mdev->bc->md.md_offset + + mdev->bc->md.al_offset + mdev->al_tr_pos; + + if (!drbd_md_sync_page_io(mdev, mdev->bc, sector, WRITE)) { + drbd_chk_io_error(mdev, 1, TRUE); + drbd_io_error(mdev, TRUE); + } + + if (++mdev->al_tr_pos > + div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) + mdev->al_tr_pos = 0; + + D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE); + mdev->al_tr_number++; + + mutex_unlock(&mdev->md_io_mutex); + + complete(&((struct update_al_work *)w)->event); + dec_local(mdev); + + return 1; +} + +/** + * drbd_al_read_tr: Reads a single transaction record form the + * on disk activity log. + * Returns -1 on IO error, 0 on checksum error and 1 if it is a valid + * record. + */ +STATIC int drbd_al_read_tr(struct drbd_conf *mdev, + struct drbd_backing_dev *bdev, + struct al_transaction *b, + int index) +{ + sector_t sector; + int rv, i; + u32 xor_sum = 0; + + sector = bdev->md.md_offset + bdev->md.al_offset + index; + + /* Dont process error normally, + * as this is done before disk is atached! */ + if (!drbd_md_sync_page_io(mdev, bdev, sector, READ)) + return -1; + + rv = (be32_to_cpu(b->magic) == DRBD_MAGIC); + + for (i = 0; i < AL_EXTENTS_PT + 1; i++) + xor_sum ^= be32_to_cpu(b->updates[i].extent); + rv &= (xor_sum == be32_to_cpu(b->xor_sum)); + + return rv; +} + +/** + * drbd_al_read_log: Restores the activity log from its on disk + * representation. Returns 1 on success, returns 0 when + * reading the log failed due to IO errors. + */ +int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) +{ + struct al_transaction *buffer; + int i; + int rv; + int mx; + int cnr; + int active_extents = 0; + int transactions = 0; + int overflow = 0; + int from = -1; + int to = -1; + u32 from_tnr = -1; + u32 to_tnr = 0; + + mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT); + + /* lock out all other meta data io for now, + * and make sure the page is mapped. 
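	 * The scan below visits each of the mx+1 on-disk slots once and
	 * remembers the slot with the smallest transaction number
	 * ("from") and the one with the largest ("to"); tr_number == -1
	 * flags a wrapped counter ("overflow"). Replay then walks the
	 * ring from "from" to "to".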
+ */ + mutex_lock(&mdev->md_io_mutex); + buffer = page_address(mdev->md_io_page); + + /* Find the valid transaction in the log */ + for (i = 0; i <= mx; i++) { + rv = drbd_al_read_tr(mdev, bdev, buffer, i); + if (rv == 0) + continue; + if (rv == -1) { + mutex_unlock(&mdev->md_io_mutex); + return 0; + } + cnr = be32_to_cpu(buffer->tr_number); + + if (cnr == -1) + overflow = 1; + + if (cnr < from_tnr && !overflow) { + from = i; + from_tnr = cnr; + } + if (cnr > to_tnr) { + to = i; + to_tnr = cnr; + } + } + + if (from == -1 || to == -1) { + drbd_WARN("No usable activity log found.\n"); + + mutex_unlock(&mdev->md_io_mutex); + return 1; + } + + /* Read the valid transactions. + * INFO("Reading from %d to %d.\n",from,to); */ + i = from; + while (1) { + int j, pos; + unsigned int extent_nr; + unsigned int trn; + + rv = drbd_al_read_tr(mdev, bdev, buffer, i); + ERR_IF(rv == 0) goto cancel; + if (rv == -1) { + mutex_unlock(&mdev->md_io_mutex); + return 0; + } + + trn = be32_to_cpu(buffer->tr_number); + + spin_lock_irq(&mdev->al_lock); + + /* This loop runs backwards because in the cyclic + elements there might be an old version of the + updated element (in slot 0). So the element in slot 0 + can overwrite old versions. */ + for (j = AL_EXTENTS_PT; j >= 0; j--) { + pos = be32_to_cpu(buffer->updates[j].pos); + extent_nr = be32_to_cpu(buffer->updates[j].extent); + + if (extent_nr == LC_FREE) + continue; + + lc_set(mdev->act_log, extent_nr, pos); + active_extents++; + } + spin_unlock_irq(&mdev->al_lock); + + transactions++; + +cancel: + if (i == to) + break; + i++; + if (i > mx) + i = 0; + } + + mdev->al_tr_number = to_tnr+1; + mdev->al_tr_pos = to; + if (++mdev->al_tr_pos > + div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) + mdev->al_tr_pos = 0; + + /* ok, we are done with it */ + mutex_unlock(&mdev->md_io_mutex); + + INFO("Found %d transactions (%d active extents) in activity log.\n", + transactions, active_extents); + + return 1; +} + +STATIC void atodb_endio(struct bio *bio, int error) +{ + struct drbd_atodb_wait *wc = bio->bi_private; + struct drbd_conf *mdev = wc->mdev; + struct page *page; + int uptodate = bio_flagged(bio, BIO_UPTODATE); + + /* strange behaviour of some lower level drivers... + * fail the request by clearing the uptodate flag, + * but do not return any error?! */ + if (!error && !uptodate) + error = -EIO; + + /* corresponding drbd_io_error is in drbd_al_to_on_disk_bm */ + drbd_chk_io_error(mdev, error, TRUE); + if (error && wc->error == 0) + wc->error = error; + + if (atomic_dec_and_test(&wc->count)) + complete(&wc->io_done); + + page = bio->bi_io_vec[0].bv_page; + put_page(page); + bio_put(bio); + mdev->bm_writ_cnt++; + dec_local(mdev); +} + +#define S2W(s) ((s)<<(BM_EXT_SIZE_B-BM_BLOCK_SIZE_B-LN2_BPL)) +/* activity log to on disk bitmap -- prepare bio unless that sector + * is already covered by previously prepared bios */ +STATIC int atodb_prepare_unless_covered(struct drbd_conf *mdev, + struct bio **bios, + unsigned int enr, + struct drbd_atodb_wait *wc) __must_hold(local) +{ + struct bio *bio; + struct page *page; + sector_t on_disk_sector = enr + mdev->bc->md.md_offset + + mdev->bc->md.bm_offset; + unsigned int page_offset = PAGE_SIZE; + int offset; + int i = 0; + int err = -ENOMEM; + + /* Check if that enr is already covered by an already created bio. + * Caution, bios[] is not NULL terminated, + * but only initialized to all NULL. + * For completely scattered activity log, + * the last invocation iterates over all bios, + * and finds the last NULL entry. 
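	 * Pages are shared between consecutive bios: as long as the
	 * previous bio's 512 byte payload did not end exactly on a page
	 * boundary, the next payload is placed right behind it (see the
	 * page_offset handling below).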
+ */ + while ((bio = bios[i])) { + if (bio->bi_sector == on_disk_sector) + return 0; + i++; + } + /* bios[i] == NULL, the next not yet used slot */ + + bio = bio_alloc(GFP_KERNEL, 1); + if (bio == NULL) + return -ENOMEM; + + if (i > 0) { + const struct bio_vec *prev_bv = bios[i-1]->bi_io_vec; + page_offset = prev_bv->bv_offset + prev_bv->bv_len; + page = prev_bv->bv_page; + } + if (page_offset == PAGE_SIZE) { + page = alloc_page(__GFP_HIGHMEM); + if (page == NULL) + goto out_bio_put; + page_offset = 0; + } else { + get_page(page); + } + + offset = S2W(enr); + drbd_bm_get_lel(mdev, offset, + min_t(size_t, S2W(1), drbd_bm_words(mdev) - offset), + kmap(page) + page_offset); + kunmap(page); + + bio->bi_private = wc; + bio->bi_end_io = atodb_endio; + bio->bi_bdev = mdev->bc->md_bdev; + bio->bi_sector = on_disk_sector; + + if (bio_add_page(bio, page, MD_HARDSECT, page_offset) != MD_HARDSECT) + goto out_put_page; + + atomic_inc(&wc->count); + /* we already know that we may do this... + * inc_local_if_state(mdev,Attaching); + * just get the extra reference, so that the local_cnt reflects + * the number of pending IO requests DRBD at its backing device. + */ + atomic_inc(&mdev->local_cnt); + + bios[i] = bio; + + return 0; + +out_put_page: + err = -EINVAL; + put_page(page); +out_bio_put: + bio_put(bio); + return err; +} + +/** + * drbd_al_to_on_disk_bm: + * Writes the areas of the bitmap which are covered by the AL. + * called when we detach (unconfigure) local storage, + * or when we go from Primary to Secondary state. + */ +void drbd_al_to_on_disk_bm(struct drbd_conf *mdev) +{ + int i, nr_elements; + unsigned int enr; + struct bio **bios; + struct drbd_atodb_wait wc; + + ERR_IF (!inc_local_if_state(mdev, Attaching)) + return; /* sorry, I don't have any act_log etc... */ + + wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); + + nr_elements = mdev->act_log->nr_elements; + + bios = kzalloc(sizeof(struct bio *) * nr_elements, GFP_KERNEL); + if (!bios) + goto submit_one_by_one; + + atomic_set(&wc.count, 0); + init_completion(&wc.io_done); + wc.mdev = mdev; + wc.error = 0; + + for (i = 0; i < nr_elements; i++) { + enr = lc_entry(mdev->act_log, i)->lc_number; + if (enr == LC_FREE) + continue; + /* next statement also does atomic_inc wc.count and local_cnt */ + if (atodb_prepare_unless_covered(mdev, bios, + enr/AL_EXT_PER_BM_SECT, + &wc)) + goto free_bios_submit_one_by_one; + } + + /* unneccessary optimization? */ + lc_unlock(mdev->act_log); + wake_up(&mdev->al_wait); + + /* all prepared, submit them */ + for (i = 0; i < nr_elements; i++) { + if (bios[i] == NULL) + break; + if (FAULT_ACTIVE(mdev, DRBD_FAULT_MD_WR)) { + bios[i]->bi_rw = WRITE; + bio_endio(bios[i], -EIO); + } else { + submit_bio(WRITE, bios[i]); + } + } + + drbd_blk_run_queue(bdev_get_queue(mdev->bc->md_bdev)); + + /* always (try to) flush bitmap to stable storage */ + drbd_md_flush(mdev); + + /* In case we did not submit a single IO do not wait for + * them to complete. ( Because we would wait forever here. ) + * + * In case we had IOs and they are already complete, there + * is not point in waiting anyways. + * Therefore this if () ... */ + if (atomic_read(&wc.count)) + wait_for_completion(&wc.io_done); + + dec_local(mdev); + + if (wc.error) + drbd_io_error(mdev, TRUE); + kfree(bios); + return; + + free_bios_submit_one_by_one: + /* free everything by calling the endio callback directly. 
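	 * (atodb_endio() puts the page and the bio again and drops the
	 * wc->count and local_cnt references taken during prepare)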
*/ + for (i = 0; i < nr_elements && bios[i]; i++) + bio_endio(bios[i], 0); + + kfree(bios); + + submit_one_by_one: + drbd_WARN("Using the slow drbd_al_to_on_disk_bm()\n"); + + for (i = 0; i < mdev->act_log->nr_elements; i++) { + enr = lc_entry(mdev->act_log, i)->lc_number; + if (enr == LC_FREE) + continue; + /* Really slow: if we have al-extents 16..19 active, + * sector 4 will be written four times! Synchronous! */ + drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT); + } + + lc_unlock(mdev->act_log); + wake_up(&mdev->al_wait); + dec_local(mdev); +} + +/** + * drbd_al_apply_to_bm: Sets the bits in the bitmap that are described + * by the active extents of the AL. + */ +void drbd_al_apply_to_bm(struct drbd_conf *mdev) +{ + unsigned int enr; + unsigned long add = 0; + char ppb[10]; + int i; + + wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); + + for (i = 0; i < mdev->act_log->nr_elements; i++) { + enr = lc_entry(mdev->act_log, i)->lc_number; + if (enr == LC_FREE) + continue; + add += drbd_bm_ALe_set_all(mdev, enr); + } + + lc_unlock(mdev->act_log); + wake_up(&mdev->al_wait); + + INFO("Marked additional %s as out-of-sync based on AL.\n", + ppsize(ppb, Bit2KB(add))); +} + +static inline int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext) +{ + int rv; + + spin_lock_irq(&mdev->al_lock); + rv = (al_ext->refcnt == 0); + if (likely(rv)) + lc_del(mdev->act_log, al_ext); + spin_unlock_irq(&mdev->al_lock); + + MTRACE(TraceTypeALExts, TraceLvlMetrics, + if (unlikely(!rv)) + INFO("Waiting for extent in drbd_al_shrink()\n"); + ); + + return rv; +} + +/** + * drbd_al_shrink: Removes all active extents form the AL. (but does not + * write any transactions) + * You need to lock mdev->act_log with lc_try_lock() / lc_unlock() + */ +void drbd_al_shrink(struct drbd_conf *mdev) +{ + struct lc_element *al_ext; + int i; + + D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags)); + + for (i = 0; i < mdev->act_log->nr_elements; i++) { + al_ext = lc_entry(mdev->act_log, i); + if (al_ext->lc_number == LC_FREE) + continue; + wait_event(mdev->al_wait, _try_lc_del(mdev, al_ext)); + } + + wake_up(&mdev->al_wait); +} + +STATIC int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused) +{ + struct update_odbm_work *udw = (struct update_odbm_work *)w; + + if (!inc_local(mdev)) { + if (__ratelimit(&drbd_ratelimit_state)) + drbd_WARN("Can not update on disk bitmap, local IO disabled.\n"); + return 1; + } + + drbd_bm_write_sect(mdev, udw->enr); + dec_local(mdev); + + kfree(udw); + + if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) { + switch (mdev->state.conn) { + case SyncSource: case SyncTarget: + case PausedSyncS: case PausedSyncT: + drbd_resync_finished(mdev); + default: + /* nothing to do */ + break; + } + } + drbd_bcast_sync_progress(mdev); + + return 1; +} + + +/* ATTENTION. The AL's extents are 4MB each, while the extents in the + * resync LRU-cache are 16MB each. + * The caller of this function has to hold an inc_local() reference. + * + * TODO will be obsoleted once we have a caching lru of the on disk bitmap + */ +STATIC void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, + int count, int success) +{ + struct bm_extent *ext; + struct update_odbm_work *udw; + + unsigned int enr; + + D_ASSERT(atomic_read(&mdev->local_cnt)); + + /* I simply assume that a sector/size pair never crosses + * a 16 MB extent border. (Currently this is true...) 
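	 * With the usual constants (4 KB per bitmap bit, 512 byte
	 * bitmap sectors) one resync extent covers 512 * 8 * 4 KB
	 * = 16 MB, i.e. AL_EXT_PER_BM_SECT == 4 activity log extents
	 * of 4 MB each.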
*/ + enr = BM_SECT_TO_EXT(sector); + + ext = (struct bm_extent *) lc_get(mdev->resync, enr); + if (ext) { + if (ext->lce.lc_number == enr) { + if (success) + ext->rs_left -= count; + else + ext->rs_failed += count; + if (ext->rs_left < ext->rs_failed) { + ERR("BAD! sector=%llus enr=%u rs_left=%d " + "rs_failed=%d count=%d\n", + (unsigned long long)sector, + ext->lce.lc_number, ext->rs_left, + ext->rs_failed, count); + dump_stack(); + + lc_put(mdev->resync, &ext->lce); + drbd_force_state(mdev, NS(conn, Disconnecting)); + return; + } + } else { + /* Normally this element should be in the cache, + * since drbd_rs_begin_io() pulled it already in. + * + * But maybe an application write finished, and we set + * something outside the resync lru_cache in sync. + */ + int rs_left = drbd_bm_e_weight(mdev, enr); + if (ext->flags != 0) { + drbd_WARN("changing resync lce: %d[%u;%02lx]" + " -> %d[%u;00]\n", + ext->lce.lc_number, ext->rs_left, + ext->flags, enr, rs_left); + ext->flags = 0; + } + if (ext->rs_failed) { + drbd_WARN("Kicking resync_lru element enr=%u " + "out with rs_failed=%d\n", + ext->lce.lc_number, ext->rs_failed); + set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); + } + ext->rs_left = rs_left; + ext->rs_failed = success ? 0 : count; + lc_changed(mdev->resync, &ext->lce); + } + lc_put(mdev->resync, &ext->lce); + /* no race, we are within the al_lock! */ + + if (ext->rs_left == ext->rs_failed) { + ext->rs_failed = 0; + + udw = kmalloc(sizeof(*udw), GFP_ATOMIC); + if (udw) { + udw->enr = ext->lce.lc_number; + udw->w.cb = w_update_odbm; + drbd_queue_work_front(&mdev->data.work, &udw->w); + } else { + drbd_WARN("Could not kmalloc an udw\n"); + set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); + } + } + } else { + ERR("lc_get() failed! locked=%d/%d flags=%lu\n", + mdev->resync_locked, + mdev->resync->nr_elements, + mdev->resync->flags); + } +} + +/* clear the bit corresponding to the piece of storage in question: + * size byte of data starting from sector. Only clear a bits of the affected + * one ore more _aligned_ BM_BLOCK_SIZE blocks. + * + * called by worker on SyncTarget and receiver on SyncSource. + * + */ +void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, + const char *file, const unsigned int line) +{ + /* Is called from worker and receiver context _only_ */ + unsigned long sbnr, ebnr, lbnr; + unsigned long count = 0; + sector_t esector, nr_sectors; + int wake_up = 0; + unsigned long flags; + + if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { + ERR("drbd_set_in_sync: sector=%llus size=%d nonsense!\n", + (unsigned long long)sector, size); + return; + } + nr_sectors = drbd_get_capacity(mdev->this_bdev); + esector = sector + (size >> 9) - 1; + + ERR_IF(sector >= nr_sectors) return; + ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1); + + lbnr = BM_SECT_TO_BIT(nr_sectors-1); + + /* we clear it (in sync). + * round up start sector, round down end sector. we make sure we only + * clear full, alligned, BM_BLOCK_SIZE (4K) blocks */ + if (unlikely(esector < BM_SECT_PER_BIT-1)) + return; + if (unlikely(esector == (nr_sectors-1))) + ebnr = lbnr; + else + ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); + sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); + + MTRACE(TraceTypeResync, TraceLvlMetrics, + INFO("drbd_set_in_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n", + (unsigned long long)sector, size, sbnr, ebnr); + ); + + if (sbnr > ebnr) + return; + + /* + * ok, (capacity & 7) != 0 sometimes, but who cares... 
+ * we count rs_{total,left} in bits, not sectors. + */ + spin_lock_irqsave(&mdev->al_lock, flags); + count = drbd_bm_clear_bits(mdev, sbnr, ebnr); + if (count) { + /* we need the lock for drbd_try_clear_on_disk_bm */ + if (jiffies - mdev->rs_mark_time > HZ*10) { + /* should be roling marks, + * but we estimate only anyways. */ + if (mdev->rs_mark_left != drbd_bm_total_weight(mdev) && + mdev->state.conn != PausedSyncT && + mdev->state.conn != PausedSyncS) { + mdev->rs_mark_time = jiffies; + mdev->rs_mark_left = drbd_bm_total_weight(mdev); + } + } + if (inc_local(mdev)) { + drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE); + dec_local(mdev); + } + /* just wake_up unconditional now, various lc_chaged(), + * lc_put() in drbd_try_clear_on_disk_bm(). */ + wake_up = 1; + } + spin_unlock_irqrestore(&mdev->al_lock, flags); + if (wake_up) + wake_up(&mdev->al_wait); +} + +/* + * this is intended to set one request worth of data out of sync. + * affects at least 1 bit, + * and at most 1+DRBD_MAX_SEGMENT_SIZE/BM_BLOCK_SIZE bits. + * + * called by tl_clear and drbd_send_dblock (==drbd_make_request). + * so this can be _any_ process. + */ +void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, + const char *file, const unsigned int line) +{ + unsigned long sbnr, ebnr, lbnr, flags; + sector_t esector, nr_sectors; + unsigned int enr, count; + struct bm_extent *ext; + + if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { + ERR("sector: %llus, size: %d\n", + (unsigned long long)sector, size); + return; + } + + if (!inc_local(mdev)) + return; /* no disk, no metadata, no bitmap to set bits in */ + + nr_sectors = drbd_get_capacity(mdev->this_bdev); + esector = sector + (size >> 9) - 1; + + ERR_IF(sector >= nr_sectors) + goto out; + ERR_IF(esector >= nr_sectors) + esector = (nr_sectors-1); + + lbnr = BM_SECT_TO_BIT(nr_sectors-1); + + /* we set it out of sync, + * we do not need to round anything here */ + sbnr = BM_SECT_TO_BIT(sector); + ebnr = BM_SECT_TO_BIT(esector); + + MTRACE(TraceTypeResync, TraceLvlMetrics, + INFO("drbd_set_out_of_sync: sector=%llus size=%u " + "sbnr=%lu ebnr=%lu\n", + (unsigned long long)sector, size, sbnr, ebnr); + ); + + /* ok, (capacity & 7) != 0 sometimes, but who cares... + * we count rs_{total,left} in bits, not sectors. 
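	 * Note the asymmetry to __drbd_set_in_sync() above: here every
	 * bitmap bit the request touches is set (no rounding), while
	 * clearing only ever covers fully contained, aligned 4 KB blocks.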
*/ + spin_lock_irqsave(&mdev->al_lock, flags); + count = drbd_bm_set_bits(mdev, sbnr, ebnr); + + enr = BM_SECT_TO_EXT(sector); + ext = (struct bm_extent *) lc_find(mdev->resync, enr); + if (ext) + ext->rs_left += count; + spin_unlock_irqrestore(&mdev->al_lock, flags); + +out: + dec_local(mdev); +} + +static inline +struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) +{ + struct bm_extent *bm_ext; + int wakeup = 0; + unsigned long rs_flags; + + spin_lock_irq(&mdev->al_lock); + if (mdev->resync_locked > mdev->resync->nr_elements/2) { + spin_unlock_irq(&mdev->al_lock); + return NULL; + } + bm_ext = (struct bm_extent *) lc_get(mdev->resync, enr); + if (bm_ext) { + if (bm_ext->lce.lc_number != enr) { + bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); + bm_ext->rs_failed = 0; + lc_changed(mdev->resync, (struct lc_element *)bm_ext); + wakeup = 1; + } + if (bm_ext->lce.refcnt == 1) + mdev->resync_locked++; + set_bit(BME_NO_WRITES, &bm_ext->flags); + } + rs_flags = mdev->resync->flags; + spin_unlock_irq(&mdev->al_lock); + if (wakeup) + wake_up(&mdev->al_wait); + + if (!bm_ext) { + if (rs_flags & LC_STARVING) + drbd_WARN("Have to wait for element" + " (resync LRU too small?)\n"); + BUG_ON(rs_flags & LC_DIRTY); + } + + return bm_ext; +} + +static inline int _is_in_al(struct drbd_conf *mdev, unsigned int enr) +{ + struct lc_element *al_ext; + int rv = 0; + + spin_lock_irq(&mdev->al_lock); + if (unlikely(enr == mdev->act_log->new_number)) + rv = 1; + else { + al_ext = lc_find(mdev->act_log, enr); + if (al_ext) { + if (al_ext->refcnt) + rv = 1; + } + } + spin_unlock_irq(&mdev->al_lock); + + /* + if (unlikely(rv)) { + INFO("Delaying sync read until app's write is done\n"); + } + */ + return rv; +} + +/** + * drbd_rs_begin_io: Gets an extent in the resync LRU cache and sets it + * to BME_LOCKED. + * + * @sector: The sector number + * + * sleeps on al_wait. + * returns 1 if successful. + * returns 0 if interrupted. + */ +int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector) +{ + unsigned int enr = BM_SECT_TO_EXT(sector); + struct bm_extent *bm_ext; + int i, sig; + + MTRACE(TraceTypeResync, TraceLvlAll, + INFO("drbd_rs_begin_io: sector=%llus (rs_end=%d)\n", + (unsigned long long)sector, enr); + ); + + sig = wait_event_interruptible(mdev->al_wait, + (bm_ext = _bme_get(mdev, enr))); + if (sig) + return 0; + + if (test_bit(BME_LOCKED, &bm_ext->flags)) + return 1; + + for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { + sig = wait_event_interruptible(mdev->al_wait, + !_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i)); + if (sig) { + spin_lock_irq(&mdev->al_lock); + if (lc_put(mdev->resync, &bm_ext->lce) == 0) { + clear_bit(BME_NO_WRITES, &bm_ext->flags); + mdev->resync_locked--; + wake_up(&mdev->al_wait); + } + spin_unlock_irq(&mdev->al_lock); + return 0; + } + } + + set_bit(BME_LOCKED, &bm_ext->flags); + + return 1; +} + +/** + * drbd_try_rs_begin_io: Gets an extent in the resync LRU cache, sets it + * to BME_NO_WRITES, then tries to set it to BME_LOCKED. + * + * @sector: The sector number + * + * does not sleep. + * returns zero if we could set BME_LOCKED and can proceed, + * -EAGAIN if we need to try again. 
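 *
 * A caller-side sketch (illustrative only; assumes the prototype is
 * visible, e.g. via drbd_int.h, and picks an arbitrary back-off):
 */

static int example_rs_begin_io_polling(struct drbd_conf *mdev, sector_t sector)
{
	/* poll the non-blocking variant until we own the extent */
	while (drbd_try_rs_begin_io(mdev, sector) == -EAGAIN) {
		if (signal_pending(current))
			return 0;	/* interrupted; caller retries later */
		schedule_timeout_interruptible(HZ / 10);
	}
	return 1;
}

/**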
+ */ +int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) +{ + unsigned int enr = BM_SECT_TO_EXT(sector); + const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT; + struct bm_extent *bm_ext; + int i; + + MTRACE(TraceTypeResync, TraceLvlAll, + INFO("drbd_try_rs_begin_io: sector=%llus\n", + (unsigned long long)sector); + ); + + spin_lock_irq(&mdev->al_lock); + if (mdev->resync_wenr != LC_FREE && mdev->resync_wenr != enr) { + /* in case you have very heavy scattered io, it may + * stall the syncer undefined if we giveup the ref count + * when we try again and requeue. + * + * if we don't give up the refcount, but the next time + * we are scheduled this extent has been "synced" by new + * application writes, we'd miss the lc_put on the + * extent we keept the refcount on. + * so we remembered which extent we had to try agin, and + * if the next requested one is something else, we do + * the lc_put here... + * we also have to wake_up + */ + MTRACE(TraceTypeResync, TraceLvlAll, + INFO("dropping %u, aparently got 'synced' " + "by application io\n", mdev->resync_wenr); + ); + bm_ext = (struct bm_extent *) + lc_find(mdev->resync, mdev->resync_wenr); + if (bm_ext) { + D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags)); + D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags)); + clear_bit(BME_NO_WRITES, &bm_ext->flags); + mdev->resync_wenr = LC_FREE; + if (lc_put(mdev->resync, &bm_ext->lce) == 0) + mdev->resync_locked--; + wake_up(&mdev->al_wait); + } else { + ALERT("LOGIC BUG\n"); + } + } + bm_ext = (struct bm_extent *)lc_try_get(mdev->resync, enr); + if (bm_ext) { + if (test_bit(BME_LOCKED, &bm_ext->flags)) + goto proceed; + if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) { + mdev->resync_locked++; + } else { + /* we did set the BME_NO_WRITES, + * but then could not set BME_LOCKED, + * so we tried again. + * drop the extra reference. 
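	 * (lc_try_get() above took a second reference for this retry;
	 * the reference from the first, failed attempt, remembered via
	 * resync_wenr, stays held)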
*/ + MTRACE(TraceTypeResync, TraceLvlAll, + INFO("dropping extra reference on %u\n", enr); + ); + bm_ext->lce.refcnt--; + D_ASSERT(bm_ext->lce.refcnt > 0); + } + goto check_al; + } else { + if (mdev->resync_locked > mdev->resync->nr_elements-3) { + MTRACE(TraceTypeResync, TraceLvlAll, + INFO("resync_locked = %u!\n", mdev->resync_locked); + ); + goto try_again; + } + bm_ext = (struct bm_extent *)lc_get(mdev->resync, enr); + if (!bm_ext) { + const unsigned long rs_flags = mdev->resync->flags; + if (rs_flags & LC_STARVING) + drbd_WARN("Have to wait for element" + " (resync LRU too small?)\n"); + BUG_ON(rs_flags & LC_DIRTY); + goto try_again; + } + if (bm_ext->lce.lc_number != enr) { + bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); + bm_ext->rs_failed = 0; + lc_changed(mdev->resync, (struct lc_element *)bm_ext); + wake_up(&mdev->al_wait); + D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0); + } + set_bit(BME_NO_WRITES, &bm_ext->flags); + D_ASSERT(bm_ext->lce.refcnt == 1); + mdev->resync_locked++; + goto check_al; + } +check_al: + MTRACE(TraceTypeResync, TraceLvlAll, + INFO("checking al for %u\n", enr); + ); + for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { + if (unlikely(al_enr+i == mdev->act_log->new_number)) + goto try_again; + if (lc_is_used(mdev->act_log, al_enr+i)) + goto try_again; + } + set_bit(BME_LOCKED, &bm_ext->flags); +proceed: + mdev->resync_wenr = LC_FREE; + spin_unlock_irq(&mdev->al_lock); + return 0; + +try_again: + MTRACE(TraceTypeResync, TraceLvlAll, + INFO("need to try again for %u\n", enr); + ); + if (bm_ext) + mdev->resync_wenr = enr; + spin_unlock_irq(&mdev->al_lock); + return -EAGAIN; +} + +void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector) +{ + unsigned int enr = BM_SECT_TO_EXT(sector); + struct bm_extent *bm_ext; + unsigned long flags; + + MTRACE(TraceTypeResync, TraceLvlAll, + INFO("drbd_rs_complete_io: sector=%llus (rs_enr=%d)\n", + (long long)sector, enr); + ); + + spin_lock_irqsave(&mdev->al_lock, flags); + bm_ext = (struct bm_extent *) lc_find(mdev->resync, enr); + if (!bm_ext) { + spin_unlock_irqrestore(&mdev->al_lock, flags); + ERR("drbd_rs_complete_io() called, but extent not found\n"); + return; + } + + if (bm_ext->lce.refcnt == 0) { + spin_unlock_irqrestore(&mdev->al_lock, flags); + ERR("drbd_rs_complete_io(,%llu [=%u]) called, " + "but refcnt is 0!?\n", + (unsigned long long)sector, enr); + return; + } + + if (lc_put(mdev->resync, (struct lc_element *)bm_ext) == 0) { + clear_bit(BME_LOCKED, &bm_ext->flags); + clear_bit(BME_NO_WRITES, &bm_ext->flags); + mdev->resync_locked--; + wake_up(&mdev->al_wait); + } + + spin_unlock_irqrestore(&mdev->al_lock, flags); +} + +/** + * drbd_rs_cancel_all: Removes extents from the resync LRU. Even + * if they are BME_LOCKED. + */ +void drbd_rs_cancel_all(struct drbd_conf *mdev) +{ + MTRACE(TraceTypeResync, TraceLvlMetrics, + INFO("drbd_rs_cancel_all\n"); + ); + + spin_lock_irq(&mdev->al_lock); + + if (inc_local_if_state(mdev, Failed)) { /* Makes sure ->resync is there. */ + lc_reset(mdev->resync); + dec_local(mdev); + } + mdev->resync_locked = 0; + mdev->resync_wenr = LC_FREE; + spin_unlock_irq(&mdev->al_lock); + wake_up(&mdev->al_wait); +} + +/** + * drbd_rs_del_all: Gracefully remove all extents from the resync LRU. + * there may be still a reference hold by someone. In that case this function + * returns -EAGAIN. + * In case all elements got removed it returns zero. 
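 *
 * A teardown caller may simply back off and retry (illustrative sketch
 * only; the retry interval is arbitrary):
 */

static void example_rs_teardown(struct drbd_conf *mdev)
{
	while (drbd_rs_del_all(mdev) == -EAGAIN)
		schedule_timeout_interruptible(HZ / 10);
}

/**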
+ */ +int drbd_rs_del_all(struct drbd_conf *mdev) +{ + struct bm_extent *bm_ext; + int i; + + MTRACE(TraceTypeResync, TraceLvlMetrics, + INFO("drbd_rs_del_all\n"); + ); + + spin_lock_irq(&mdev->al_lock); + + if (inc_local_if_state(mdev, Failed)) { + /* ok, ->resync is there. */ + for (i = 0; i < mdev->resync->nr_elements; i++) { + bm_ext = (struct bm_extent *) lc_entry(mdev->resync, i); + if (bm_ext->lce.lc_number == LC_FREE) + continue; + if (bm_ext->lce.lc_number == mdev->resync_wenr) { + INFO("dropping %u in drbd_rs_del_all, apparently" + " got 'synced' by application io\n", + mdev->resync_wenr); + D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags)); + D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags)); + clear_bit(BME_NO_WRITES, &bm_ext->flags); + mdev->resync_wenr = LC_FREE; + lc_put(mdev->resync, &bm_ext->lce); + } + if (bm_ext->lce.refcnt != 0) { + INFO("Retrying drbd_rs_del_all() later. " + "refcnt=%d\n", bm_ext->lce.refcnt); + dec_local(mdev); + spin_unlock_irq(&mdev->al_lock); + return -EAGAIN; + } + D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags)); + D_ASSERT(!test_bit(BME_NO_WRITES, &bm_ext->flags)); + lc_del(mdev->resync, &bm_ext->lce); + } + D_ASSERT(mdev->resync->used == 0); + dec_local(mdev); + } + spin_unlock_irq(&mdev->al_lock); + + return 0; +} + +/* Record information on a failure to resync the specified blocks + * + * called on SyncTarget when resync write fails or NegRSDReply received + * + */ +void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size) +{ + /* Is called from worker and receiver context _only_ */ + unsigned long sbnr, ebnr, lbnr; + unsigned long count; + sector_t esector, nr_sectors; + int wake_up = 0; + + MTRACE(TraceTypeResync, TraceLvlSummary, + INFO("drbd_rs_failed_io: sector=%llus, size=%u\n", + (unsigned long long)sector, size); + ); + + if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { + ERR("drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", + (unsigned long long)sector, size); + return; + } + nr_sectors = drbd_get_capacity(mdev->this_bdev); + esector = sector + (size >> 9) - 1; + + ERR_IF(sector >= nr_sectors) return; + ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1); + + lbnr = BM_SECT_TO_BIT(nr_sectors-1); + + /* + * round up start sector, round down end sector. we make sure we only + * handle full, alligned, BM_BLOCK_SIZE (4K) blocks */ + if (unlikely(esector < BM_SECT_PER_BIT-1)) + return; + if (unlikely(esector == (nr_sectors-1))) + ebnr = lbnr; + else + ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); + sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); + + if (sbnr > ebnr) + return; + + /* + * ok, (capacity & 7) != 0 sometimes, but who cares... + * we count rs_{total,left} in bits, not sectors. + */ + spin_lock_irq(&mdev->al_lock); + count = drbd_bm_count_bits(mdev, sbnr, ebnr); + if (count) { + mdev->rs_failed += count; + + if (inc_local(mdev)) { + drbd_try_clear_on_disk_bm(mdev, sector, count, FALSE); + dec_local(mdev); + } + + /* just wake_up unconditional now, various lc_chaged(), + * lc_put() in drbd_try_clear_on_disk_bm(). */ + wake_up = 1; + } + spin_unlock_irq(&mdev->al_lock); + if (wake_up) + wake_up(&mdev->al_wait); +} -- cgit v1.2.3 From 1b4efd77d44745e5fded4836ebc917f0fa3ceae2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 30 Mar 2009 18:47:11 +0200 Subject: DRBD: bitmap DRBD maintains a dirty bitmap in case it has to run without peer node or without local disk. Writes to the on disk dirty bitmap are minimized by the activity log (=AL). 
Each time an extent is evicted from the AL the part of the bitmap no longer covered by the AL is written to disk. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 1307 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 1307 insertions(+) create mode 100644 drivers/block/drbd/drbd_bitmap.c diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c new file mode 100644 index 000000000000..6704d79837df --- /dev/null +++ b/drivers/block/drbd/drbd_bitmap.c @@ -0,0 +1,1307 @@ +/* + drbd_bitmap.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2004-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2004-2008, Philipp Reisner . + Copyright (C) 2004-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include "drbd_int.h" + +/* OPAQUE outside this file! + * interface defined in drbd_int.h + + * convetion: + * function name drbd_bm_... => used elsewhere, "public". + * function name bm_... => internal to implementation, "private". + + * Note that since find_first_bit returns int, at the current granularity of + * the bitmap (4KB per byte), this implementation "only" supports up to + * 1<<(32+12) == 16 TB... + */ + +/* + * NOTE + * Access to the *bm_pages is protected by bm_lock. + * It is safe to read the other members within the lock. + * + * drbd_bm_set_bits is called from bio_endio callbacks, + * We may be called with irq already disabled, + * so we need spin_lock_irqsave(). + * And we need the kmap_atomic. + */ +struct drbd_bitmap { + struct page **bm_pages; + spinlock_t bm_lock; + /* WARNING unsigned long bm_*: + * 32bit number of bit offset is just enough for 512 MB bitmap. + * it will blow up if we make the bitmap bigger... + * not that it makes much sense to have a bitmap that large, + * rather change the granularity to 16k or 64k or something. + * (that implies other problems, however...) + */ + unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? */ + unsigned long bm_bits; + size_t bm_words; + size_t bm_number_of_pages; + sector_t bm_dev_capacity; + struct semaphore bm_change; /* serializes resize operations */ + + atomic_t bm_async_io; + wait_queue_head_t bm_io_wait; + + unsigned long bm_flags; + + /* debugging aid, in case we are still racy somewhere */ + char *bm_why; + struct task_struct *bm_task; +}; + +/* definition of bits in bm_flags */ +#define BM_LOCKED 0 +#define BM_MD_IO_ERROR (BITS_PER_LONG-1) /* 31? 63? 
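 */

/*
 * A worked sizing sketch (illustrative only, names local to this
 * example; assumes the 4 KB-per-bit granularity described above):
 * one bit tracks one 4 KB block, i.e. eight 512 byte sectors.
 */
static unsigned long example_bits_needed(sector_t capacity)
{
	/* round the device size up to full 4 KB blocks */
	return (unsigned long)((capacity + 7) >> 3);
}

static size_t example_words_needed(unsigned long bits)
{
	/* one unsigned long of bitmap per BITS_PER_LONG bits, rounded up */
	return (bits + BITS_PER_LONG - 1) / BITS_PER_LONG;
}

/*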
+
+/* definition of bits in bm_flags */
+#define BM_LOCKED       0
+#define BM_MD_IO_ERROR  (BITS_PER_LONG-1) /* 31? 63? */
+
+static inline int bm_is_locked(struct drbd_bitmap *b)
+{
+	return test_bit(BM_LOCKED, &b->bm_flags);
+}
+
+#define bm_print_lock_info(m) __bm_print_lock_info(m, __func__)
+static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	if (!__ratelimit(&drbd_ratelimit_state))
+		return;
+	ERR("FIXME %s in %s, bitmap locked for '%s' by %s\n",
+	    current == mdev->receiver.task ? "receiver" :
+	    current == mdev->asender.task  ? "asender"  :
+	    current == mdev->worker.task   ? "worker"   : current->comm,
+	    func, b->bm_why ?: "?",
+	    b->bm_task == mdev->receiver.task ? "receiver" :
+	    b->bm_task == mdev->asender.task  ? "asender"  :
+	    b->bm_task == mdev->worker.task   ? "worker"   : "?");
+}
+
+void drbd_bm_lock(struct drbd_conf *mdev, char *why)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	int trylock_failed;
+
+	if (!b) {
+		ERR("FIXME no bitmap in drbd_bm_lock!?\n");
+		return;
+	}
+
+	trylock_failed = down_trylock(&b->bm_change);
+
+	if (trylock_failed) {
+		DBG("%s going to '%s' but bitmap already locked for '%s' by %s\n",
+		    current == mdev->receiver.task ? "receiver" :
+		    current == mdev->asender.task  ? "asender"  :
+		    current == mdev->worker.task   ? "worker"   : "?",
+		    why, b->bm_why ?: "?",
+		    b->bm_task == mdev->receiver.task ? "receiver" :
+		    b->bm_task == mdev->asender.task  ? "asender"  :
+		    b->bm_task == mdev->worker.task   ? "worker"   : "?");
+		down(&b->bm_change);
+	}
+	if (__test_and_set_bit(BM_LOCKED, &b->bm_flags))
+		ERR("FIXME bitmap already locked in bm_lock\n");
+
+	b->bm_why  = why;
+	b->bm_task = current;
+}
+
+void drbd_bm_unlock(struct drbd_conf *mdev)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	if (!b) {
+		ERR("FIXME no bitmap in drbd_bm_unlock!?\n");
+		return;
+	}
+
+	if (!__test_and_clear_bit(BM_LOCKED, &mdev->bitmap->bm_flags))
+		ERR("FIXME bitmap not locked in bm_unlock\n");
+
+	b->bm_why  = NULL;
+	b->bm_task = NULL;
+	up(&b->bm_change);
+}
+
+#define bm_end_info(ignored...)	((void)(0))
+
+#if 0
+#define catch_oob_access_start() do {	\
+	do {				\
+		if ((bm-p_addr) >= PAGE_SIZE/sizeof(long)) { \
+			printk(KERN_ALERT "drbd_bitmap.c:%u %s: p_addr:%p bm:%p %d\n", \
+				__LINE__ , __func__ , p_addr, bm, (bm-p_addr)); \
+			break;		\
+		}
+#define catch_oob_access_end()	\
+	} while (0); } while (0)
+#else
+#define catch_oob_access_start() do {
+#define catch_oob_access_end() } while (0)
+#endif
+
+/* word offset to long pointer */
+STATIC unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km)
+{
+	struct page *page;
+	unsigned long page_nr;
+
+	/* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */
+	page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3);
+	BUG_ON(page_nr >= b->bm_number_of_pages);
+	page = b->bm_pages[page_nr];
+
+	return (unsigned long *) kmap_atomic(page, km);
+}
+
+unsigned long *bm_map_paddr(struct drbd_bitmap *b, unsigned long offset)
+{
+	return __bm_map_paddr(b, offset, KM_IRQ1);
+}
+
+void __bm_unmap(unsigned long *p_addr, const enum km_type km)
+{
+	kunmap_atomic(p_addr, km);
+};
+
+void bm_unmap(unsigned long *p_addr)
+{
+	return __bm_unmap(p_addr, KM_IRQ1);
+}
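The shift in __bm_map_paddr() above is dense, so here it is unpacked as a compilable sketch. PAGE_SHIFT and LN2_BPL are assumed here (4K pages, 64-bit longs); the kernel derives them:

/* Word-offset -> page mapping, as used by __bm_map_paddr(). */
#include <stdio.h>

#define PAGE_SHIFT 12
#define LN2_BPL    6                      /* log2(BITS_PER_LONG) on 64bit */

int main(void)
{
	unsigned long word_offset = 1000; /* offset into the bitmap, in longs */

	/* (word_offset * sizeof(long)) >> PAGE_SHIFT, written as one shift */
	unsigned long page_nr = word_offset >> (PAGE_SHIFT - LN2_BPL + 3);
	/* offset within that page, in longs: the MLPP() macro below */
	unsigned long in_page = word_offset & ((1UL << (PAGE_SHIFT - 3)) - 1);

	printf("word %lu lives in page %lu at long index %lu\n",
	       word_offset, page_nr, in_page); /* page 1, index 488 */
	return 0;
}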
+
+/* long word offset of _bitmap_ sector */
+#define S2W(s)	((s)<<(BM_EXT_SIZE_B-BM_BLOCK_SIZE_B-LN2_BPL))
+
+/* word offset from start of bitmap to word number _in_page_
+ * modulo longs per page
+#define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long))
+ hm, well, Philipp thinks gcc might not optimize the % into & (... - 1)
+ so do it explicitly:
+ */
+#define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1))
+
+/* Long words per page */
+#define LWPP (PAGE_SIZE/sizeof(long))
+
+/*
+ * actually most functions herein should take a struct drbd_bitmap*, not a
+ * struct drbd_conf*, but for the debug macros I like to have the mdev around
+ * to be able to report device specific messages.
+ */
+
+STATIC void bm_free_pages(struct page **pages, unsigned long number)
+{
+	unsigned long i;
+	if (!pages)
+		return;
+
+	for (i = 0; i < number; i++) {
+		if (!pages[i]) {
+			printk(KERN_ALERT "drbd: bm_free_pages tried to free "
+					  "a NULL pointer; i=%lu n=%lu\n",
+					  i, number);
+			continue;
+		}
+		__free_page(pages[i]);
+		pages[i] = NULL;
+	}
+}
+
+/*
+ * "have" and "want" are NUMBER OF PAGES.
+ */
+STATIC struct page **bm_realloc_pages(struct page **old_pages,
+				       unsigned long have,
+				       unsigned long want)
+{
+	struct page **new_pages, *page;
+	unsigned int i, bytes;
+
+	BUG_ON(have == 0 && old_pages != NULL);
+	BUG_ON(have != 0 && old_pages == NULL);
+
+	if (have == want)
+		return old_pages;
+
+	/* To use kmalloc here is ok, as long as we support 4TB at max...
+	 * otherwise this might become bigger than 128KB, which is
+	 * the maximum for kmalloc.
+	 *
+	 * no, it is not: on 64bit boxes, sizeof(void*) == 8,
+	 * 128MB bitmap @ 4K pages -> 256K of page pointers.
+	 * ==> use vmalloc for now again.
+	 * then again, we could do something like
+	 *   if (nr_pages > watermark) vmalloc else kmalloc :*> ...
+	 * or do cascading page arrays:
+	 * one page for the page array of the page array,
+	 * those pages for the real bitmap pages.
+	 * there we could even add some optimization members,
+	 * so we won't need to kmap_atomic in bm_find_next_bit just to see
+	 * that the page has no bits set ...
+	 * or we can try a "huge" page ;-)
+	 */
+	bytes = sizeof(struct page *)*want;
+	new_pages = vmalloc(bytes);
+	if (!new_pages)
+		return NULL;
+
+	memset(new_pages, 0, bytes);
+	if (want >= have) {
+		for (i = 0; i < have; i++)
+			new_pages[i] = old_pages[i];
+		for (; i < want; i++) {
+			page = alloc_page(GFP_HIGHUSER);
+			if (!page) {
+				bm_free_pages(new_pages + have, i - have);
+				vfree(new_pages);
+				return NULL;
+			}
+			new_pages[i] = page;
+		}
+	} else {
+		for (i = 0; i < want; i++)
+			new_pages[i] = old_pages[i];
+		/* NOT HERE, we are outside the spinlock!
+		bm_free_pages(old_pages + want, have - want);
+		*/
+	}
+
+	return new_pages;
+}
+
+/*
+ * called on driver init only. TODO call when a device is created.
+ * allocates the drbd_bitmap, and stores it in mdev->bitmap.
+ */
+int drbd_bm_init(struct drbd_conf *mdev)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	WARN_ON(b != NULL);
+	b = kzalloc(sizeof(struct drbd_bitmap), GFP_KERNEL);
+	if (!b)
+		return -ENOMEM;
+	spin_lock_init(&b->bm_lock);
+	init_MUTEX(&b->bm_change);
+	init_waitqueue_head(&b->bm_io_wait);
+
+	mdev->bitmap = b;
+
+	return 0;
+}
+
+sector_t drbd_bm_capacity(struct drbd_conf *mdev)
+{
+	ERR_IF(!mdev->bitmap) return 0;
+	return mdev->bitmap->bm_dev_capacity;
+}
+
+/* called on driver unload. TODO: call when a device is destroyed.
+ */
+void drbd_bm_cleanup(struct drbd_conf *mdev)
+{
+	ERR_IF (!mdev->bitmap) return;
+	bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
+	vfree(mdev->bitmap->bm_pages);
+	kfree(mdev->bitmap);
+	mdev->bitmap = NULL;
+}
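The vmalloc-vs-kmalloc reasoning in the comment inside bm_realloc_pages() above can be checked with a few lines of arithmetic; this standalone sketch reproduces the 4 TB worst case the comment mentions:

/* Page-pointer array size for a 4 TB device: too big for kmalloc(). */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t dev_bytes    = 4ULL << 40;            /* 4 TB device    */
	uint64_t bitmap_bits  = dev_bytes / 4096;      /* one bit per 4K */
	uint64_t bitmap_bytes = bitmap_bits / 8;       /* 128 MB bitmap  */
	uint64_t pages        = bitmap_bytes / 4096;   /* bitmap pages   */
	uint64_t ptr_bytes    = pages * sizeof(void *);

	printf("bitmap: %llu MB, page pointers: %llu KB\n",
	       (unsigned long long)(bitmap_bytes >> 20),   /* 128 MB */
	       (unsigned long long)(ptr_bytes >> 10));     /* 256 KB */
	return 0;
}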
+
+/*
+ * since (b->bm_bits % BITS_PER_LONG) != 0,
+ * this masks out the remaining bits.
+ * Returns the number of bits cleared.
+ */
+STATIC int bm_clear_surplus(struct drbd_bitmap *b)
+{
+	const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1;
+	size_t w = b->bm_bits >> LN2_BPL;
+	int cleared = 0;
+	unsigned long *p_addr, *bm;
+
+	p_addr = bm_map_paddr(b, w);
+	bm = p_addr + MLPP(w);
+	if (w < b->bm_words) {
+		catch_oob_access_start();
+		cleared = hweight_long(*bm & ~mask);
+		*bm &= mask;
+		catch_oob_access_end();
+		w++; bm++;
+	}
+
+	if (w < b->bm_words) {
+		catch_oob_access_start();
+		cleared += hweight_long(*bm);
+		*bm = 0;
+		catch_oob_access_end();
+	}
+	bm_unmap(p_addr);
+	return cleared;
+}
+
+STATIC void bm_set_surplus(struct drbd_bitmap *b)
+{
+	const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1;
+	size_t w = b->bm_bits >> LN2_BPL;
+	unsigned long *p_addr, *bm;
+
+	p_addr = bm_map_paddr(b, w);
+	bm = p_addr + MLPP(w);
+	if (w < b->bm_words) {
+		catch_oob_access_start();
+		*bm |= ~mask;
+		bm++; w++;
+		catch_oob_access_end();
+	}
+
+	if (w < b->bm_words) {
+		catch_oob_access_start();
+		*bm = ~(0UL);
+		catch_oob_access_end();
+	}
+	bm_unmap(p_addr);
+}
+
+STATIC unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian)
+{
+	unsigned long *p_addr, *bm, offset = 0;
+	unsigned long bits = 0;
+	unsigned long i, do_now;
+
+	while (offset < b->bm_words) {
+		i = do_now = min_t(size_t, b->bm_words-offset, LWPP);
+		p_addr = bm_map_paddr(b, offset);
+		bm = p_addr + MLPP(offset);
+		while (i--) {
+			catch_oob_access_start();
+#ifndef __LITTLE_ENDIAN
+			if (swap_endian)
+				*bm = lel_to_cpu(*bm);
+#endif
+			bits += hweight_long(*bm++);
+			catch_oob_access_end();
+		}
+		bm_unmap(p_addr);
+		offset += do_now;
+	}
+
+	return bits;
+}
+
+static inline unsigned long bm_count_bits(struct drbd_bitmap *b)
+{
+	return __bm_count_bits(b, 0);
+}
+
+static inline unsigned long bm_count_bits_swap_endian(struct drbd_bitmap *b)
+{
+	return __bm_count_bits(b, 1);
+}
+
+void _drbd_bm_recount_bits(struct drbd_conf *mdev, char *file, int line)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	unsigned long flags, bits;
+
+	ERR_IF(!b) return;
+
+	/* IMO this should be inside drbd_bm_lock/unlock.
+	 * Unfortunately it is used outside of the locks.
+	 * And I'm not yet sure where we need to place the
+	 * lock/unlock correctly.
+	 */
+
+	spin_lock_irqsave(&b->bm_lock, flags);
+	bits = bm_count_bits(b);
+	if (bits != b->bm_set) {
+		ERR("bm_set was %lu, corrected to %lu. %s:%d\n",
+		    b->bm_set, bits, file, line);
+		b->bm_set = bits;
+	}
+	spin_unlock_irqrestore(&b->bm_lock, flags);
+}
+
+/* offset and len in long words.*/
+STATIC void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
+{
+	unsigned long *p_addr, *bm;
+	size_t do_now, end;
+
+#define BM_SECTORS_PER_BIT (BM_BLOCK_SIZE/512)
+
+	end = offset + len;
+
+	if (end > b->bm_words) {
+		printk(KERN_ALERT "drbd: bm_memset end > bm_words\n");
+		return;
+	}
+
+	while (offset < end) {
+		do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset;
+		p_addr = bm_map_paddr(b, offset);
+		bm = p_addr + MLPP(offset);
+		catch_oob_access_start();
+		if (bm+do_now > p_addr + LWPP) {
+			printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
+			       p_addr, bm, (int)do_now);
+			break; /* breaks to after catch_oob_access_end() only! */
+		}
+		memset(bm, c, do_now * sizeof(long));
+		catch_oob_access_end();
+		bm_unmap(p_addr);
+		offset += do_now;
+	}
+}
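bm_clear_surplus() above is easier to follow in isolation. This userspace sketch applies the same mask to a single made-up last word; hweight_long is stood in for by the GCC builtin __builtin_popcountl:

/* Masking off bits beyond bm_bits in the final long, and counting them. */
#include <stdio.h>

int main(void)
{
	unsigned long bm_bits = 70;    /* bitmap ends mid-word            */
	unsigned long last    = ~0UL;  /* last word: all bits set         */

	/* mask of the valid low (bm_bits % BITS_PER_LONG) bits */
	unsigned long mask = (1UL << (bm_bits & (sizeof(long)*8 - 1))) - 1;

	unsigned long cleared = __builtin_popcountl(last & ~mask);
	last &= mask;

	printf("mask=%#lx, cleared %lu surplus bits\n", mask, cleared);
	return 0;
}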
+
+/*
+ * make sure the bitmap has enough room for the attached storage,
+ * if necessary, resize.
+ * called whenever we may have changed the device size.
+ * returns -ENOMEM if we could not allocate enough memory, 0 on success.
+ * In case this is actually a resize, we copy the old bitmap into the new one.
+ * Otherwise, the bitmap is initialized to all bits set.
+ */
+int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	unsigned long bits, words, owords, obits, *p_addr, *bm;
+	unsigned long want, have, onpages; /* number of pages */
+	struct page **npages, **opages = NULL;
+	int err = 0, growing;
+
+	ERR_IF(!b) return -ENOMEM;
+
+	drbd_bm_lock(mdev, "resize");
+
+	INFO("drbd_bm_resize called with capacity == %llu\n",
+	     (unsigned long long)capacity);
+
+	if (capacity == b->bm_dev_capacity)
+		goto out;
+
+	if (capacity == 0) {
+		spin_lock_irq(&b->bm_lock);
+		opages = b->bm_pages;
+		onpages = b->bm_number_of_pages;
+		owords = b->bm_words;
+		b->bm_pages = NULL;
+		b->bm_number_of_pages =
+		b->bm_set =
+		b->bm_bits =
+		b->bm_words =
+		b->bm_dev_capacity = 0;
+		spin_unlock_irq(&b->bm_lock);
+		bm_free_pages(opages, onpages);
+		vfree(opages);
+		goto out;
+	}
+	bits = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));
+
+	/* if we would use
+	   words = ALIGN(bits,BITS_PER_LONG) >> LN2_BPL;
+	   a 32bit host could present the wrong number of words
+	   to a 64bit host.
+	*/
+	words = ALIGN(bits, 64) >> LN2_BPL;
+
+	if (inc_local(mdev)) {
+		D_ASSERT((u64)bits <= (((u64)mdev->bc->md.md_size_sect-MD_BM_OFFSET) << 12));
+		dec_local(mdev);
+	}
+
+	/* one extra long to catch off by one errors */
+	want = ALIGN((words+1)*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
+	have = b->bm_number_of_pages;
+	if (want == have) {
+		D_ASSERT(b->bm_pages != NULL);
+		npages = b->bm_pages;
+	} else
+		npages = bm_realloc_pages(b->bm_pages, have, want);
+
+	if (!npages) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	spin_lock_irq(&b->bm_lock);
+	opages = b->bm_pages;
+	owords = b->bm_words;
+	obits  = b->bm_bits;
+
+	growing = bits > obits;
+	if (opages)
+		bm_set_surplus(b);
+
+	b->bm_pages = npages;
+	b->bm_number_of_pages = want;
+	b->bm_bits  = bits;
+	b->bm_words = words;
+	b->bm_dev_capacity = capacity;
+
+	if (growing) {
+		bm_memset(b, owords, 0xff, words-owords);
+		b->bm_set += bits - obits;
+	}
+
+	if (want < have) {
+		/* implicit: (opages != NULL) && (opages != npages) */
+		bm_free_pages(opages + want, have - want);
+	}
+
+	p_addr = bm_map_paddr(b, words);
+	bm = p_addr + MLPP(words);
+	catch_oob_access_start();
+	*bm = DRBD_MAGIC;
+	catch_oob_access_end();
+	bm_unmap(p_addr);
+
+	(void)bm_clear_surplus(b);
+	if (!growing)
+		b->bm_set = bm_count_bits(b);
+
+	bm_end_info(mdev, __func__);
+	spin_unlock_irq(&b->bm_lock);
+	if (opages != npages)
+		vfree(opages);
+	INFO("resync bitmap: bits=%lu words=%lu\n", bits, words);
+
+ out:
+	drbd_bm_unlock(mdev);
+	return err;
+}
+
+/* inherently racy:
+ * if not protected by other means, return value may be out of date when
+ * leaving this function...
+ * we still need to lock it, since it is important that this returns
+ * bm_set == 0 precisely.
+ *
+ * maybe bm_set should be atomic_t ?
+ */ +unsigned long drbd_bm_total_weight(struct drbd_conf *mdev) +{ + struct drbd_bitmap *b = mdev->bitmap; + unsigned long s; + unsigned long flags; + + /* if I don't have a disk, I don't know about out-of-sync status */ + if (!inc_local_if_state(mdev, Negotiating)) + return 0; + + ERR_IF(!b) return 0; + ERR_IF(!b->bm_pages) return 0; + + spin_lock_irqsave(&b->bm_lock, flags); + s = b->bm_set; + spin_unlock_irqrestore(&b->bm_lock, flags); + + dec_local(mdev); + + return s; +} + +size_t drbd_bm_words(struct drbd_conf *mdev) +{ + struct drbd_bitmap *b = mdev->bitmap; + ERR_IF(!b) return 0; + ERR_IF(!b->bm_pages) return 0; + + return b->bm_words; +} + +unsigned long drbd_bm_bits(struct drbd_conf *mdev) +{ + struct drbd_bitmap *b = mdev->bitmap; + ERR_IF(!b) return 0; + + return b->bm_bits; +} + +/* merge number words from buffer into the bitmap starting at offset. + * buffer[i] is expected to be little endian unsigned long. + * bitmap must be locked by drbd_bm_lock. + * currently only used from receive_bitmap. + */ +void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, + unsigned long *buffer) +{ + struct drbd_bitmap *b = mdev->bitmap; + unsigned long *p_addr, *bm; + unsigned long word, bits; + size_t end, do_now; + + end = offset + number; + + ERR_IF(!b) return; + ERR_IF(!b->bm_pages) return; + if (number == 0) + return; + WARN_ON(offset >= b->bm_words); + WARN_ON(end > b->bm_words); + + spin_lock_irq(&b->bm_lock); + while (offset < end) { + do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; + p_addr = bm_map_paddr(b, offset); + bm = p_addr + MLPP(offset); + offset += do_now; + while (do_now--) { + catch_oob_access_start(); + bits = hweight_long(*bm); + word = *bm | lel_to_cpu(*buffer++); + *bm++ = word; + b->bm_set += hweight_long(word) - bits; + catch_oob_access_end(); + } + bm_unmap(p_addr); + } + /* with 32bit <-> 64bit cross-platform connect + * this is only correct for current usage, + * where we _know_ that we are 64 bit aligned, + * and know that this function is used in this way, too... + */ + if (end == b->bm_words) { + b->bm_set -= bm_clear_surplus(b); + bm_end_info(mdev, __func__); + } + spin_unlock_irq(&b->bm_lock); +} + +/* copy number words from the bitmap starting at offset into the buffer. + * buffer[i] will be little endian unsigned long. 
+ */ +void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, + unsigned long *buffer) +{ + struct drbd_bitmap *b = mdev->bitmap; + unsigned long *p_addr, *bm; + size_t end, do_now; + + end = offset + number; + + ERR_IF(!b) return; + ERR_IF(!b->bm_pages) return; + + spin_lock_irq(&b->bm_lock); + if ((offset >= b->bm_words) || + (end > b->bm_words) || + (number <= 0)) + ERR("offset=%lu number=%lu bm_words=%lu\n", + (unsigned long) offset, + (unsigned long) number, + (unsigned long) b->bm_words); + else { + while (offset < end) { + do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; + p_addr = bm_map_paddr(b, offset); + bm = p_addr + MLPP(offset); + offset += do_now; + while (do_now--) { + catch_oob_access_start(); + *buffer++ = cpu_to_lel(*bm++); + catch_oob_access_end(); + } + bm_unmap(p_addr); + } + } + spin_unlock_irq(&b->bm_lock); +} + +/* set all bits in the bitmap */ +void drbd_bm_set_all(struct drbd_conf *mdev) +{ + struct drbd_bitmap *b = mdev->bitmap; + ERR_IF(!b) return; + ERR_IF(!b->bm_pages) return; + + spin_lock_irq(&b->bm_lock); + bm_memset(b, 0, 0xff, b->bm_words); + (void)bm_clear_surplus(b); + b->bm_set = b->bm_bits; + spin_unlock_irq(&b->bm_lock); +} + +/* clear all bits in the bitmap */ +void drbd_bm_clear_all(struct drbd_conf *mdev) +{ + struct drbd_bitmap *b = mdev->bitmap; + ERR_IF(!b) return; + ERR_IF(!b->bm_pages) return; + + spin_lock_irq(&b->bm_lock); + bm_memset(b, 0, 0, b->bm_words); + b->bm_set = 0; + spin_unlock_irq(&b->bm_lock); +} + +static void bm_async_io_complete(struct bio *bio, int error) +{ + struct drbd_bitmap *b = bio->bi_private; + int uptodate = bio_flagged(bio, BIO_UPTODATE); + + + /* strange behaviour of some lower level drivers... + * fail the request by clearing the uptodate flag, + * but do not return any error?! + * do we want to WARN() on this? */ + if (!error && !uptodate) + error = -EIO; + + if (error) { + /* doh. what now? + * for now, set all bits, and flag MD_IO_ERROR */ + __set_bit(BM_MD_IO_ERROR, &b->bm_flags); + } + if (atomic_dec_and_test(&b->bm_async_io)) + wake_up(&b->bm_io_wait); + + bio_put(bio); +} + +STATIC void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int page_nr, int rw) __must_hold(local) +{ + /* we are process context. we always get a bio */ + struct bio *bio = bio_alloc(GFP_KERNEL, 1); + unsigned int len; + sector_t on_disk_sector = + mdev->bc->md.md_offset + mdev->bc->md.bm_offset; + on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9); + + /* this might happen with very small + * flexible external meta data device */ + len = min_t(unsigned int, PAGE_SIZE, + (drbd_md_last_sector(mdev->bc) - on_disk_sector + 1)<<9); + + bio->bi_bdev = mdev->bc->md_bdev; + bio->bi_sector = on_disk_sector; + bio_add_page(bio, b->bm_pages[page_nr], len, 0); + bio->bi_private = b; + bio->bi_end_io = bm_async_io_complete; + + if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { + bio->bi_rw |= rw; + bio_endio(bio, -EIO); + } else { + submit_bio(rw, bio); + } +} + +# if defined(__LITTLE_ENDIAN) + /* nothing to do, on disk == in memory */ +# define bm_cpu_to_lel(x) ((void)0) +# else +void bm_cpu_to_lel(struct drbd_bitmap *b) +{ + /* need to cpu_to_lel all the pages ... 
+ * this may be optimized by using
+ * cpu_to_lel(-1) == -1 and cpu_to_lel(0) == 0;
+ * the following is still not optimal, but better than nothing */
+	unsigned int i;
+	unsigned long *p_addr;
+
+	if (b->bm_set == 0) {
+		/* no page at all; avoid swap if all is 0 */
+		i = b->bm_number_of_pages;
+	} else if (b->bm_set == b->bm_bits) {
+		/* only the last page */
+		i = b->bm_number_of_pages - 1;
+	} else {
+		/* all pages */
+		i = 0;
+	}
+	for (; i < b->bm_number_of_pages; i++) {
+		unsigned long *bm;
+		/* if you'd want to use kmap_atomic, you'd have to disable irq! */
+		p_addr = kmap(b->bm_pages[i]);
+		for (bm = p_addr; bm < p_addr + PAGE_SIZE/sizeof(long); bm++)
+			*bm = cpu_to_lel(*bm);
+		kunmap(p_addr);
+	}
+}
+# endif
+/* lel_to_cpu == cpu_to_lel */
+# define bm_lel_to_cpu(x) bm_cpu_to_lel(x)
+
+/*
+ * bm_rw: read/write the whole bitmap from/to its on disk location.
+ */
+STATIC int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	/* sector_t sector; */
+	int bm_words, num_pages, i;
+	unsigned long now;
+	char ppb[10];
+	int err = 0;
+
+	WARN_ON(!bm_is_locked(b));
+
+	/* no spinlock here, the drbd_bm_lock should be enough! */
+
+	bm_words  = drbd_bm_words(mdev);
+	num_pages = (bm_words*sizeof(long) + PAGE_SIZE-1) >> PAGE_SHIFT;
+
+	/* on disk bitmap is little endian */
+	if (rw == WRITE)
+		bm_cpu_to_lel(b);
+
+	now = jiffies;
+	atomic_set(&b->bm_async_io, num_pages);
+	__clear_bit(BM_MD_IO_ERROR, &b->bm_flags);
+
+	/* let the layers below us try to merge these bios... */
+	for (i = 0; i < num_pages; i++)
+		bm_page_io_async(mdev, b, i, rw);
+
+	drbd_blk_run_queue(bdev_get_queue(mdev->bc->md_bdev));
+	wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0);
+
+	MTRACE(TraceTypeMDIO, TraceLvlSummary,
+	       INFO("%s of bitmap took %lu jiffies\n",
+		    rw == READ ? "reading" : "writing", jiffies - now);
+	    );
+
+	if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) {
+		ALERT("we had at least one MD IO ERROR during bitmap IO\n");
+		drbd_chk_io_error(mdev, 1, TRUE);
+		drbd_io_error(mdev, TRUE);
+		err = -EIO;
+	}
+
+	now = jiffies;
+	if (rw == WRITE) {
+		/* swap back endianness */
+		bm_lel_to_cpu(b);
+		/* flush bitmap to stable storage */
+		drbd_md_flush(mdev);
+	} else /* rw == READ */ {
+		/* just read, if necessary adjust endianness */
+		b->bm_set = bm_count_bits_swap_endian(b);
+		INFO("recounting of set bits took additional %lu jiffies\n",
+		     jiffies - now);
+	}
+	now = b->bm_set;
+
+	INFO("%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
+	     ppsize(ppb, now << (BM_BLOCK_SIZE_B-10)), now);
+
+	return err;
+}
+
+/**
+ * drbd_bm_read: Read the whole bitmap from its on disk location.
+ *
+ * currently only called from "drbd_nl_disk_conf"
+ */
+int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)
+{
+	return bm_rw(mdev, READ);
+}
+
+/**
+ * drbd_bm_write: Write the whole bitmap to its on disk location.
+ *
+ * called at various occasions.
+ */
+int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
+{
+	return bm_rw(mdev, WRITE);
+}
+
+/**
+ * drbd_bm_write_sect: Writes a 512 byte piece of the bitmap to its
+ * on disk location. On disk bitmap is little endian.
+ *
+ * @enr: The _sector_ offset from the start of the bitmap.
+ * + */ +int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local) +{ + sector_t on_disk_sector = enr + mdev->bc->md.md_offset + + mdev->bc->md.bm_offset; + int bm_words, num_words, offset; + int err = 0; + + mutex_lock(&mdev->md_io_mutex); + bm_words = drbd_bm_words(mdev); + offset = S2W(enr); /* word offset into bitmap */ + num_words = min(S2W(1), bm_words - offset); + if (num_words < S2W(1)) + memset(page_address(mdev->md_io_page), 0, MD_HARDSECT); + drbd_bm_get_lel(mdev, offset, num_words, + page_address(mdev->md_io_page)); + if (!drbd_md_sync_page_io(mdev, mdev->bc, on_disk_sector, WRITE)) { + int i; + err = -EIO; + ERR("IO ERROR writing bitmap sector %lu " + "(meta-disk sector %llus)\n", + enr, (unsigned long long)on_disk_sector); + drbd_chk_io_error(mdev, 1, TRUE); + drbd_io_error(mdev, TRUE); + for (i = 0; i < AL_EXT_PER_BM_SECT; i++) + drbd_bm_ALe_set_all(mdev, enr*AL_EXT_PER_BM_SECT+i); + } + mdev->bm_writ_cnt++; + mutex_unlock(&mdev->md_io_mutex); + return err; +} + +/* NOTE + * find_first_bit returns int, we return unsigned long. + * should not make much difference anyways, but ... + * + * this returns a bit number, NOT a sector! + */ +#define BPP_MASK ((1UL << (PAGE_SHIFT+3)) - 1) +static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, + const int find_zero_bit, const enum km_type km) +{ + struct drbd_bitmap *b = mdev->bitmap; + unsigned long i = -1UL; + unsigned long *p_addr; + unsigned long bit_offset; /* bit offset of the mapped page. */ + + if (bm_fo > b->bm_bits) { + ERR("bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); + } else { + while (bm_fo < b->bm_bits) { + unsigned long offset; + bit_offset = bm_fo & ~BPP_MASK; /* bit offset of the page */ + offset = bit_offset >> LN2_BPL; /* word offset of the page */ + p_addr = __bm_map_paddr(b, offset, km); + + if (find_zero_bit) + i = find_next_zero_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); + else + i = find_next_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); + + __bm_unmap(p_addr, km); + if (i < PAGE_SIZE*8) { + i = bit_offset + i; + if (i >= b->bm_bits) + break; + goto found; + } + bm_fo = bit_offset + PAGE_SIZE*8; + } + i = -1UL; + } + found: + return i; +} + +static unsigned long bm_find_next(struct drbd_conf *mdev, + unsigned long bm_fo, const int find_zero_bit) +{ + struct drbd_bitmap *b = mdev->bitmap; + unsigned long i = -1UL; + + ERR_IF(!b) return i; + ERR_IF(!b->bm_pages) return i; + + spin_lock_irq(&b->bm_lock); + if (bm_is_locked(b)) + bm_print_lock_info(mdev); + + i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1); + + spin_unlock_irq(&b->bm_lock); + return i; +} + +unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) +{ + return bm_find_next(mdev, bm_fo, 0); +} + +#if 0 +/* not yet needed for anything. */ +unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) +{ + return bm_find_next(mdev, bm_fo, 1); +} +#endif + +/* does not spin_lock_irqsave. + * you must take drbd_bm_lock() first */ +unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) +{ + /* WARN_ON(!bm_is_locked(mdev)); */ + return __bm_find_next(mdev, bm_fo, 0, KM_USER1); +} + +unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) +{ + /* WARN_ON(!bm_is_locked(mdev)); */ + return __bm_find_next(mdev, bm_fo, 1, KM_USER1); +} + +/* returns number of bits actually changed. 
+ * for val != 0, we change 0 -> 1, return code positive
+ * for val == 0, we change 1 -> 0, return code negative
+ * wants bitnr, not sector.
+ * Must hold bitmap lock already. */
+
+int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
+	const unsigned long e, int val, const enum km_type km)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	unsigned long *p_addr = NULL;
+	unsigned long bitnr;
+	unsigned long last_page_nr = -1UL;
+	int c = 0;
+
+	for (bitnr = s; bitnr <= e; bitnr++) {
+		ERR_IF (bitnr >= b->bm_bits) {
+			ERR("bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
+		} else {
+			unsigned long offset = bitnr>>LN2_BPL;
+			unsigned long page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3);
+			if (page_nr != last_page_nr) {
+				if (p_addr)
+					__bm_unmap(p_addr, km);
+				p_addr = __bm_map_paddr(b, offset, km);
+				last_page_nr = page_nr;
+			}
+			if (val)
+				c += (0 == __test_and_set_bit(bitnr & BPP_MASK, p_addr));
+			else
+				c -= (0 != __test_and_clear_bit(bitnr & BPP_MASK, p_addr));
+		}
+	}
+	if (p_addr)
+		__bm_unmap(p_addr, km);
+	b->bm_set += c;
+	return c;
+}
+
+/* returns number of bits actually changed.
+ * for val != 0, we change 0 -> 1, return code positive
+ * for val == 0, we change 1 -> 0, return code negative
+ * wants bitnr, not sector */
+int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
+	const unsigned long e, int val)
+{
+	unsigned long flags;
+	struct drbd_bitmap *b = mdev->bitmap;
+	int c = 0;
+
+	ERR_IF(!b) return 1;
+	ERR_IF(!b->bm_pages) return 0;
+
+	spin_lock_irqsave(&b->bm_lock, flags);
+	if (bm_is_locked(b))
+		bm_print_lock_info(mdev);
+
+	c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1);
+
+	spin_unlock_irqrestore(&b->bm_lock, flags);
+	return c;
+}
+
+/* returns number of bits changed 0 -> 1 */
+int drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+{
+	return bm_change_bits_to(mdev, s, e, 1);
+}
+
+/* returns number of bits changed 1 -> 0 */
+int drbd_bm_clear_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+{
+	return -bm_change_bits_to(mdev, s, e, 0);
+}
+
+/* the same thing, but without taking the spin_lock_irqsave.
+ * you must first drbd_bm_lock(). */
+int _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+{
+	/* WARN_ON(!bm_is_locked(b)); */
+	return __bm_change_bits_to(mdev, s, e, 1, KM_USER0);
+}
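For reference, here is the contract of __bm_change_bits_to() above (positive count for 0 -> 1, negative for 1 -> 0) reduced to one word of userspace state; names and values are illustrative, not taken from the driver:

/* The signed change-count contract, over a single long. */
#include <stdio.h>

static int change_bits_to(unsigned long *word, unsigned s, unsigned e, int val)
{
	int c = 0;
	unsigned bit;

	for (bit = s; bit <= e; bit++) {
		unsigned long m = 1UL << bit;
		if (val && !(*word & m)) { *word |= m;  c++; }  /* 0 -> 1 */
		if (!val && (*word & m)) { *word &= ~m; c--; }  /* 1 -> 0 */
	}
	return c;
}

int main(void)
{
	unsigned long w = 0xf0UL;
	printf("set 0..7:   %+d\n", change_bits_to(&w, 0, 7, 1)); /* +4 */
	printf("clear 0..7: %+d\n", change_bits_to(&w, 0, 7, 0)); /* -8 */
	return 0;
}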
+
+/* returns bit state
+ * wants bitnr, NOT sector.
+ * inherently racy... area needs to be locked by means of {al,rs}_lru
+ *  1 ... bit set
+ *  0 ... bit not set
+ * -1 ... first out of bounds access, stop testing for bits!
+ */
+int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
+{
+	unsigned long flags;
+	struct drbd_bitmap *b = mdev->bitmap;
+	unsigned long *p_addr;
+	int i;
+
+	ERR_IF(!b) return 0;
+	ERR_IF(!b->bm_pages) return 0;
+
+	spin_lock_irqsave(&b->bm_lock, flags);
+	if (bm_is_locked(b))
+		bm_print_lock_info(mdev);
+	if (bitnr < b->bm_bits) {
+		unsigned long offset = bitnr>>LN2_BPL;
+		p_addr = bm_map_paddr(b, offset);
+		i = test_bit(bitnr & BPP_MASK, p_addr) ? 1 : 0;
+		bm_unmap(p_addr);
+	} else if (bitnr == b->bm_bits) {
+		i = -1;
+	} else { /* (bitnr > b->bm_bits) */
+		ERR("bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits);
+		i = 0;
+	}
+
+	spin_unlock_irqrestore(&b->bm_lock, flags);
+	return i;
+}
+
+/* returns number of bits set */
+int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+{
+	unsigned long flags;
+	struct drbd_bitmap *b = mdev->bitmap;
+	unsigned long *p_addr = NULL, page_nr = -1;
+	unsigned long bitnr;
+	int c = 0;
+	size_t w;
+
+	/* If this is called without a bitmap, that is a bug.  But just to be
+	 * robust in case we screwed up elsewhere, in that case pretend there
+	 * was one dirty bit in the requested area, so we won't try to do a
+	 * local read there (no bitmap probably implies no disk) */
+	ERR_IF(!b) return 1;
+	ERR_IF(!b->bm_pages) return 1;
+
+	spin_lock_irqsave(&b->bm_lock, flags);
+	for (bitnr = s; bitnr <= e; bitnr++) {
+		w = bitnr >> LN2_BPL;
+		if (page_nr != w >> (PAGE_SHIFT - LN2_BPL + 3)) {
+			page_nr = w >> (PAGE_SHIFT - LN2_BPL + 3);
+			if (p_addr)
+				bm_unmap(p_addr);
+			p_addr = bm_map_paddr(b, w);
+		}
+		ERR_IF (bitnr >= b->bm_bits) {
+			ERR("bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
+		} else {
+			c += (0 != test_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
+		}
+	}
+	if (p_addr)
+		bm_unmap(p_addr);
+	spin_unlock_irqrestore(&b->bm_lock, flags);
+	return c;
+}
+
+
+/* inherently racy...
+ * return value may be already out-of-date when this function returns.
+ * but the general usage is that this is only used during a cstate when bits
+ * are only cleared, not set, and callers typically only care about the case
+ * when the return value is zero, or we already "locked" this "bitmap extent"
+ * by other means.
+ *
+ * enr is bm-extent number, since we chose to name one sector (512 bytes)
+ * worth of the bitmap a "bitmap extent".
+ *
+ * TODO
+ * I think since we use it like a reference count, we should use the real
+ * reference count of some bitmap extent element from some lru instead...
+ * + */ +int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) +{ + struct drbd_bitmap *b = mdev->bitmap; + int count, s, e; + unsigned long flags; + unsigned long *p_addr, *bm; + + ERR_IF(!b) return 0; + ERR_IF(!b->bm_pages) return 0; + + spin_lock_irqsave(&b->bm_lock, flags); + if (bm_is_locked(b)) + bm_print_lock_info(mdev); + + s = S2W(enr); + e = min((size_t)S2W(enr+1), b->bm_words); + count = 0; + if (s < b->bm_words) { + int n = e-s; + p_addr = bm_map_paddr(b, s); + bm = p_addr + MLPP(s); + while (n--) { + catch_oob_access_start(); + count += hweight_long(*bm++); + catch_oob_access_end(); + } + bm_unmap(p_addr); + } else { + ERR("start offset (%d) too large in drbd_bm_e_weight\n", s); + } + spin_unlock_irqrestore(&b->bm_lock, flags); + return count; +} + +/* set all bits covered by the AL-extent al_enr */ +unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) +{ + struct drbd_bitmap *b = mdev->bitmap; + unsigned long *p_addr, *bm; + unsigned long weight; + int count, s, e, i, do_now; + ERR_IF(!b) return 0; + ERR_IF(!b->bm_pages) return 0; + + spin_lock_irq(&b->bm_lock); + if (bm_is_locked(b)) + bm_print_lock_info(mdev); + weight = b->bm_set; + + s = al_enr * BM_WORDS_PER_AL_EXT; + e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words); + /* assert that s and e are on the same page */ + D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3) + == s >> (PAGE_SHIFT - LN2_BPL + 3)); + count = 0; + if (s < b->bm_words) { + i = do_now = e-s; + p_addr = bm_map_paddr(b, s); + bm = p_addr + MLPP(s); + while (i--) { + catch_oob_access_start(); + count += hweight_long(*bm); + *bm = -1UL; + catch_oob_access_end(); + bm++; + } + bm_unmap(p_addr); + b->bm_set += do_now*BITS_PER_LONG - count; + if (e == b->bm_words) + b->bm_set -= bm_clear_surplus(b); + } else { + ERR("start offset (%d) too large in drbd_bm_ALe_set_all\n", s); + } + weight = b->bm_set - weight; + spin_unlock_irq(&b->bm_lock); + return weight; +} -- cgit v1.2.3 From 954749162c98ca45c42cbc00c9ed6d5d6c0b8bb7 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 30 Mar 2009 18:47:12 +0200 Subject: DRBD: request The request state engine. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 1206 +++++++++++++++++++++++++++++++++++++++++ drivers/block/drbd/drbd_req.h | 327 +++++++++++ 2 files changed, 1533 insertions(+) create mode 100644 drivers/block/drbd/drbd_req.c create mode 100644 drivers/block/drbd/drbd_req.h diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c new file mode 100644 index 000000000000..c48fc848e5f3 --- /dev/null +++ b/drivers/block/drbd/drbd_req.c @@ -0,0 +1,1206 @@ +/* + drbd_req.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 1999-2008, Philipp Reisner . + Copyright (C) 2002-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. 
If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#include +#include + +#include +#include +#include "drbd_int.h" +#include "drbd_req.h" + +/* outside of the ifdef + * because of the _print_rq_state(,FIXME) in barrier_acked */ +STATIC void _print_rq_state(struct drbd_request *req, const char *txt) +{ + const unsigned long s = req->rq_state; + struct drbd_conf *mdev = req->mdev; + const int rw = (req->master_bio == NULL || + bio_data_dir(req->master_bio) == WRITE) ? + 'W' : 'R'; + + INFO("%s %p %c L%c%c%cN%c%c%c%c%c %u (%llus +%u) %s\n", + txt, req, rw, + s & RQ_LOCAL_PENDING ? 'p' : '-', + s & RQ_LOCAL_COMPLETED ? 'c' : '-', + s & RQ_LOCAL_OK ? 'o' : '-', + s & RQ_NET_PENDING ? 'p' : '-', + s & RQ_NET_QUEUED ? 'q' : '-', + s & RQ_NET_SENT ? 's' : '-', + s & RQ_NET_DONE ? 'd' : '-', + s & RQ_NET_OK ? 'o' : '-', + req->epoch, + (unsigned long long)req->sector, + req->size, + conns_to_name(mdev->state.conn)); +} + +/* #define VERBOSE_REQUEST_CODE */ +#if defined(VERBOSE_REQUEST_CODE) || defined(ENABLE_DYNAMIC_TRACE) +STATIC void _print_req_mod(struct drbd_request *req, enum drbd_req_event what) +{ + struct drbd_conf *mdev = req->mdev; + const int rw = (req->master_bio == NULL || + bio_data_dir(req->master_bio) == WRITE) ? + 'W' : 'R'; + + static const char *rq_event_names[] = { + [created] = "created", + [to_be_send] = "to_be_send", + [to_be_submitted] = "to_be_submitted", + [queue_for_net_write] = "queue_for_net_write", + [queue_for_net_read] = "queue_for_net_read", + [send_canceled] = "send_canceled", + [send_failed] = "send_failed", + [handed_over_to_network] = "handed_over_to_network", + [connection_lost_while_pending] = + "connection_lost_while_pending", + [recv_acked_by_peer] = "recv_acked_by_peer", + [write_acked_by_peer] = "write_acked_by_peer", + [neg_acked] = "neg_acked", + [conflict_discarded_by_peer] = "conflict_discarded_by_peer", + [barrier_acked] = "barrier_acked", + [data_received] = "data_received", + [read_completed_with_error] = "read_completed_with_error", + [write_completed_with_error] = "write_completed_with_error", + [completed_ok] = "completed_ok", + }; + + INFO("_req_mod(%p %c ,%s)\n", req, rw, rq_event_names[what]); +} + +# ifdef ENABLE_DYNAMIC_TRACE +# define print_rq_state(R, T) \ + MTRACE(TraceTypeRq, TraceLvlMetrics, _print_rq_state(R, T);) +# define print_req_mod(T, W) \ + MTRACE(TraceTypeRq, TraceLvlMetrics, _print_req_mod(T, W);) +# else +# define print_rq_state(R, T) _print_rq_state(R, T) +# define print_req_mod(T, W) _print_req_mod(T, W) +# endif + +#else +#define print_rq_state(R, T) +#define print_req_mod(T, W) +#endif + +/* Update disk stats at start of I/O request */ +static inline void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req, struct bio *bio) +{ + const int rw = bio_data_dir(bio); + int cpu; + cpu = part_stat_lock(); + part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]); + part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio)); + part_stat_unlock(); + mdev->vdisk->part0.in_flight++; +} + +/* Update disk stats when completing request upwards */ +static inline void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req) +{ + int rw = bio_data_dir(req->master_bio); + unsigned long duration = jiffies - req->start_time; + int cpu; + cpu = part_stat_lock(); + part_stat_add(cpu, &mdev->vdisk->part0, ticks[rw], duration); + part_round_stats(cpu, &mdev->vdisk->part0); + part_stat_unlock(); + mdev->vdisk->part0.in_flight--; +} + +static void _req_is_done(struct 
drbd_conf *mdev, struct drbd_request *req, const int rw)
+{
+	const unsigned long s = req->rq_state;
+	/* if it was a write, we may have to set the corresponding
+	 * bit(s) out-of-sync first. If it had a local part, we need to
+	 * release the reference to the activity log. */
+	if (rw == WRITE) {
+		/* remove it from the transfer log.
+		 * well, only if it had been there in the first
+		 * place... if it had not (local only or conflicting
+		 * and never sent), it should still be "empty" as
+		 * initialised in drbd_req_new(), so we can list_del() it
+		 * here unconditionally */
+		list_del(&req->tl_requests);
+		/* Set out-of-sync unless both OK flags are set
+		 * (local only or remote failed).
+		 * Other places where we set out-of-sync:
+		 * READ with local io-error */
+		if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK))
+			drbd_set_out_of_sync(mdev, req->sector, req->size);
+
+		if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS))
+			drbd_set_in_sync(mdev, req->sector, req->size);
+
+		/* one might be tempted to move the drbd_al_complete_io
+		 * to the local io completion callback drbd_endio_pri.
+		 * but, if this was a mirror write, we may only
+		 * drbd_al_complete_io after this is RQ_NET_DONE,
+		 * otherwise the extent could be dropped from the al
+		 * before it has actually been written on the peer.
+		 * if we crash before our peer knows about the request,
+		 * but after the extent has been dropped from the al,
+		 * we would forget to resync the corresponding extent.
+		 */
+		if (s & RQ_LOCAL_MASK) {
+			if (inc_local_if_state(mdev, Failed)) {
+				drbd_al_complete_io(mdev, req->sector);
+				dec_local(mdev);
+			} else if (__ratelimit(&drbd_ratelimit_state)) {
+				drbd_WARN("Should have called drbd_al_complete_io(, %llu), "
+				     "but my Disk seems to have failed :(\n",
+				     (unsigned long long) req->sector);
+			}
+		}
+	}
+
+	/* if it was a local io error, we want to notify our
+	 * peer about that, and see if we need to
+	 * detach the disk and stuff.
+	 * to avoid allocating some special work
+	 * struct, reuse the request. */
+
+	/* THINK
+	 * why do we do this not when we detect the error,
+	 * but delay it until it is "done", i.e. possibly
+	 * until the next barrier ack? */
+
+	if (rw == WRITE &&
+	    ((s & RQ_LOCAL_MASK) && !(s & RQ_LOCAL_OK))) {
+		if (!(req->w.list.next == LIST_POISON1 ||
+		      list_empty(&req->w.list))) {
+			/* DEBUG ASSERT only; if this triggers, we
+			 * probably corrupt the worker list here */
+			DUMPP(req->w.list.next);
+			DUMPP(req->w.list.prev);
+		}
+		req->w.cb = w_io_error;
+		drbd_queue_work(&mdev->data.work, &req->w);
+		/* drbd_req_free() is done in w_io_error */
+	} else {
+		drbd_req_free(req);
+	}
+}
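The CREATE_BARRIER logic in queue_barrier() below can be reduced to a small latch: the first writer of an epoch queues the barrier and sets the bit, everyone else sees the bit and does nothing. This sketch deliberately ignores the test_bit/set_bit atomicity and the work queue, both of which matter in the real driver:

/* The "barrier already queued for this epoch" latch, in isolation. */
#include <stdio.h>

struct epoch_state {
	int create_barrier;   /* barrier for this epoch already queued */
	int barriers_queued;
};

static void queue_barrier_once(struct epoch_state *st)
{
	if (st->create_barrier)  /* already queued for this epoch */
		return;
	st->barriers_queued++;   /* stands in for drbd_queue_work() */
	st->create_barrier = 1;  /* cleared when a new epoch starts */
}

int main(void)
{
	struct epoch_state st = { 0, 0 };
	queue_barrier_once(&st);
	queue_barrier_once(&st); /* no-op: same epoch */
	printf("barriers queued: %d\n", st.barriers_queued); /* 1 */
	return 0;
}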
+
+static void queue_barrier(struct drbd_conf *mdev)
+{
+	struct drbd_barrier *b;
+
+	/* We are within the req_lock. Once we queued the barrier for sending,
+	 * we set the CREATE_BARRIER bit. It is cleared as soon as a new
+	 * barrier/epoch object is added. This is the only place this bit is
+	 * set. It indicates that the barrier for this epoch is already queued,
+	 * and no new epoch has been created yet. */
+	if (test_bit(CREATE_BARRIER, &mdev->flags))
+		return;
+
+	b = mdev->newest_barrier;
+	b->w.cb = w_send_barrier;
+	/* inc_ap_pending done here, so we won't
+	 * get imbalanced on connection loss.
+	 * dec_ap_pending will be done in got_BarrierAck
+	 * or (on connection loss) in tl_clear. */
+	inc_ap_pending(mdev);
+	drbd_queue_work(&mdev->data.work, &b->w);
+	set_bit(CREATE_BARRIER, &mdev->flags);
+}
+
+static void _about_to_complete_local_write(struct drbd_conf *mdev,
+	struct drbd_request *req)
+{
+	const unsigned long s = req->rq_state;
+	struct drbd_request *i;
+	struct Tl_epoch_entry *e;
+	struct hlist_node *n;
+	struct hlist_head *slot;
+
+	/* before we can signal completion to the upper layers,
+	 * we may need to close the current epoch */
+	if (mdev->state.conn >= Connected &&
+	    req->epoch == mdev->newest_barrier->br_number)
+		queue_barrier(mdev);
+
+	/* we need to do the conflict detection stuff,
+	 * if we have the ee_hash (two_primaries) and
+	 * this has been on the network */
+	if ((s & RQ_NET_DONE) && mdev->ee_hash != NULL) {
+		const sector_t sector = req->sector;
+		const int size = req->size;
+
+		/* ASSERT:
+		 * there must be no conflicting requests, since
+		 * they must have been failed on the spot */
+#define OVERLAPS overlaps(sector, size, i->sector, i->size)
+		slot = tl_hash_slot(mdev, sector);
+		hlist_for_each_entry(i, n, slot, colision) {
+			if (OVERLAPS) {
+				ALERT("LOGIC BUG: completed: %p %llus +%u; "
+				      "other: %p %llus +%u\n",
+				      req, (unsigned long long)sector, size,
+				      i, (unsigned long long)i->sector, i->size);
+			}
+		}
+
+		/* maybe "wake" those conflicting epoch entries
+		 * that wait for this request to finish.
+		 *
+		 * currently, there can be only _one_ such ee
+		 * (well, or some more, which would be pending
+		 * DiscardAck not yet sent by the asender...),
+		 * since we block the receiver thread upon the
+		 * first conflict detection, which will wait on
+		 * misc_wait.  maybe we want to assert that?
+		 *
+		 * anyways, if we found one,
+		 * we just have to do a wake_up. */
+#undef OVERLAPS
+#define OVERLAPS overlaps(sector, size, e->sector, e->size)
+		slot = ee_hash_slot(mdev, req->sector);
+		hlist_for_each_entry(e, n, slot, colision) {
+			if (OVERLAPS) {
+				wake_up(&mdev->misc_wait);
+				break;
+			}
+		}
+	}
+#undef OVERLAPS
+}
+
+static void _complete_master_bio(struct drbd_conf *mdev,
+	struct drbd_request *req, int error)
+{
+	dump_bio(mdev, req->master_bio, 1, req);
+	bio_endio(req->master_bio, error);
+	req->master_bio = NULL;
+	dec_ap_bio(mdev);
+}
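The completion gate at the top of _req_may_be_done() below can be written as a pure function over the request state mask. The flag values here are illustrative; the real RQ_* masks live in drbd_req.h:

/* A request may complete only when it is neither queued nor pending
 * on the network, nor pending on the local disk. */
#include <stdio.h>

#define RQ_LOCAL_PENDING (1u << 0)
#define RQ_NET_PENDING   (1u << 1)
#define RQ_NET_QUEUED    (1u << 2)

static int req_may_complete(unsigned s)
{
	return !(s & (RQ_NET_QUEUED | RQ_NET_PENDING | RQ_LOCAL_PENDING));
}

int main(void)
{
	printf("%d\n", req_may_complete(RQ_NET_QUEUED)); /* 0: still queued */
	printf("%d\n", req_may_complete(0));             /* 1: may complete */
	return 0;
}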
+
+void _req_may_be_done(struct drbd_request *req, int error)
+{
+	const unsigned long s = req->rq_state;
+	struct drbd_conf *mdev = req->mdev;
+	int rw;
+
+	print_rq_state(req, "_req_may_be_done");
+
+	/* we must not complete the master bio, while it is
+	 *	still being processed by _drbd_send_zc_bio (drbd_send_dblock)
+	 *	not yet acknowledged by the peer
+	 *	not yet completed by the local io subsystem
+	 * these flags may get cleared in any order by
+	 *	the worker,
+	 *	the receiver,
+	 *	the bio_endio completion callbacks.
+	 */
+	if (s & RQ_NET_QUEUED)
+		return;
+	if (s & RQ_NET_PENDING)
+		return;
+	if (s & RQ_LOCAL_PENDING)
+		return;
+
+	if (req->master_bio) {
+		/* this is data_received (remote read)
+		 * or protocol C WriteAck
+		 * or protocol B RecvAck
+		 * or protocol A "handed_over_to_network" (SendAck)
+		 * or canceled or failed,
+		 * or killed from the transfer log due to connection loss.
+		 */
+
+		/*
+		 * figure out whether to report success or failure.
+		 *
+		 * report success when at least one of the operations succeeded.
+		 * or, to put the other way,
+		 * only report failure, when both operations failed.
+		 *
+		 * what to do about the failures is handled elsewhere.
+		 * what we need to do here is just: complete the master_bio.
+		 */
+		int ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK);
+		rw = bio_data_dir(req->master_bio);
+
+		/* remove the request from the conflict detection
+		 * respective block_id verification hash */
+		if (!hlist_unhashed(&req->colision))
+			hlist_del(&req->colision);
+		else
+			D_ASSERT((s & RQ_NET_MASK) == 0);
+
+		/* for writes we need to do some extra housekeeping */
+		if (rw == WRITE)
+			_about_to_complete_local_write(mdev, req);
+
+		/* Update disk stats */
+		_drbd_end_io_acct(mdev, req);
+
+		_complete_master_bio(mdev, req,
+			ok ? 0 : (error ? error : -EIO));
+	} else {
+		/* only WRITE requests can end up here without a master_bio */
+		rw = WRITE;
+	}
+
+	if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) {
+		/* this is disconnected (local only) operation,
+		 * or protocol C WriteAck,
+		 * or protocol A or B BarrierAck,
+		 * or killed from the transfer log due to connection loss. */
+		_req_is_done(mdev, req, rw);
+	}
+	/* else: network part and not DONE yet. that is
+	 * protocol A or B, barrier ack still pending... */
+}
+
+/*
+ * checks whether there was an overlapping request
+ * or ee already registered.
+ *
+ * if so, return 1, in which case this request is completed on the spot,
+ * without ever being submitted or sent.
+ *
+ * return 0 if it is ok to submit this request.
+ *
+ * NOTE:
+ * paranoia: assume something above us is broken, and issues different write
+ * requests for the same block simultaneously...
+ *
+ * To ensure these won't be reordered differently on both nodes, resulting in
+ * diverging data sets, we discard the later one(s). Not that this is supposed
+ * to happen, but this is the rationale why we also have to check for
+ * conflicting requests with local origin, and why we have to do so regardless
+ * of whether we allowed multiple primaries.
+ *
+ * BTW, in case we only have one primary, the ee_hash is empty anyways, and the
+ * second hlist_for_each_entry becomes a noop. This is even simpler than to
+ * grab a reference on the net_conf, and check for the two_primaries flag...
+ */
+STATIC int _req_conflicts(struct drbd_request *req)
+{
+	struct drbd_conf *mdev = req->mdev;
+	const sector_t sector = req->sector;
+	const int size = req->size;
+	struct drbd_request *i;
+	struct Tl_epoch_entry *e;
+	struct hlist_node *n;
+	struct hlist_head *slot;
+
+	D_ASSERT(hlist_unhashed(&req->colision));
+
+	if (!inc_net(mdev))
+		return 0;
+
+	/* BUG_ON */
+	ERR_IF (mdev->tl_hash_s == 0)
+		goto out_no_conflict;
+	BUG_ON(mdev->tl_hash == NULL);
+
+#define OVERLAPS overlaps(i->sector, i->size, sector, size)
+	slot = tl_hash_slot(mdev, sector);
+	hlist_for_each_entry(i, n, slot, colision) {
+		if (OVERLAPS) {
+			ALERT("%s[%u] Concurrent local write detected! "
+			      "[DISCARD L] new: %llus +%u; "
+			      "pending: %llus +%u\n",
+			      current->comm, current->pid,
+			      (unsigned long long)sector, size,
+			      (unsigned long long)i->sector, i->size);
+			goto out_conflict;
+		}
+	}
+
+	if (mdev->ee_hash_s) {
+		/* now, check for overlapping requests with remote origin */
+		BUG_ON(mdev->ee_hash == NULL);
+#undef OVERLAPS
+#define OVERLAPS overlaps(e->sector, e->size, sector, size)
+		slot = ee_hash_slot(mdev, sector);
+		hlist_for_each_entry(e, n, slot, colision) {
+			if (OVERLAPS) {
+				ALERT("%s[%u] Concurrent remote write detected!"
+				      " [DISCARD L] new: %llus +%u; "
+				      "pending: %llus +%u\n",
+				      current->comm, current->pid,
+				      (unsigned long long)sector, size,
+				      (unsigned long long)e->sector, e->size);
+				goto out_conflict;
+			}
+		}
+	}
+#undef OVERLAPS
+
+out_no_conflict:
+	/* this is like it should be, and what we expected.
+ * our users do behave after all... */ + dec_net(mdev); + return 0; + +out_conflict: + dec_net(mdev); + return 1; +} + +/* obviously this could be coded as many single functions + * instead of one huge switch, + * or by putting the code directly in the respective locations + * (as it has been before). + * + * but having it this way + * enforces that it is all in this one place, where it is easier to audit, + * it makes it obvious that whatever "event" "happens" to a request should + * happen "atomically" within the req_lock, + * and it enforces that we have to think in a very structured manner + * about the "events" that may happen to a request during its life time ... + * + * Though I think it is likely that we break this again into many + * static inline void _req_mod_ ## what (req) ... + */ +void _req_mod(struct drbd_request *req, enum drbd_req_event what, int error) +{ + struct drbd_conf *mdev = req->mdev; + + if (error && (bio_rw(req->master_bio) != READA)) + ERR("got an _req_mod() errno of %d\n", error); + + print_req_mod(req, what); + + switch (what) { + default: + ERR("LOGIC BUG in %s:%u\n", __FILE__ , __LINE__); + return; + + /* does not happen... + * initialization done in drbd_req_new + case created: + break; + */ + + case to_be_send: /* via network */ + /* reached via drbd_make_request_common + * and from w_read_retry_remote */ + D_ASSERT(!(req->rq_state & RQ_NET_MASK)); + req->rq_state |= RQ_NET_PENDING; + inc_ap_pending(mdev); + break; + + case to_be_submitted: /* locally */ + /* reached via drbd_make_request_common */ + D_ASSERT(!(req->rq_state & RQ_LOCAL_MASK)); + req->rq_state |= RQ_LOCAL_PENDING; + break; + + case completed_ok: + if (bio_data_dir(req->private_bio) == WRITE) + mdev->writ_cnt += req->size>>9; + else + mdev->read_cnt += req->size>>9; + + bio_put(req->private_bio); + req->private_bio = NULL; + + req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK); + req->rq_state &= ~RQ_LOCAL_PENDING; + + _req_may_be_done(req, error); + dec_local(mdev); + break; + + case write_completed_with_error: + req->rq_state |= RQ_LOCAL_COMPLETED; + req->rq_state &= ~RQ_LOCAL_PENDING; + + bio_put(req->private_bio); + req->private_bio = NULL; + ALERT("Local WRITE failed sec=%llus size=%u\n", + (unsigned long long)req->sector, req->size); + /* and now: check how to handle local io error. 
*/ + __drbd_chk_io_error(mdev, FALSE); + _req_may_be_done(req, error); + dec_local(mdev); + break; + + case read_completed_with_error: + if (bio_rw(req->master_bio) != READA) + drbd_set_out_of_sync(mdev, req->sector, req->size); + + req->rq_state |= RQ_LOCAL_COMPLETED; + req->rq_state &= ~RQ_LOCAL_PENDING; + + bio_put(req->private_bio); + req->private_bio = NULL; + if (bio_rw(req->master_bio) == READA) { + /* it is legal to fail READA */ + _req_may_be_done(req, error); + dec_local(mdev); + break; + } + /* else */ + ALERT("Local READ failed sec=%llus size=%u\n", + (unsigned long long)req->sector, req->size); + /* _req_mod(req,to_be_send); oops, recursion in static inline */ + D_ASSERT(!(req->rq_state & RQ_NET_MASK)); + req->rq_state |= RQ_NET_PENDING; + inc_ap_pending(mdev); + + __drbd_chk_io_error(mdev, FALSE); + dec_local(mdev); + /* NOTE: if we have no connection, + * or know the peer has no good data either, + * then we don't actually need to "queue_for_net_read", + * but we do so anyways, since the drbd_io_error() + * and the potential state change to "Diskless" + * needs to be done from process context */ + + /* fall through: _req_mod(req,queue_for_net_read); */ + + case queue_for_net_read: + /* READ or READA, and + * no local disk, + * or target area marked as invalid, + * or just got an io-error. */ + /* from drbd_make_request_common + * or from bio_endio during read io-error recovery */ + + /* so we can verify the handle in the answer packet + * corresponding hlist_del is in _req_may_be_done() */ + hlist_add_head(&req->colision, ar_hash_slot(mdev, req->sector)); + + set_bit(UNPLUG_REMOTE, &mdev->flags); /* why? */ + + D_ASSERT(req->rq_state & RQ_NET_PENDING); + req->rq_state |= RQ_NET_QUEUED; + req->w.cb = (req->rq_state & RQ_LOCAL_MASK) + ? w_read_retry_remote + : w_send_read_req; + drbd_queue_work(&mdev->data.work, &req->w); + break; + + case queue_for_net_write: + /* assert something? */ + /* from drbd_make_request_common only */ + + hlist_add_head(&req->colision, tl_hash_slot(mdev, req->sector)); + /* corresponding hlist_del is in _req_may_be_done() */ + + /* NOTE + * In case the req ended up on the transfer log before being + * queued on the worker, it could lead to this request being + * missed during cleanup after connection loss. + * So we have to do both operations here, + * within the same lock that protects the transfer log. + * + * _req_add_to_epoch(req); this has to be after the + * _maybe_start_new_epoch(req); which happened in + * drbd_make_request_common, because we now may set the bit + * again ourselves to close the current epoch. + * + * Add req to the (now) current epoch (barrier). */ + + /* see drbd_make_request_common, + * just after it grabs the req_lock */ + D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0); + + req->epoch = mdev->newest_barrier->br_number; + list_add_tail(&req->tl_requests, + &mdev->newest_barrier->requests); + + /* increment size of current epoch */ + mdev->newest_barrier->n_req++; + + /* queue work item to send data */ + D_ASSERT(req->rq_state & RQ_NET_PENDING); + req->rq_state |= RQ_NET_QUEUED; + req->w.cb = w_send_dblock; + drbd_queue_work(&mdev->data.work, &req->w); + + /* close the epoch, in case it outgrew the limit */ + if (mdev->newest_barrier->n_req >= mdev->net_conf->max_epoch_size) + queue_barrier(mdev); + + break; + + case send_canceled: + /* treat it the same */ + case send_failed: + /* real cleanup will be done from tl_clear. 
just update flags
+	 * so it is no longer marked as on the worker queue */
+		req->rq_state &= ~RQ_NET_QUEUED;
+		/* if we did it right, tl_clear should be scheduled only after
+		 * this, so this should not be necessary! */
+		_req_may_be_done(req, error);
+		break;
+
+	case handed_over_to_network:
+		/* assert something? */
+		if (bio_data_dir(req->master_bio) == WRITE &&
+		    mdev->net_conf->wire_protocol == DRBD_PROT_A) {
+			/* this is what is dangerous about protocol A:
+			 * pretend it was successfully written on the peer. */
+			if (req->rq_state & RQ_NET_PENDING) {
+				dec_ap_pending(mdev);
+				req->rq_state &= ~RQ_NET_PENDING;
+				req->rq_state |= RQ_NET_OK;
+			} /* else: neg-ack was faster... */
+			/* it is still not yet RQ_NET_DONE until the
+			 * corresponding epoch barrier got acked as well,
+			 * so we know what to dirty on connection loss */
+		}
+		req->rq_state &= ~RQ_NET_QUEUED;
+		req->rq_state |= RQ_NET_SENT;
+		/* because _drbd_send_zc_bio could sleep, and may want to
+		 * dereference the bio even after the "write_acked_by_peer" and
+		 * "completed_ok" events came in, once we return from
+		 * _drbd_send_zc_bio (drbd_send_dblock), we have to check
+		 * whether it is done already, and end it. */
+		_req_may_be_done(req, error);
+		break;
+
+	case connection_lost_while_pending:
+		/* transfer log cleanup after connection loss */
+		/* assert something? */
+		if (req->rq_state & RQ_NET_PENDING)
+			dec_ap_pending(mdev);
+		req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
+		req->rq_state |= RQ_NET_DONE;
+		/* if it is still queued, we may not complete it here.
+		 * it will be canceled soon. */
+		if (!(req->rq_state & RQ_NET_QUEUED))
+			_req_may_be_done(req, error);
+		break;
+
+	case write_acked_by_peer_and_sis:
+		req->rq_state |= RQ_NET_SIS;
+	case conflict_discarded_by_peer:
+		/* for discarded conflicting writes of multiple primaries,
+		 * there is no need to keep anything in the tl, potential
+		 * node crashes are covered by the activity log. */
+		req->rq_state |= RQ_NET_DONE;
+		/* fall through */
+	case write_acked_by_peer:
+		/* protocol C; successfully written on peer.
+		 * Nothing to do here.
+		 * We want to keep the tl in place for all protocols, to cater
+		 * for volatile write-back caches on lower level devices.
+		 *
+		 * A barrier request is expected to have forced all prior
+		 * requests onto stable storage, so completion of a barrier
+		 * request could set NET_DONE right here, and not wait for the
+		 * BarrierAck, but that is an unnecessary optimisation. */
+
+		/* this makes it effectively the same as for: */
+	case recv_acked_by_peer:
+		/* protocol B; pretends to be successfully written on peer.
+		 * see also notes above in handed_over_to_network about
+		 * protocol != C */
+		req->rq_state |= RQ_NET_OK;
+		D_ASSERT(req->rq_state & RQ_NET_PENDING);
+		dec_ap_pending(mdev);
+		req->rq_state &= ~RQ_NET_PENDING;
+		_req_may_be_done(req, error);
+		break;
+
+	case neg_acked:
+		/* assert something? */
+		if (req->rq_state & RQ_NET_PENDING)
+			dec_ap_pending(mdev);
+		req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
+
+		req->rq_state |= RQ_NET_DONE;
+		_req_may_be_done(req, error);
+		/* else: done by handed_over_to_network */
+		break;
+
+	case barrier_acked:
+		if (req->rq_state & RQ_NET_PENDING) {
+			/* barrier came in before all requests have been acked.
+			 * this is bad, because if the connection is lost now,
+			 * we won't be able to clean them up...
*/ + _print_rq_state(req, + "FIXME (barrier_acked but pending)"); + list_move(&req->tl_requests, &mdev->out_of_sequence_requests); + } + D_ASSERT(req->rq_state & RQ_NET_SENT); + req->rq_state |= RQ_NET_DONE; + _req_may_be_done(req, error); + break; + + case data_received: + D_ASSERT(req->rq_state & RQ_NET_PENDING); + dec_ap_pending(mdev); + req->rq_state &= ~RQ_NET_PENDING; + req->rq_state |= (RQ_NET_OK|RQ_NET_DONE); + _req_may_be_done(req, error); + break; + }; +} + +/* we may do a local read if: + * - we are consistent (of course), + * - or we are generally inconsistent, + * BUT we are still/already IN SYNC for this area. + * since size may be bigger than BM_BLOCK_SIZE, + * we may need to check several bits. + */ +STATIC int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size) +{ + unsigned long sbnr, ebnr; + sector_t esector, nr_sectors; + + if (mdev->state.disk == UpToDate) + return 1; + if (mdev->state.disk >= Outdated) + return 0; + if (mdev->state.disk < Inconsistent) + return 0; + /* state.disk == Inconsistent We will have a look at the BitMap */ + nr_sectors = drbd_get_capacity(mdev->this_bdev); + esector = sector + (size >> 9) - 1; + + D_ASSERT(sector < nr_sectors); + D_ASSERT(esector < nr_sectors); + + sbnr = BM_SECT_TO_BIT(sector); + ebnr = BM_SECT_TO_BIT(esector); + + return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr); +} + +STATIC int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) +{ + const int rw = bio_rw(bio); + const int size = bio->bi_size; + const sector_t sector = bio->bi_sector; + struct drbd_barrier *b = NULL; + struct drbd_request *req; + int local, remote; + int err = -EIO; + + /* allocate outside of all locks; */ + req = drbd_req_new(mdev, bio); + if (!req) { + dec_ap_bio(mdev); + /* only pass the error to the upper layers. + * if user cannot handle io errors, thats not our business. */ + ERR("could not kmalloc() req\n"); + bio_endio(bio, -ENOMEM); + return 0; + } + + dump_bio(mdev, bio, 0, req); + + local = inc_local(mdev); + if (!local) { + bio_put(req->private_bio); /* or we get a bio leak */ + req->private_bio = NULL; + } + if (rw == WRITE) { + remote = 1; + } else { + /* READ || READA */ + if (local) { + if (!drbd_may_do_local_read(mdev, sector, size)) { + /* we could kick the syncer to + * sync this extent asap, wait for + * it, then continue locally. + * Or just issue the request remotely. + */ + local = 0; + bio_put(req->private_bio); + req->private_bio = NULL; + dec_local(mdev); + } + } + remote = !local && mdev->state.pdsk >= UpToDate; + } + + /* If we have a disk, but a READA request is mapped to remote, + * we are Primary, Inconsistent, SyncTarget. + * Just fail that READA request right here. + * + * THINK: maybe fail all READA when not local? + * or make this configurable... + * if network is slow, READA won't do any good. + */ + if (rw == READA && mdev->state.disk >= Inconsistent && !local) { + err = -EWOULDBLOCK; + goto fail_and_free_req; + } + + /* For WRITES going to the local disk, grab a reference on the target + * extent. This waits for any resync activity in the corresponding + * resync extent to finish, and, if necessary, pulls in the target + * extent into the activity log, which involves further disk io because + * of transactional on-disk meta data updates. 
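
The bitmap arithmetic in drbd_may_do_local_read() above reduces to mapping a
sector range onto a range of bitmap bits. A standalone sketch, assuming the
usual 4 KiB bitmap granularity, i.e. 8 sectors of 512 bytes per bit (the
shift value is an assumption of this example, not taken from the patch):

    #include <stdio.h>

    typedef unsigned long long sector_t;

    #define SECTORS_PER_BIT_SHIFT 3 /* 4096 / 512 = 8 sectors per bit */
    #define SECT_TO_BIT(s) ((unsigned long)((s) >> SECTORS_PER_BIT_SHIFT))

    int main(void)
    {
            sector_t sector = 262269; /* from the split example further down */
            int size = 4096;          /* bytes */
            sector_t esector = sector + (size >> 9) - 1;

            /* the read may be served locally iff all bits in
             * [first, last] are clear, i.e. the area is in sync */
            printf("first bit: %lu, last bit: %lu\n",
                   SECT_TO_BIT(sector), SECT_TO_BIT(esector));
            return 0;
    }
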
*/ + if (rw == WRITE && local) + drbd_al_begin_io(mdev, sector); + + remote = remote && (mdev->state.pdsk == UpToDate || + (mdev->state.pdsk == Inconsistent && + mdev->state.conn >= Connected)); + + if (!(local || remote)) { + ERR("IO ERROR: neither local nor remote disk\n"); + goto fail_free_complete; + } + + /* For WRITE request, we have to make sure that we have an + * unused_spare_barrier, in case we need to start a new epoch. + * I try to be smart and avoid to pre-allocate always "just in case", + * but there is a race between testing the bit and pointer outside the + * spinlock, and grabbing the spinlock. + * if we lost that race, we retry. */ + if (rw == WRITE && remote && + mdev->unused_spare_barrier == NULL && + test_bit(CREATE_BARRIER, &mdev->flags)) { +allocate_barrier: + b = kmalloc(sizeof(struct drbd_barrier), GFP_NOIO); + if (!b) { + ERR("Failed to alloc barrier.\n"); + err = -ENOMEM; + goto fail_free_complete; + } + } + + /* GOOD, everything prepared, grab the spin_lock */ + spin_lock_irq(&mdev->req_lock); + + if (remote) { + remote = (mdev->state.pdsk == UpToDate || + (mdev->state.pdsk == Inconsistent && + mdev->state.conn >= Connected)); + if (!remote) + drbd_WARN("lost connection while grabbing the req_lock!\n"); + if (!(local || remote)) { + ERR("IO ERROR: neither local nor remote disk\n"); + spin_unlock_irq(&mdev->req_lock); + goto fail_free_complete; + } + } + + if (b && mdev->unused_spare_barrier == NULL) { + mdev->unused_spare_barrier = b; + b = NULL; + } + if (rw == WRITE && remote && + mdev->unused_spare_barrier == NULL && + test_bit(CREATE_BARRIER, &mdev->flags)) { + /* someone closed the current epoch + * while we were grabbing the spinlock */ + spin_unlock_irq(&mdev->req_lock); + goto allocate_barrier; + } + + + /* Update disk stats */ + _drbd_start_io_acct(mdev, req, bio); + + /* _maybe_start_new_epoch(mdev); + * If we need to generate a write barrier packet, we have to add the + * new epoch (barrier) object, and queue the barrier packet for sending, + * and queue the req's data after it _within the same lock_, otherwise + * we have race conditions were the reorder domains could be mixed up. + * + * Even read requests may start a new epoch and queue the corresponding + * barrier packet. To get the write ordering right, we only have to + * make sure that, if this is a write request and it triggered a + * barrier packet, this request is queued within the same spinlock. */ + if (remote && mdev->unused_spare_barrier && + test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { + _tl_add_barrier(mdev, mdev->unused_spare_barrier); + mdev->unused_spare_barrier = NULL; + } else { + D_ASSERT(!(remote && rw == WRITE && + test_bit(CREATE_BARRIER, &mdev->flags))); + } + + /* NOTE + * Actually, 'local' may be wrong here already, since we may have failed + * to write to the meta data, and may become wrong anytime because of + * local io-error for some other request, which would lead to us + * "detaching" the local disk. + * + * 'remote' may become wrong any time because the network could fail. + * + * This is a harmless race condition, though, since it is handled + * correctly at the appropriate places; so it just deferres the failure + * of the respective operation. + */ + + /* mark them early for readability. + * this just sets some state flags. */ + if (remote) + _req_mod(req, to_be_send, 0); + if (local) + _req_mod(req, to_be_submitted, 0); + + /* check this request on the colison detection hash tables. + * if we have a conflict, just complete it here. 
+ * THINK do we want to check reads, too? (I don't think so...) */ + if (rw == WRITE && _req_conflicts(req)) { + /* this is a conflicting request. + * even though it may have been only _partially_ + * overlapping with one of the currently pending requests, + * without even submitting or sending it, we will + * pretend that it was successfully served right now. + */ + if (local) { + bio_put(req->private_bio); + req->private_bio = NULL; + drbd_al_complete_io(mdev, req->sector); + dec_local(mdev); + local = 0; + } + if (remote) + dec_ap_pending(mdev); + _drbd_end_io_acct(mdev, req); + /* THINK: do we want to fail it (-EIO), or pretend success? */ + bio_endio(req->master_bio, 0); + req->master_bio = NULL; + dec_ap_bio(mdev); + drbd_req_free(req); + remote = 0; + } + + /* NOTE remote first: to get the concurrent write detection right, + * we must register the request before start of local IO. */ + if (remote) { + /* either WRITE and Connected, + * or READ, and no local disk, + * or READ, but not in sync. + */ + if (rw == WRITE) + _req_mod(req, queue_for_net_write, 0); + else + _req_mod(req, queue_for_net_read, 0); + } + spin_unlock_irq(&mdev->req_lock); + kfree(b); /* if someone else has beaten us to it... */ + + if (local) { + req->private_bio->bi_bdev = mdev->bc->backing_bdev; + + dump_internal_bio("Pri", mdev, req->private_bio, 0); + + if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR + : rw == READ ? DRBD_FAULT_DT_RD + : DRBD_FAULT_DT_RA)) + bio_endio(req->private_bio, -EIO); + else + generic_make_request(req->private_bio); + } + + /* we need to plug ALWAYS since we possibly need to kick lo_dev. + * we plug after submit, so we won't miss an unplug event */ + drbd_plug_device(mdev); + + return 0; + +fail_free_complete: + if (rw == WRITE && local) + drbd_al_complete_io(mdev, sector); +fail_and_free_req: + if (local) { + bio_put(req->private_bio); + req->private_bio = NULL; + dec_local(mdev); + } + bio_endio(bio, err); + drbd_req_free(req); + dec_ap_bio(mdev); + kfree(b); + + return 0; +} + +/* helper function for drbd_make_request + * if we can determine just by the mdev (state) that this request will fail, + * return 1 + * otherwise return 0 + */ +static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write) +{ + /* Unconfigured */ + if (mdev->state.conn == Disconnecting && + mdev->state.disk == Diskless) + return 1; + + if (mdev->state.role != Primary && + (!allow_oos || is_write)) { + if (__ratelimit(&drbd_ratelimit_state)) { + ERR("Process %s[%u] tried to %s; " + "since we are not in Primary state, " + "we cannot allow this\n", + current->comm, current->pid, + is_write ? "WRITE" : "READ"); + } + return 1; + } + + /* + * Paranoia: we might have been primary, but sync target, or + * even diskless, then lost the connection. + * This should have been handled (panic? suspend?) somehwere + * else. But maybe it was not, so check again here. + * Caution: as long as we do not have a read/write lock on mdev, + * to serialize state changes, this is racy, since we may lose + * the connection *after* we test for the cstate. 
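
Condensed, the early-reject policy of this function (including the final
check just below) is a small predicate over a few state flags. A userspace
model, with the drbd state fields deliberately reduced to plain booleans:

    #include <stdio.h>

    struct state {
            int unconfigured; /* Disconnecting and already Diskless */
            int primary;      /* this node is Primary */
            int disk_ok;      /* local disk is UpToDate */
            int pdsk_ok;      /* peer's disk is UpToDate */
            int allow_oos;    /* module option: allow reads on a Secondary */
    };

    static int fail_early(const struct state *s, int is_write)
    {
            if (s->unconfigured)
                    return 1;
            if (!s->primary && (is_write || !s->allow_oos))
                    return 1;
            if (!s->disk_ok && !s->pdsk_ok)
                    return 1; /* no access to good data anywhere */
            return 0;
    }

    int main(void)
    {
            struct state s = { .primary = 1, .disk_ok = 0, .pdsk_ok = 1 };
            printf("write, stale disk, good peer: fail=%d\n",
                   fail_early(&s, 1));
            return 0;
    }
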
+ */ + if (mdev->state.disk < UpToDate && mdev->state.pdsk < UpToDate) { + if (__ratelimit(&drbd_ratelimit_state)) + ERR("Sorry, I have no access to good data anymore.\n"); + return 1; + } + + return 0; +} + +int drbd_make_request_26(struct request_queue *q, struct bio *bio) +{ + unsigned int s_enr, e_enr; + struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; + + if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) { + bio_endio(bio, -EPERM); + return 0; + } + + /* Reject barrier requests if we know the underlying device does + * not support them. + * XXX: Need to get this info from peer as well some how so we + * XXX: reject if EITHER side/data/metadata area does not support them. + * + * because of those XXX, this is not yet enabled, + * i.e. in drbd_init_set_defaults we set the NO_BARRIER_SUPP bit. + */ + if (unlikely(bio_barrier(bio) && test_bit(NO_BARRIER_SUPP, &mdev->flags))) { + /* drbd_WARN("Rejecting barrier request as underlying device does not support\n"); */ + bio_endio(bio, -EOPNOTSUPP); + return 0; + } + + /* + * what we "blindly" assume: + */ + D_ASSERT(bio->bi_size > 0); + D_ASSERT((bio->bi_size & 0x1ff) == 0); + D_ASSERT(bio->bi_idx == 0); + + /* to make some things easier, force allignment of requests within the + * granularity of our hash tables */ + s_enr = bio->bi_sector >> HT_SHIFT; + e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT; + + if (likely(s_enr == e_enr)) { + inc_ap_bio(mdev, 1); + return drbd_make_request_common(mdev, bio); + } + + /* can this bio be split generically? + * Maybe add our own split-arbitrary-bios function. */ + if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_SEGMENT_SIZE) { + /* rather error out here than BUG in bio_split */ + ERR("bio would need to, but cannot, be split: " + "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n", + bio->bi_vcnt, bio->bi_idx, bio->bi_size, + (unsigned long long)bio->bi_sector); + bio_endio(bio, -EINVAL); + } else { + /* This bio crosses some boundary, so we have to split it. */ + struct bio_pair *bp; + /* works for the "do not cross hash slot boundaries" case + * e.g. sector 262269, size 4096 + * s_enr = 262269 >> 6 = 4097 + * e_enr = (262269+8-1) >> 6 = 4098 + * HT_SHIFT = 6 + * sps = 64, mask = 63 + * first_sectors = 64 - (262269 & 63) = 3 + */ + const sector_t sect = bio->bi_sector; + const int sps = 1 << HT_SHIFT; /* sectors per slot */ + const int mask = sps - 1; + const sector_t first_sectors = sps - (sect & mask); + bp = bio_split(bio, +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) + bio_split_pool, +#endif + first_sectors); + + /* we need to get a "reference count" (ap_bio_cnt) + * to avoid races with the disconnect/reconnect/suspend code. + * In case we need to split the bio here, we need to get two references + * atomically, otherwise we might deadlock when trying to submit the + * second one! */ + inc_ap_bio(mdev, 2); + + D_ASSERT(e_enr == s_enr + 1); + + drbd_make_request_common(mdev, &bp->bio1); + drbd_make_request_common(mdev, &bp->bio2); + bio_pair_release(bp); + } + return 0; +} + +/* This is called by bio_add_page(). With this function we reduce + * the number of BIOs that span over multiple DRBD_MAX_SEGMENT_SIZEs + * units (was AL_EXTENTs). + * + * we do the calculation within the lower 32bit of the byte offsets, + * since we don't care for actual offset, but only check whether it + * would cross "activity log extent" boundaries. + * + * As long as the BIO is emtpy we have to allow at least one bvec, + * regardless of size and offset. 
so the resulting bio may still + * cross extent boundaries. those are dealt with (bio_split) in + * drbd_make_request_26. + */ +int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec) +{ + struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; + unsigned int bio_offset = + (unsigned int)bvm->bi_sector << 9; /* 32 bit */ + unsigned int bio_size = bvm->bi_size; + int limit, backing_limit; + + limit = DRBD_MAX_SEGMENT_SIZE + - ((bio_offset & (DRBD_MAX_SEGMENT_SIZE-1)) + bio_size); + if (limit < 0) + limit = 0; + if (bio_size == 0) { + if (limit <= bvec->bv_len) + limit = bvec->bv_len; + } else if (limit && inc_local(mdev)) { + struct request_queue * const b = + mdev->bc->backing_bdev->bd_disk->queue; + if (b->merge_bvec_fn && mdev->bc->dc.use_bmbv) { + backing_limit = b->merge_bvec_fn(b, bvm, bvec); + limit = min(limit, backing_limit); + } + dec_local(mdev); + } + return limit; +} diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h new file mode 100644 index 000000000000..f50f95cb9887 --- /dev/null +++ b/drivers/block/drbd/drbd_req.h @@ -0,0 +1,327 @@ +/* + drbd_req.h + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2006-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2006-2008, Lars Ellenberg . + Copyright (C) 2006-2008, Philipp Reisner . + + DRBD is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + DRBD is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _DRBD_REQ_H +#define _DRBD_REQ_H + +#include +#include + +#include +#include +#include "drbd_int.h" +#include "drbd_wrappers.h" + +/* The request callbacks will be called in irq context by the IDE drivers, + and in Softirqs/Tasklets/BH context by the SCSI drivers, + and by the receiver and worker in kernel-thread context. + Try to get the locking right :) */ + +/* + * Objects of type struct drbd_request do only exist on a Primary node, and are + * associated with IO requests originating from the block layer above us. + * + * There are quite a few things that may happen to a drbd request + * during its lifetime. + * + * It will be created. + * It will be marked with the intention to be + * submitted to local disk and/or + * send via the network. + * + * It has to be placed on the transfer log and other housekeeping lists, + * In case we have a network connection. + * + * It may be identified as a concurrent (write) request + * and be handled accordingly. + * + * It may me handed over to the local disk subsystem. + * It may be completed by the local disk subsystem, + * either sucessfully or with io-error. + * In case it is a READ request, and it failed locally, + * it may be retried remotely. + * + * It may be queued for sending. + * It may be handed over to the network stack, + * which may fail. + * It may be acknowledged by the "peer" according to the wire_protocol in use. + * this may be a negative ack. 
+ * It may receive a faked ack when the network connection is lost and the + * transfer log is cleaned up. + * Sending may be canceled due to network connection loss. + * When it finally has outlived its time, + * corresponding dirty bits in the resync-bitmap may be cleared or set, + * it will be destroyed, + * and completion will be signalled to the originator, + * with or without "success". + * + * See also documentation/drbd-request-state-overview.dot + * (dot -Tps2 documentation/drbd-request-state-overview.dot | display -) + */ + +enum drbd_req_event { + created, + to_be_send, + to_be_submitted, + + /* XXX yes, now I am inconsistent... + * these two are not "events" but "actions" + * oh, well... */ + queue_for_net_write, + queue_for_net_read, + + send_canceled, + send_failed, + handed_over_to_network, + connection_lost_while_pending, + recv_acked_by_peer, + write_acked_by_peer, + write_acked_by_peer_and_sis, /* and set_in_sync */ + conflict_discarded_by_peer, + neg_acked, + barrier_acked, /* in protocol A and B */ + data_received, /* (remote read) */ + + read_completed_with_error, + write_completed_with_error, + completed_ok, +}; + +/* encoding of request states for now. we don't actually need that many bits. + * we don't need to do atomic bit operations either, since most of the time we + * need to look at the connection state and/or manipulate some lists at the + * same time, so we should hold the request lock anyways. + */ +enum drbd_req_state_bits { + /* 210 + * 000: no local possible + * 001: to be submitted + * UNUSED, we could map: 011: submitted, completion still pending + * 110: completed ok + * 010: completed with error + */ + __RQ_LOCAL_PENDING, + __RQ_LOCAL_COMPLETED, + __RQ_LOCAL_OK, + + /* 76543 + * 00000: no network possible + * 00001: to be send + * 00011: to be send, on worker queue + * 00101: sent, expecting recv_ack (B) or write_ack (C) + * 11101: sent, + * recv_ack (B) or implicit "ack" (A), + * still waiting for the barrier ack. + * master_bio may already be completed and invalidated. + * 11100: write_acked (C), + * data_received (for remote read, any protocol) + * or finally the barrier ack has arrived (B,A)... + * request can be freed + * 01100: neg-acked (write, protocol C) + * or neg-d-acked (read, any protocol) + * or killed from the transfer log + * during cleanup after connection loss + * request can be freed + * 01000: canceled or send failed... + * request can be freed + */ + + /* if "SENT" is not set, yet, this can still fail or be canceled. + * if "SENT" is set already, we still wait for an Ack packet. + * when cleared, the master_bio may be completed. + * in (B,A) the request object may still linger on the transaction log + * until the corresponding barrier ack comes in */ + __RQ_NET_PENDING, + + /* If it is QUEUED, and it is a WRITE, it is also registered in the + * transfer log. Currently we need this flag to avoid conflicts between + * worker canceling the request and tl_clear_barrier killing it from + * transfer log. We should restructure the code so this conflict does + * no longer occur. */ + __RQ_NET_QUEUED, + + /* well, actually only "handed over to the network stack". + * + * TODO can potentially be dropped because of the similar meaning + * of RQ_NET_SENT and ~RQ_NET_QUEUED. + * however it is not exactly the same. before we drop it + * we must ensure that we can tell a request with network part + * from a request without, regardless of what happens to it. 
*/ + __RQ_NET_SENT, + + /* when set, the request may be freed (if RQ_NET_QUEUED is clear). + * basically this means the corresponding BarrierAck was received */ + __RQ_NET_DONE, + + /* whether or not we know (C) or pretend (B,A) that the write + * was successfully written on the peer. + */ + __RQ_NET_OK, + + /* peer called drbd_set_in_sync() for this write */ + __RQ_NET_SIS, + + /* keep this last, its for the RQ_NET_MASK */ + __RQ_NET_MAX, +}; + +#define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) +#define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED) +#define RQ_LOCAL_OK (1UL << __RQ_LOCAL_OK) + +#define RQ_LOCAL_MASK ((RQ_LOCAL_OK << 1)-1) /* 0x07 */ + +#define RQ_NET_PENDING (1UL << __RQ_NET_PENDING) +#define RQ_NET_QUEUED (1UL << __RQ_NET_QUEUED) +#define RQ_NET_SENT (1UL << __RQ_NET_SENT) +#define RQ_NET_DONE (1UL << __RQ_NET_DONE) +#define RQ_NET_OK (1UL << __RQ_NET_OK) +#define RQ_NET_SIS (1UL << __RQ_NET_SIS) + +/* 0x1f8 */ +#define RQ_NET_MASK (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK) + +/* epoch entries */ +static inline +struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector) +{ + BUG_ON(mdev->ee_hash_s == 0); + return mdev->ee_hash + + ((unsigned int)(sector>>HT_SHIFT) % mdev->ee_hash_s); +} + +/* transfer log (drbd_request objects) */ +static inline +struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector) +{ + BUG_ON(mdev->tl_hash_s == 0); + return mdev->tl_hash + + ((unsigned int)(sector>>HT_SHIFT) % mdev->tl_hash_s); +} + +/* when we receive the ACK for a write request, + * verify that we actually know about it */ +static inline struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, + u64 id, sector_t sector) +{ + struct hlist_head *slot = tl_hash_slot(mdev, sector); + struct hlist_node *n; + struct drbd_request *req; + + hlist_for_each_entry(req, n, slot, colision) { + if ((unsigned long)req == (unsigned long)id) { + if (req->sector != sector) { + ERR("_ack_id_to_req: found req %p but it has " + "wrong sector (%llus versus %llus)\n", req, + (unsigned long long)req->sector, + (unsigned long long)sector); + break; + } + return req; + } + } + ERR("_ack_id_to_req: failed to find req %p, sector %llus in list\n", + (void *)(unsigned long)id, (unsigned long long)sector); + return NULL; +} + +/* application reads (drbd_request objects) */ +static struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector) +{ + return mdev->app_reads_hash + + ((unsigned int)(sector) % APP_R_HSIZE); +} + +/* when we receive the answer for a read request, + * verify that we actually know about it */ +static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev, + u64 id, sector_t sector) +{ + struct hlist_head *slot = ar_hash_slot(mdev, sector); + struct hlist_node *n; + struct drbd_request *req; + + hlist_for_each_entry(req, n, slot, colision) { + if ((unsigned long)req == (unsigned long)id) { + D_ASSERT(req->sector == sector); + return req; + } + } + return NULL; +} + +static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, + struct bio *bio_src) +{ + struct bio *bio; + struct drbd_request *req = + mempool_alloc(drbd_request_mempool, GFP_NOIO); + if (likely(req)) { + bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? 
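
The tl and ee hash tables above share the same slot arithmetic: shift the
sector down by HT_SHIFT, then reduce it modulo the table size. Standalone,
with HT_SHIFT taken as 6 (64 sectors, i.e. 32 KiB, per slot, the value used
in the worked split example in drbd_make_request_26):

    #include <stdio.h>

    typedef unsigned long long sector_t;

    #define HT_SHIFT 6

    static unsigned int tl_slot(sector_t sector, unsigned int tl_hash_s)
    {
            return (unsigned int)(sector >> HT_SHIFT) % tl_hash_s;
    }

    int main(void)
    {
            /* first and last sector of the 4 KiB request at 262269 */
            printf("%u %u\n", tl_slot(262269, 512), tl_slot(262276, 512));
            return 0;
    }

Because the two sectors of that request land in different slots, such a bio
has to be split before it can be registered, which is exactly what
drbd_make_request_26 does.
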
*/ + + req->rq_state = 0; + req->mdev = mdev; + req->master_bio = bio_src; + req->private_bio = bio; + req->epoch = 0; + req->sector = bio->bi_sector; + req->size = bio->bi_size; + req->start_time = jiffies; + INIT_HLIST_NODE(&req->colision); + INIT_LIST_HEAD(&req->tl_requests); + INIT_LIST_HEAD(&req->w.list); + + bio->bi_private = req; + bio->bi_end_io = drbd_endio_pri; + bio->bi_next = NULL; + } + return req; +} + +static inline void drbd_req_free(struct drbd_request *req) +{ + mempool_free(req, drbd_request_mempool); +} + +static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2) +{ + return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9))); +} + +/* aparently too large to be inlined... + * moved to drbd_req.c */ +extern void _req_may_be_done(struct drbd_request *req, int error); +extern void _req_mod(struct drbd_request *req, + enum drbd_req_event what, int error); + +/* If you need it irqsave, do it your self! */ +static inline void req_mod(struct drbd_request *req, + enum drbd_req_event what, int error) +{ + struct drbd_conf *mdev = req->mdev; + spin_lock_irq(&mdev->req_lock); + _req_mod(req, what, error); + spin_unlock_irq(&mdev->req_lock); +} +#endif -- cgit v1.2.3 From 954d89ad299b5ee9a11a587339a7b7c21aadd73a Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 30 Mar 2009 18:47:13 +0200 Subject: DRBD: userspace_interface DRBD uses netlink via connector. The packets are composed of extensible tag lists. That interface can be extended over time without breaking old userspace programs. The nice part of the interface to userspace: drbd.h. The ugly part is for sure drbd_tag_magic.h. I realize that macros are generally frowned upon, but this way it is easier to maintain. The code that gets generated by repeatedly including drbd_nl.h is hard to maintain over time if it is open coded. (BTW, did you know that the samba 4 people are proud to have more than 50% of their code auto generated:) Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 2426 ++++++++++++++++++++++++++++++++++++++++ include/linux/drbd.h | 372 ++++++ include/linux/drbd_config.h | 43 + include/linux/drbd_limits.h | 133 +++ include/linux/drbd_nl.h | 135 +++ include/linux/drbd_tag_magic.h | 83 ++ 6 files changed, 3192 insertions(+) create mode 100644 drivers/block/drbd/drbd_nl.c create mode 100644 include/linux/drbd.h create mode 100644 include/linux/drbd_config.h create mode 100644 include/linux/drbd_limits.h create mode 100644 include/linux/drbd_nl.h create mode 100644 include/linux/drbd_tag_magic.h diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c new file mode 100644 index 000000000000..ee45093d2546 --- /dev/null +++ b/drivers/block/drbd/drbd_nl.c @@ -0,0 +1,2426 @@ +/* + drbd_nl.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 1999-2008, Philipp Reisner . + Copyright (C) 2002-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#include +#include +#include +#include +#include /* for fsync_bdev */ +#include +#include +#include +#include +#include +#include + +#include "drbd_int.h" +#include "drbd_wrappers.h" +#include +#include + +/* see get_sb_bdev and bd_claim */ +static char *drbd_m_holder = "Hands off! this is DRBD's meta data device."; + +/* Generate the tag_list to struct functions */ +#define NL_PACKET(name, number, fields) \ +STATIC int name ## _from_tags(struct drbd_conf *mdev, \ + unsigned short *tags, struct name *arg) \ +{ \ + int tag; \ + int dlen; \ + \ + while ((tag = *tags++) != TT_END) { \ + dlen = *tags++; \ + switch (tag_number(tag)) { \ + fields \ + default: \ + if (tag & T_MANDATORY) { \ + ERR("Unknown tag: %d\n", tag_number(tag)); \ + return 0; \ + } \ + } \ + tags = (unsigned short *)((char *)tags + dlen); \ + } \ + return 1; \ +} +#define NL_INTEGER(pn, pr, member) \ + case pn: /* D_ASSERT( tag_type(tag) == TT_INTEGER ); */ \ + arg->member = *(int *)(tags); \ + break; +#define NL_INT64(pn, pr, member) \ + case pn: /* D_ASSERT( tag_type(tag) == TT_INT64 ); */ \ + arg->member = *(u64 *)(tags); \ + break; +#define NL_BIT(pn, pr, member) \ + case pn: /* D_ASSERT( tag_type(tag) == TT_BIT ); */ \ + arg->member = *(char *)(tags) ? 1 : 0; \ + break; +#define NL_STRING(pn, pr, member, len) \ + case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \ + if (dlen > len) { \ + ERR("arg too long: %s (%u wanted, max len: %u bytes)\n", \ + #member, dlen, (unsigned int)len); \ + return 0; \ + } \ + arg->member ## _len = dlen; \ + memcpy(arg->member, tags, min_t(size_t, dlen, len)); \ + break; +#include "linux/drbd_nl.h" + +/* Generate the struct to tag_list functions */ +#define NL_PACKET(name, number, fields) \ +STATIC unsigned short* \ +name ## _to_tags(struct drbd_conf *mdev, \ + struct name *arg, unsigned short *tags) \ +{ \ + fields \ + return tags; \ +} + +#define NL_INTEGER(pn, pr, member) \ + *tags++ = pn | pr | TT_INTEGER; \ + *tags++ = sizeof(int); \ + *(int *)tags = arg->member; \ + tags = (unsigned short *)((char *)tags+sizeof(int)); +#define NL_INT64(pn, pr, member) \ + *tags++ = pn | pr | TT_INT64; \ + *tags++ = sizeof(u64); \ + *(u64 *)tags = arg->member; \ + tags = (unsigned short *)((char *)tags+sizeof(u64)); +#define NL_BIT(pn, pr, member) \ + *tags++ = pn | pr | TT_BIT; \ + *tags++ = sizeof(char); \ + *(char *)tags = arg->member; \ + tags = (unsigned short *)((char *)tags+sizeof(char)); +#define NL_STRING(pn, pr, member, len) \ + *tags++ = pn | pr | TT_STRING; \ + *tags++ = arg->member ## _len; \ + memcpy(tags, arg->member, arg->member ## _len); \ + tags = (unsigned short *)((char *)tags + arg->member ## _len); +#include "linux/drbd_nl.h" + +void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name); +void drbd_nl_send_reply(struct cn_msg *, int); + +STATIC char *nl_packet_name(int packet_type) +{ +/* Generate packet type strings */ +#define NL_PACKET(name, number, fields) \ + [P_ ## name] = # name, +#define NL_INTEGER Argh! +#define NL_BIT Argh! +#define NL_INT64 Argh! +#define NL_STRING Argh! + + static char *nl_tag_name[P_nl_after_last_packet] = { +#include "linux/drbd_nl.h" + }; + + return (packet_type < sizeof(nl_tag_name)/sizeof(nl_tag_name[0])) ? 
+ nl_tag_name[packet_type] : "*Unknown*"; +} + +STATIC void nl_trace_packet(void *data) +{ + struct cn_msg *req = data; + struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data; + + printk(KERN_INFO "drbd%d: " + "Netlink: << %s (%d) - seq: %x, ack: %x, len: %x\n", + nlp->drbd_minor, + nl_packet_name(nlp->packet_type), + nlp->packet_type, + req->seq, req->ack, req->len); +} + +STATIC void nl_trace_reply(void *data) +{ + struct cn_msg *req = data; + struct drbd_nl_cfg_reply *nlp = (struct drbd_nl_cfg_reply *)req->data; + + printk(KERN_INFO "drbd%d: " + "Netlink: >> %s (%d) - seq: %x, ack: %x, len: %x\n", + nlp->minor, + nlp->packet_type == P_nl_after_last_packet ? + "Empty-Reply" : nl_packet_name(nlp->packet_type), + nlp->packet_type, + req->seq, req->ack, req->len); +} + +int drbd_khelper(struct drbd_conf *mdev, char *cmd) +{ + char mb[12]; + char *argv[] = {usermode_helper, cmd, mb, NULL }; + int ret; + static char *envp[] = { "HOME=/", + "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + NULL }; + + snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev)); + + INFO("helper command: %s %s %s\n", usermode_helper, cmd, mb); + + drbd_bcast_ev_helper(mdev, cmd); + ret = call_usermodehelper(usermode_helper, argv, envp, 1); + if (ret) + drbd_WARN("helper command: %s %s %s exit code %u (0x%x)\n", + usermode_helper, cmd, mb, + (ret >> 8) & 0xff, ret); + else + INFO("helper command: %s %s %s exit code %u (0x%x)\n", + usermode_helper, cmd, mb, + (ret >> 8) & 0xff, ret); + + if (ret < 0) /* Ignore any ERRNOs we got. */ + ret = 0; + + return ret; +} + +enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev) +{ + char *ex_to_string; + int r; + enum drbd_disk_state nps; + enum fencing_policy fp; + + D_ASSERT(mdev->state.pdsk == DUnknown); + + if (inc_local_if_state(mdev, Consistent)) { + fp = mdev->bc->dc.fencing; + dec_local(mdev); + } else { + drbd_WARN("Not fencing peer, I'm not even Consistent myself.\n"); + return mdev->state.pdsk; + } + + if (fp == Stonith) + _drbd_request_state(mdev, NS(susp, 1), ChgWaitComplete); + + r = drbd_khelper(mdev, "fence-peer"); + + switch ((r>>8) & 0xff) { + case 3: /* peer is inconsistent */ + ex_to_string = "peer is inconsistent or worse"; + nps = Inconsistent; + break; + case 4: + ex_to_string = "peer is outdated"; + nps = Outdated; + break; + case 5: /* peer was down, we will(have) create(d) a new UUID anyways... */ + /* If we would be more strict, we would return DUnknown here. */ + ex_to_string = "peer is unreachable, assumed to be dead"; + nps = Outdated; + break; + case 6: /* Peer is primary, voluntarily outdate myself. + * This is useful when an unconnected Secondary is asked to + * become Primary, but findes the other peer being active. */ + ex_to_string = "peer is active"; + drbd_WARN("Peer is primary, outdating myself.\n"); + nps = DUnknown; + _drbd_request_state(mdev, NS(disk, Outdated), ChgWaitComplete); + break; + case 7: + if (fp != Stonith) + ERR("fence-peer() = 7 && fencing != Stonith !!!\n"); + ex_to_string = "peer was stonithed"; + nps = Outdated; + break; + default: + /* The script is broken ... 
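
The switch above keys on (r >> 8) & 0xff because call_usermodehelper() hands
back a wait(2)-style status word, in which the child's exit code occupies
bits 8 through 15. The same decoding, demonstrated with system() in plain
userspace C:

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/wait.h>

    int main(void)
    {
            int status = system("exit 5"); /* shell exits with code 5 */

            if (status != -1) {
                    printf("raw=0x%x exit code=%d\n",
                           status, (status >> 8) & 0xff);
                    /* WEXITSTATUS() is the portable spelling */
                    if (WIFEXITED(status))
                            printf("WEXITSTATUS=%d\n", WEXITSTATUS(status));
            }
            return 0;
    }
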
*/ + nps = DUnknown; + ERR("fence-peer helper broken, returned %d\n", (r>>8)&0xff); + return nps; + } + + INFO("fence-peer helper returned %d (%s)\n", + (r>>8) & 0xff, ex_to_string); + return nps; +} + + +int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) +{ + const int max_tries = 4; + int r = 0; + int try = 0; + int forced = 0; + union drbd_state_t mask, val; + enum drbd_disk_state nps; + + if (new_role == Primary) + request_ping(mdev); /* Detect a dead peer ASAP */ + + mutex_lock(&mdev->state_mutex); + + mask.i = 0; mask.role = role_mask; + val.i = 0; val.role = new_role; + + while (try++ < max_tries) { + r = _drbd_request_state(mdev, mask, val, ChgWaitComplete); + + /* in case we first succeeded to outdate, + * but now suddenly could establish a connection */ + if (r == SS_CW_FailedByPeer && mask.pdsk != 0) { + val.pdsk = 0; + mask.pdsk = 0; + continue; + } + + if (r == SS_NoUpToDateDisk && force && + (mdev->state.disk == Inconsistent || + mdev->state.disk == Outdated)) { + mask.disk = disk_mask; + val.disk = UpToDate; + forced = 1; + continue; + } + + if (r == SS_NoUpToDateDisk && + mdev->state.disk == Consistent) { + D_ASSERT(mdev->state.pdsk == DUnknown); + nps = drbd_try_outdate_peer(mdev); + + if (nps == Outdated) { + val.disk = UpToDate; + mask.disk = disk_mask; + } + + val.pdsk = nps; + mask.pdsk = disk_mask; + + continue; + } + + if (r == SS_NothingToDo) + goto fail; + if (r == SS_PrimaryNOP) { + nps = drbd_try_outdate_peer(mdev); + + if (force && nps > Outdated) { + drbd_WARN("Forced into split brain situation!\n"); + nps = Outdated; + } + + mask.pdsk = disk_mask; + val.pdsk = nps; + + continue; + } + if (r == SS_TwoPrimaries) { + /* Maybe the peer is detected as dead very soon... + retry at most once more in this case. 
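
drbd requests cluster-wide state changes as a (mask, val) pair of the same
union type: only the fields selected by mask are modified, all others are
left alone. A minimal model (the bitfield widths here are invented; the real
union drbd_state_t lives in drbd_int.h):

    #include <stdio.h>

    union state {
            struct {
                    unsigned role:2;
                    unsigned disk:4;
                    unsigned pdsk:4;
            };
            unsigned int i;
    };

    /* change only the fields covered by mask; val must lie within mask */
    static union state apply(union state os, union state mask, union state val)
    {
            union state ns;
            ns.i = (os.i & ~mask.i) | val.i;
            return ns;
    }

    int main(void)
    {
            union state os = { .role = 0, .disk = 8, .pdsk = 8 };
            union state mask = { .role = 3 }, val = { .role = 1 };
            printf("role after request: %u\n", apply(os, mask, val).role);
            return 0;
    }
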
*/ + __set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout((mdev->net_conf->ping_timeo+1)*HZ/10); + if (try < max_tries) + try = max_tries - 1; + continue; + } + if (r < SS_Success) { + r = _drbd_request_state(mdev, mask, val, + ChgStateVerbose + ChgWaitComplete); + if (r < SS_Success) + goto fail; + } + break; + } + + if (forced) + drbd_WARN("Forced to consider local data as UpToDate!\n"); + + fsync_bdev(mdev->this_bdev); + + /* Wait until nothing is on the fly :) */ + wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0); + + if (new_role == Secondary) { + set_disk_ro(mdev->vdisk, TRUE); + if (inc_local(mdev)) { + mdev->bc->md.uuid[Current] &= ~(u64)1; + dec_local(mdev); + } + } else { + if (inc_net(mdev)) { + mdev->net_conf->want_lose = 0; + dec_net(mdev); + } + set_disk_ro(mdev->vdisk, FALSE); + if (inc_local(mdev)) { + if (((mdev->state.conn < Connected || + mdev->state.pdsk <= Failed) + && mdev->bc->md.uuid[Bitmap] == 0) || forced) + drbd_uuid_new_current(mdev); + + mdev->bc->md.uuid[Current] |= (u64)1; + dec_local(mdev); + } + } + + if ((new_role == Secondary) && inc_local(mdev)) { + drbd_al_to_on_disk_bm(mdev); + dec_local(mdev); + } + + if (mdev->state.conn >= WFReportParams) { + /* if this was forced, we should consider sync */ + if (forced) + drbd_send_uuids(mdev); + drbd_send_state(mdev); + } + + drbd_md_sync(mdev); + + drbd_kobject_uevent(mdev); + fail: + mutex_unlock(&mdev->state_mutex); + return r; +} + + +STATIC int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + struct primary primary_args; + + memset(&primary_args, 0, sizeof(struct primary)); + if (!primary_from_tags(mdev, nlp->tag_list, &primary_args)) { + reply->ret_code = UnknownMandatoryTag; + return 0; + } + + reply->ret_code = + drbd_set_role(mdev, Primary, primary_args.overwrite_peer); + + return 0; +} + +STATIC int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + reply->ret_code = drbd_set_role(mdev, Secondary, 0); + + return 0; +} + +/* initializes the md.*_offset members, so we are able to find + * the on disk meta data */ +STATIC void drbd_md_set_sector_offsets(struct drbd_conf *mdev, + struct drbd_backing_dev *bdev) +{ + sector_t md_size_sect = 0; + switch (bdev->dc.meta_dev_idx) { + default: + /* v07 style fixed size indexed meta data */ + bdev->md.md_size_sect = MD_RESERVED_SECT; + bdev->md.md_offset = drbd_md_ss__(mdev, bdev); + bdev->md.al_offset = MD_AL_OFFSET; + bdev->md.bm_offset = MD_BM_OFFSET; + break; + case DRBD_MD_INDEX_FLEX_EXT: + /* just occupy the full device; unit: sectors */ + bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev); + bdev->md.md_offset = 0; + bdev->md.al_offset = MD_AL_OFFSET; + bdev->md.bm_offset = MD_BM_OFFSET; + break; + case DRBD_MD_INDEX_INTERNAL: + case DRBD_MD_INDEX_FLEX_INT: + bdev->md.md_offset = drbd_md_ss__(mdev, bdev); + /* al size is still fixed */ + bdev->md.al_offset = -MD_AL_MAX_SIZE; + /* we need (slightly less than) ~ this much bitmap sectors: */ + md_size_sect = drbd_get_capacity(bdev->backing_bdev); + md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT); + md_size_sect = BM_SECT_TO_EXT(md_size_sect); + md_size_sect = ALIGN(md_size_sect, 8); + + /* plus the "drbd meta data super block", + * and the activity log; */ + md_size_sect += MD_BM_OFFSET; + + bdev->md.md_size_sect = md_size_sect; + /* bitmap offset is adjusted by 'super' block size */ + bdev->md.bm_offset = -md_size_sect + MD_AL_OFFSET; + break; + } +} + 
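
For the internal meta-data case above, the size of the reserved area is
driven almost entirely by the bitmap: one bit per 4 KiB of data, rounded up
to whole 4 KiB bitmap pages, plus a fixed reserve for the activity log and
the meta-data super block. A standalone sketch; all constants here are
representative assumptions, the authoritative values live in drbd_int.h:

    #include <stdio.h>

    typedef unsigned long long sector_t;

    /* one bit per 4 KiB -> 8 data sectors per bit; a 512-byte bitmap
     * sector holds 4096 bits -> covers 32768 data sectors (16 MiB) */
    #define DATA_SECTORS_PER_BM_SECTOR 32768ULL
    #define AL_PLUS_SUPER_SECTORS      72ULL /* assumed fixed reserve */

    static sector_t md_size_sect(sector_t capacity)
    {
            sector_t bm = (capacity + DATA_SECTORS_PER_BM_SECTOR - 1)
                            / DATA_SECTORS_PER_BM_SECTOR;
            bm = (bm + 7) & ~7ULL; /* round up to 4 KiB bitmap pages */
            return bm + AL_PLUS_SUPER_SECTORS;
    }

    int main(void)
    {
            sector_t cap = 2097152; /* 1 GiB in 512-byte sectors */
            printf("internal md area: ~%llu sectors\n", md_size_sect(cap));
            return 0;
    }
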
+char *ppsize(char *buf, unsigned long long size) +{ + /* Needs 9 bytes at max. */ + static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' }; + int base = 0; + while (size >= 10000) { + /* shift + round */ + size = (size >> 10) + !!(size & (1<<9)); + base++; + } + sprintf(buf, "%lu %cB", (long)size, units[base]); + + return buf; +} + +/* there is still a theoretical deadlock when called from receiver + * on an Inconsistent Primary: + * remote READ does inc_ap_bio, receiver would need to receive answer + * packet from remote to dec_ap_bio again. + * receiver receive_sizes(), comes here, + * waits for ap_bio_cnt == 0. -> deadlock. + * but this cannot happen, actually, because: + * Primary Inconsistent, and peer's disk is unreachable + * (not connected, * or bad/no disk on peer): + * see drbd_fail_request_early, ap_bio_cnt is zero. + * Primary Inconsistent, and SyncTarget: + * peer may not initiate a resize. + */ +void drbd_suspend_io(struct drbd_conf *mdev) +{ + int in_flight; + set_bit(SUSPEND_IO, &mdev->flags); + in_flight = atomic_read(&mdev->ap_bio_cnt); + if (in_flight) + wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); +} + +void drbd_resume_io(struct drbd_conf *mdev) +{ + clear_bit(SUSPEND_IO, &mdev->flags); + wake_up(&mdev->misc_wait); +} + +/** + * drbd_determin_dev_size: + * Evaluates all constraints and sets our correct device size. + * Negative return values indicate errors. 0 and positive values + * indicate success. + * You should call drbd_md_sync() after calling this function. + */ +enum determin_dev_size_enum drbd_determin_dev_size(struct drbd_conf *mdev) __must_hold(local) +{ + sector_t prev_first_sect, prev_size; /* previous meta location */ + sector_t la_size; + sector_t size; + char ppb[10]; + + int md_moved, la_size_changed; + enum determin_dev_size_enum rv = unchanged; + + /* race: + * application request passes inc_ap_bio, + * but then cannot get an AL-reference. + * this function later may wait on ap_bio_cnt == 0. -> deadlock. + * + * to avoid that: + * Suspend IO right here. + * still lock the act_log to not trigger ASSERTs there. + */ + drbd_suspend_io(mdev); + + /* no wait necessary anymore, actually we could assert that */ + wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); + + prev_first_sect = drbd_md_first_sector(mdev->bc); + prev_size = mdev->bc->md.md_size_sect; + la_size = mdev->bc->md.la_size_sect; + + /* TODO: should only be some assert here, not (re)init... */ + drbd_md_set_sector_offsets(mdev, mdev->bc); + + size = drbd_new_dev_size(mdev, mdev->bc); + + if (drbd_get_capacity(mdev->this_bdev) != size || + drbd_bm_capacity(mdev) != size) { + int err; + err = drbd_bm_resize(mdev, size); + if (unlikely(err)) { + /* currently there is only one error: ENOMEM! */ + size = drbd_bm_capacity(mdev)>>1; + if (size == 0) { + ERR("OUT OF MEMORY! " + "Could not allocate bitmap! "); + } else { + ERR("BM resizing failed. " + "Leaving size unchanged at size = %lu KB\n", + (unsigned long)size); + } + rv = dev_size_error; + } + /* racy, see comments above. 
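
ppsize() above keeps at most four significant digits by repeatedly dividing
by 1024 and rounding on the highest bit shifted out. It is plain C and can
be exercised as is:

    #include <stdio.h>

    static char *ppsize(char *buf, unsigned long long size) /* size in KB */
    {
            static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
            int base = 0;

            while (size >= 10000) {
                    /* shift + round */
                    size = (size >> 10) + !!(size & (1 << 9));
                    base++;
            }
            sprintf(buf, "%lu %cB", (unsigned long)size, units[base]);
            return buf;
    }

    int main(void)
    {
            char buf[16]; /* ppsize needs 9 bytes at most */
            printf("%s\n", ppsize(buf, 1048576ULL)); /* -> "1024 MB" */
            return 0;
    }
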
*/ + drbd_set_my_capacity(mdev, size); + mdev->bc->md.la_size_sect = size; + INFO("size = %s (%llu KB)\n", ppsize(ppb, size>>1), + (unsigned long long)size>>1); + } + if (rv == dev_size_error) + goto out; + + la_size_changed = (la_size != mdev->bc->md.la_size_sect); + + md_moved = prev_first_sect != drbd_md_first_sector(mdev->bc) + || prev_size != mdev->bc->md.md_size_sect; + + if (md_moved) { + drbd_WARN("Moving meta-data.\n"); + /* assert: (flexible) internal meta data */ + } + + if (la_size_changed || md_moved) { + drbd_al_shrink(mdev); /* All extents inactive. */ + INFO("Writing the whole bitmap, size changed\n"); + rv = drbd_bitmap_io(mdev, &drbd_bm_write, "size changed"); + drbd_md_mark_dirty(mdev); + } + + if (size > la_size) + rv = grew; + if (size < la_size) + rv = shrunk; +out: + lc_unlock(mdev->act_log); + wake_up(&mdev->al_wait); + drbd_resume_io(mdev); + + return rv; +} + +sector_t +drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) +{ + sector_t p_size = mdev->p_size; /* partner's disk size. */ + sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */ + sector_t m_size; /* my size */ + sector_t u_size = bdev->dc.disk_size; /* size requested by user. */ + sector_t size = 0; + + m_size = drbd_get_max_capacity(bdev); + + if (p_size && m_size) { + size = min_t(sector_t, p_size, m_size); + } else { + if (la_size) { + size = la_size; + if (m_size && m_size < size) + size = m_size; + if (p_size && p_size < size) + size = p_size; + } else { + if (m_size) + size = m_size; + if (p_size) + size = p_size; + } + } + + if (size == 0) + ERR("Both nodes diskless!\n"); + + if (u_size) { + if (u_size > size) + ERR("Requested disk size is too big (%lu > %lu)\n", + (unsigned long)u_size>>1, (unsigned long)size>>1); + else + size = u_size; + } + + return size; +} + +/** + * drbd_check_al_size: + * checks that the al lru is of requested size, and if neccessary tries to + * allocate a new one. returns -EBUSY if current al lru is still used, + * -ENOMEM when allocation failed, and 0 on success. You should call + * drbd_md_sync() after you called this function. 
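
Stripped of logging, drbd_new_dev_size() above is a pure function of four
candidate sizes. A userspace rendering (all sizes in sectors, 0 meaning
"unknown"):

    #include <stdio.h>

    typedef unsigned long long sector_t;

    static sector_t new_dev_size(sector_t p_size, sector_t m_size,
                                 sector_t la_size, sector_t u_size)
    {
            sector_t size;

            if (p_size && m_size) {
                    size = p_size < m_size ? p_size : m_size;
            } else if (la_size) {
                    size = la_size; /* last agreed size */
                    if (m_size && m_size < size)
                            size = m_size;
                    if (p_size && p_size < size)
                            size = p_size;
            } else {
                    size = m_size ? m_size : p_size;
            }
            /* an explicit user setting wins, but may only shrink */
            if (u_size && u_size <= size)
                    size = u_size;
            return size;
    }

    int main(void)
    {
            printf("%llu\n", new_dev_size(2000, 1000, 0, 0)); /* -> 1000 */
            return 0;
    }
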
+ */ +STATIC int drbd_check_al_size(struct drbd_conf *mdev) +{ + struct lru_cache *n, *t; + struct lc_element *e; + unsigned int in_use; + int i; + + ERR_IF(mdev->sync_conf.al_extents < 7) + mdev->sync_conf.al_extents = 127; + + if (mdev->act_log && + mdev->act_log->nr_elements == mdev->sync_conf.al_extents) + return 0; + + in_use = 0; + t = mdev->act_log; + n = lc_alloc("act_log", mdev->sync_conf.al_extents, + sizeof(struct lc_element), mdev); + + if (n == NULL) { + ERR("Cannot allocate act_log lru!\n"); + return -ENOMEM; + } + spin_lock_irq(&mdev->al_lock); + if (t) { + for (i = 0; i < t->nr_elements; i++) { + e = lc_entry(t, i); + if (e->refcnt) + ERR("refcnt(%d)==%d\n", + e->lc_number, e->refcnt); + in_use += e->refcnt; + } + } + if (!in_use) + mdev->act_log = n; + spin_unlock_irq(&mdev->al_lock); + if (in_use) { + ERR("Activity log still in use!\n"); + lc_free(n); + return -EBUSY; + } else { + if (t) + lc_free(t); + } + drbd_md_mark_dirty(mdev); /* we changed mdev->act_log->nr_elemens */ + return 0; +} + +void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __must_hold(local) +{ + struct request_queue * const q = mdev->rq_queue; + struct request_queue * const b = mdev->bc->backing_bdev->bd_disk->queue; + /* unsigned int old_max_seg_s = q->max_segment_size; */ + int max_segments = mdev->bc->dc.max_bio_bvecs; + + if (b->merge_bvec_fn && !mdev->bc->dc.use_bmbv) + max_seg_s = PAGE_SIZE; + + max_seg_s = min(b->max_sectors * b->hardsect_size, max_seg_s); + + MTRACE(TraceTypeRq, TraceLvlSummary, + DUMPI(b->max_sectors); + DUMPI(b->max_phys_segments); + DUMPI(b->max_hw_segments); + DUMPI(b->max_segment_size); + DUMPI(b->hardsect_size); + DUMPI(b->seg_boundary_mask); + ); + + q->max_sectors = max_seg_s >> 9; + if (max_segments) { + q->max_phys_segments = max_segments; + q->max_hw_segments = max_segments; + } else { + q->max_phys_segments = MAX_PHYS_SEGMENTS; + q->max_hw_segments = MAX_HW_SEGMENTS; + } + q->max_segment_size = max_seg_s; + q->hardsect_size = 512; + q->seg_boundary_mask = PAGE_SIZE-1; + blk_queue_stack_limits(q, b); + + /* KERNEL BUG. in ll_rw_blk.c ?? + * t->max_segment_size = min(t->max_segment_size,b->max_segment_size); + * should be + * t->max_segment_size = min_not_zero(...,...) 
+ * workaround here: */ + if (q->max_segment_size == 0) + q->max_segment_size = max_seg_s; + + MTRACE(TraceTypeRq, TraceLvlSummary, + DUMPI(q->max_sectors); + DUMPI(q->max_phys_segments); + DUMPI(q->max_hw_segments); + DUMPI(q->max_segment_size); + DUMPI(q->hardsect_size); + DUMPI(q->seg_boundary_mask); + ); + + if (b->merge_bvec_fn) + drbd_WARN("Backing device's merge_bvec_fn() = %p\n", + b->merge_bvec_fn); + INFO("max_segment_size ( = BIO size ) = %u\n", q->max_segment_size); + + if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { + INFO("Adjusting my ra_pages to backing device's (%lu -> %lu)\n", + q->backing_dev_info.ra_pages, + b->backing_dev_info.ra_pages); + q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; + } +} + +/* does always return 0; + * interesting return code is in reply->ret_code */ +STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + enum ret_codes retcode; + enum determin_dev_size_enum dd; + sector_t max_possible_sectors; + sector_t min_md_device_sectors; + struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */ + struct inode *inode, *inode2; + struct lru_cache *resync_lru = NULL; + union drbd_state_t ns, os; + int rv, ntries = 0; + int cp_discovered = 0; + int hardsect; + + /* if you want to reconfigure, please tear down first */ + if (mdev->state.disk > Diskless) { + retcode = HaveDiskConfig; + goto fail; + } + + /* + * We may have gotten here very quickly from a detach. Wait for a bit + * then fail. + */ + while (1) { + __no_warn(local, nbc = mdev->bc;); + if (nbc == NULL) + break; + if (ntries++ >= 5) { + drbd_WARN("drbd_nl_disk_conf: mdev->bc not NULL.\n"); + retcode = HaveDiskConfig; + goto fail; + } + __set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ/10); + } + + nbc = kmalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); + if (!nbc) { + retcode = KMallocFailed; + goto fail; + } + + memset(&nbc->md, 0, sizeof(struct drbd_md)); + + if (!(nlp->flags & DRBD_NL_SET_DEFAULTS) && inc_local(mdev)) { + memcpy(&nbc->dc, &mdev->bc->dc, sizeof(struct disk_conf)); + dec_local(mdev); + } else { + memset(&nbc->dc, 0, sizeof(struct disk_conf)); + nbc->dc.disk_size = DRBD_DISK_SIZE_SECT_DEF; + nbc->dc.on_io_error = DRBD_ON_IO_ERROR_DEF; + nbc->dc.fencing = DRBD_FENCING_DEF; + nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF; + } + + if (!disk_conf_from_tags(mdev, nlp->tag_list, &nbc->dc)) { + retcode = UnknownMandatoryTag; + goto fail; + } + + nbc->lo_file = NULL; + nbc->md_file = NULL; + + if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { + retcode = LDMDInvalid; + goto fail; + } + + nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0); + if (IS_ERR(nbc->lo_file)) { + ERR("open(\"%s\") failed with %ld\n", nbc->dc.backing_dev, + PTR_ERR(nbc->lo_file)); + nbc->lo_file = NULL; + retcode = LDNameInvalid; + goto fail; + } + + inode = nbc->lo_file->f_dentry->d_inode; + + if (!S_ISBLK(inode->i_mode)) { + retcode = LDNoBlockDev; + goto fail; + } + + nbc->md_file = filp_open(nbc->dc.meta_dev, O_RDWR, 0); + if (IS_ERR(nbc->md_file)) { + ERR("open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, + PTR_ERR(nbc->md_file)); + nbc->md_file = NULL; + retcode = MDNameInvalid; + goto fail; + } + + inode2 = nbc->md_file->f_dentry->d_inode; + + if (!S_ISBLK(inode2->i_mode)) { + retcode = MDNoBlockDev; + goto fail; + } + + nbc->backing_bdev = inode->i_bdev; + if (bd_claim(nbc->backing_bdev, mdev)) { + printk(KERN_ERR "drbd: bd_claim(%p,%p); failed [%p;%p;%u]\n", + nbc->backing_bdev, 
mdev, + nbc->backing_bdev->bd_holder, + nbc->backing_bdev->bd_contains->bd_holder, + nbc->backing_bdev->bd_holders); + retcode = LDMounted; + goto fail; + } + + resync_lru = lc_alloc("resync", 61, sizeof(struct bm_extent), mdev); + if (!resync_lru) { + retcode = KMallocFailed; + goto fail; + } + + if (!mdev->bitmap) { + if (drbd_bm_init(mdev)) { + retcode = KMallocFailed; + goto fail; + } + } + + nbc->md_bdev = inode2->i_bdev; + if (bd_claim(nbc->md_bdev, + (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL || + nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT) ? + (void *)mdev : (void *) drbd_m_holder)) { + retcode = MDMounted; + goto release_bdev_fail; + } + + if ((nbc->backing_bdev == nbc->md_bdev) != + (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL || + nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) { + retcode = LDMDInvalid; + goto release_bdev2_fail; + } + + /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */ + drbd_md_set_sector_offsets(mdev, nbc); + + if (drbd_get_max_capacity(nbc) < nbc->dc.disk_size) { + ERR("max capacity %llu smaller than disk size %llu\n", + (unsigned long long) drbd_get_max_capacity(nbc), + (unsigned long long) nbc->dc.disk_size); + retcode = LDDeviceTooSmall; + goto release_bdev2_fail; + } + + if (nbc->dc.meta_dev_idx < 0) { + max_possible_sectors = DRBD_MAX_SECTORS_FLEX; + /* at least one MB, otherwise it does not make sense */ + min_md_device_sectors = (2<<10); + } else { + max_possible_sectors = DRBD_MAX_SECTORS; + min_md_device_sectors = MD_RESERVED_SECT * (nbc->dc.meta_dev_idx + 1); + } + + if (drbd_get_capacity(nbc->md_bdev) > max_possible_sectors) + drbd_WARN("truncating very big lower level device " + "to currently maximum possible %llu sectors\n", + (unsigned long long) max_possible_sectors); + + if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) { + retcode = MDDeviceTooSmall; + drbd_WARN("refusing attach: md-device too small, " + "at least %llu sectors needed for this meta-disk type\n", + (unsigned long long) min_md_device_sectors); + goto release_bdev2_fail; + } + + /* Make sure the new disk is big enough + * (we may currently be Primary with no local disk...) */ + if (drbd_get_max_capacity(nbc) < + drbd_get_capacity(mdev->this_bdev)) { + retcode = LDDeviceTooSmall; + goto release_bdev2_fail; + } + + nbc->known_size = drbd_get_capacity(nbc->backing_bdev); + + drbd_suspend_io(mdev); + wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt)); + retcode = _drbd_request_state(mdev, NS(disk, Attaching), ChgStateVerbose); + drbd_resume_io(mdev); + if (retcode < SS_Success) + goto release_bdev2_fail; + + if (!inc_local_if_state(mdev, Attaching)) + goto force_diskless; + + drbd_thread_start(&mdev->worker); + drbd_md_set_sector_offsets(mdev, nbc); + + retcode = drbd_md_read(mdev, nbc); + if (retcode != NoError) + goto force_diskless_dec; + + if (mdev->state.conn < Connected && + mdev->state.role == Primary && + (mdev->ed_uuid & ~((u64)1)) != (nbc->md.uuid[Current] & ~((u64)1))) { + ERR("Can only attach to data with current UUID=%016llX\n", + (unsigned long long)mdev->ed_uuid); + retcode = DataOfWrongCurrent; + goto force_diskless_dec; + } + + /* Since we are diskless, fix the AL first... */ + if (drbd_check_al_size(mdev)) { + retcode = KMallocFailed; + goto force_diskless_dec; + } + + /* Prevent shrinking of consistent devices ! 
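
The plausibility limits checked here depend only on the meta-data index
mode: flexible or internal meta data may grow with the device, while a
fixed-size slot with index n must leave room for n+1 reserved areas.
Sketched standalone (the value of MD_RESERVED_SECT is an assumption of this
example, as are the two maxima passed in):

    #include <stdio.h>

    #define MD_RESERVED_SECT (128ULL << 11) /* assumed: 128 MB in sectors */

    struct md_limits { unsigned long long max_data, min_md; };

    static struct md_limits md_limits(int meta_dev_idx,
                                      unsigned long long max_flex,
                                      unsigned long long max_fixed)
    {
            struct md_limits l;

            if (meta_dev_idx < 0) { /* internal / flexible */
                    l.max_data = max_flex;
                    l.min_md = 2 << 10; /* at least one MB */
            } else { /* fixed-size, indexed */
                    l.max_data = max_fixed;
                    l.min_md = MD_RESERVED_SECT * (meta_dev_idx + 1);
            }
            return l;
    }

    int main(void)
    {
            struct md_limits l = md_limits(-1, 1ULL << 32, 1ULL << 31);
            printf("flex: max=%llu min_md=%llu\n", l.max_data, l.min_md);
            return 0;
    }
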
*/ + if (drbd_md_test_flag(nbc, MDF_Consistent) && + drbd_new_dev_size(mdev, nbc) < nbc->md.la_size_sect) { + drbd_WARN("refusing to truncate a consistent device\n"); + retcode = LDDeviceTooSmall; + goto force_diskless_dec; + } + + if (!drbd_al_read_log(mdev, nbc)) { + retcode = MDIOError; + goto force_diskless_dec; + } + + /* allocate a second IO page if hardsect != 512 */ + hardsect = drbd_get_hardsect(nbc->md_bdev); + if (hardsect == 0) + hardsect = MD_HARDSECT; + + if (hardsect != MD_HARDSECT) { + if (!mdev->md_io_tmpp) { + struct page *page = alloc_page(GFP_NOIO); + if (!page) + goto force_diskless_dec; + + drbd_WARN("Meta data's bdev hardsect = %d != %d\n", + hardsect, MD_HARDSECT); + drbd_WARN("Workaround engaged (has performace impact).\n"); + + mdev->md_io_tmpp = page; + } + } + + /* Reset the "barriers don't work" bits here, then force meta data to + * be written, to ensure we determine if barriers are supported. */ + if (nbc->dc.no_md_flush) + set_bit(MD_NO_BARRIER, &mdev->flags); + else + clear_bit(MD_NO_BARRIER, &mdev->flags); + + /* Point of no return reached. + * Devices and memory are no longer released by error cleanup below. + * now mdev takes over responsibility, and the state engine should + * clean it up somewhere. */ + D_ASSERT(mdev->bc == NULL); + mdev->bc = nbc; + mdev->resync = resync_lru; + nbc = NULL; + resync_lru = NULL; + + mdev->write_ordering = WO_bio_barrier; + drbd_bump_write_ordering(mdev, WO_bio_barrier); + + if (drbd_md_test_flag(mdev->bc, MDF_CrashedPrimary)) + set_bit(CRASHED_PRIMARY, &mdev->flags); + else + clear_bit(CRASHED_PRIMARY, &mdev->flags); + + if (drbd_md_test_flag(mdev->bc, MDF_PrimaryInd)) { + set_bit(CRASHED_PRIMARY, &mdev->flags); + cp_discovered = 1; + } + + mdev->send_cnt = 0; + mdev->recv_cnt = 0; + mdev->read_cnt = 0; + mdev->writ_cnt = 0; + + drbd_setup_queue_param(mdev, DRBD_MAX_SEGMENT_SIZE); + + /* If I am currently not Primary, + * but meta data primary indicator is set, + * I just now recover from a hard crash, + * and have been Primary before that crash. + * + * Now, if I had no connection before that crash + * (have been degraded Primary), chances are that + * I won't find my peer now either. + * + * In that case, and _only_ in that case, + * we use the degr-wfc-timeout instead of the default, + * so we can automatically recover from a crash of a + * degraded but active "cluster" after a certain timeout. + */ + clear_bit(USE_DEGR_WFC_T, &mdev->flags); + if (mdev->state.role != Primary && + drbd_md_test_flag(mdev->bc, MDF_PrimaryInd) && + !drbd_md_test_flag(mdev->bc, MDF_ConnectedInd)) + set_bit(USE_DEGR_WFC_T, &mdev->flags); + + dd = drbd_determin_dev_size(mdev); + if (dd == dev_size_error) { + retcode = VMallocFailed; + goto force_diskless_dec; + } else if (dd == grew) + set_bit(RESYNC_AFTER_NEG, &mdev->flags); + + if (drbd_md_test_flag(mdev->bc, MDF_FullSync)) { + INFO("Assuming that all blocks are out of sync " + "(aka FullSync)\n"); + if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from attaching")) { + retcode = MDIOError; + goto force_diskless_dec; + } + } else { + if (drbd_bitmap_io(mdev, &drbd_bm_read, "read from attaching") < 0) { + retcode = MDIOError; + goto force_diskless_dec; + } + } + + if (cp_discovered) { + drbd_al_apply_to_bm(mdev); + drbd_al_to_on_disk_bm(mdev); + } + + spin_lock_irq(&mdev->req_lock); + os = mdev->state; + ns.i = os.i; + /* If MDF_Consistent is not set go into inconsistent state, + otherwise investige MDF_WasUpToDate... 
+ If MDF_WasUpToDate is not set go into Outdated disk state, + otherwise into Consistent state. + */ + if (drbd_md_test_flag(mdev->bc, MDF_Consistent)) { + if (drbd_md_test_flag(mdev->bc, MDF_WasUpToDate)) + ns.disk = Consistent; + else + ns.disk = Outdated; + } else { + ns.disk = Inconsistent; + } + + if (drbd_md_test_flag(mdev->bc, MDF_PeerOutDated)) + ns.pdsk = Outdated; + + if ( ns.disk == Consistent && + (ns.pdsk == Outdated || mdev->bc->dc.fencing == DontCare)) + ns.disk = UpToDate; + + /* All tests on MDF_PrimaryInd, MDF_ConnectedInd, + MDF_Consistent and MDF_WasUpToDate must happen before + this point, because drbd_request_state() modifies these + flags. */ + + /* In case we are Connected postpone any desicion on the new disk + state after the negotiation phase. */ + if (mdev->state.conn == Connected) { + mdev->new_state_tmp.i = ns.i; + ns.i = os.i; + ns.disk = Negotiating; + } + + rv = _drbd_set_state(mdev, ns, ChgStateVerbose, NULL); + ns = mdev->state; + spin_unlock_irq(&mdev->req_lock); + + if (rv < SS_Success) + goto force_diskless_dec; + + if (mdev->state.role == Primary) + mdev->bc->md.uuid[Current] |= (u64)1; + else + mdev->bc->md.uuid[Current] &= ~(u64)1; + + drbd_md_mark_dirty(mdev); + drbd_md_sync(mdev); + + drbd_kobject_uevent(mdev); + dec_local(mdev); + reply->ret_code = retcode; + return 0; + + force_diskless_dec: + dec_local(mdev); + force_diskless: + drbd_force_state(mdev, NS(disk, Diskless)); + drbd_md_sync(mdev); + release_bdev2_fail: + if (nbc) + bd_release(nbc->md_bdev); + release_bdev_fail: + if (nbc) + bd_release(nbc->backing_bdev); + fail: + if (nbc) { + if (nbc->lo_file) + fput(nbc->lo_file); + if (nbc->md_file) + fput(nbc->md_file); + kfree(nbc); + } + if (resync_lru) + lc_free(resync_lru); + + reply->ret_code = retcode; + return 0; +} + +STATIC int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + fsync_bdev(mdev->this_bdev); + reply->ret_code = drbd_request_state(mdev, NS(disk, Diskless)); + + __set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ/20); /* 50ms; Time for worker to finally terminate */ + + return 0; +} + +#define HMAC_NAME_L 20 + +STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + int i, ns; + enum ret_codes retcode; + struct net_conf *new_conf = NULL; + struct crypto_hash *tfm = NULL; + struct crypto_hash *integrity_w_tfm = NULL; + struct crypto_hash *integrity_r_tfm = NULL; + struct hlist_head *new_tl_hash = NULL; + struct hlist_head *new_ee_hash = NULL; + struct drbd_conf *odev; + char hmac_name[HMAC_NAME_L]; + void *int_dig_out = NULL; + void *int_dig_in = NULL; + void *int_dig_vv = NULL; + + if (mdev->state.conn > StandAlone) { + retcode = HaveNetConfig; + goto fail; + } + + new_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); + if (!new_conf) { + retcode = KMallocFailed; + goto fail; + } + + if (!(nlp->flags & DRBD_NL_SET_DEFAULTS) && inc_net(mdev)) { + memcpy(new_conf, mdev->net_conf, sizeof(struct net_conf)); + dec_net(mdev); + } else { + memset(new_conf, 0, sizeof(struct net_conf)); + new_conf->timeout = DRBD_TIMEOUT_DEF; + new_conf->try_connect_int = DRBD_CONNECT_INT_DEF; + new_conf->ping_int = DRBD_PING_INT_DEF; + new_conf->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF; + new_conf->max_buffers = DRBD_MAX_BUFFERS_DEF; + new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF; + new_conf->sndbuf_size = DRBD_SNDBUF_SIZE_DEF; + new_conf->ko_count = DRBD_KO_COUNT_DEF; + new_conf->after_sb_0p = 
+		new_conf->after_sb_1p = DRBD_AFTER_SB_1P_DEF;
+		new_conf->after_sb_2p = DRBD_AFTER_SB_2P_DEF;
+		new_conf->want_lose = 0;
+		new_conf->two_primaries = 0;
+		new_conf->wire_protocol = DRBD_PROT_C;
+		new_conf->ping_timeo = DRBD_PING_TIMEO_DEF;
+		new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF;
+	}
+
+	if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) {
+		retcode = UnknownMandatoryTag;
+		goto fail;
+	}
+
+	if (new_conf->two_primaries
+	    && (new_conf->wire_protocol != DRBD_PROT_C)) {
+		retcode = ProtocolCRequired;
+		goto fail;
+	}
+
+	if (mdev->state.role == Primary && new_conf->want_lose) {
+		retcode = DiscardNotAllowed;
+		goto fail;
+	}
+
+#define M_ADDR(A) (((struct sockaddr_in *)&A->my_addr)->sin_addr.s_addr)
+#define M_PORT(A) (((struct sockaddr_in *)&A->my_addr)->sin_port)
+#define O_ADDR(A) (((struct sockaddr_in *)&A->peer_addr)->sin_addr.s_addr)
+#define O_PORT(A) (((struct sockaddr_in *)&A->peer_addr)->sin_port)
+	retcode = NoError;
+	for (i = 0; i < minor_count; i++) {
+		odev = minor_to_mdev(i);
+		if (!odev || odev == mdev)
+			continue;
+		if (inc_net(odev)) {
+			if (M_ADDR(new_conf) == M_ADDR(odev->net_conf) &&
+			    M_PORT(new_conf) == M_PORT(odev->net_conf))
+				retcode = LAAlreadyInUse;
+
+			if (O_ADDR(new_conf) == O_ADDR(odev->net_conf) &&
+			    O_PORT(new_conf) == O_PORT(odev->net_conf))
+				retcode = OAAlreadyInUse;
+
+			dec_net(odev);
+			if (retcode != NoError)
+				goto fail;
+		}
+	}
+#undef M_ADDR
+#undef M_PORT
+#undef O_ADDR
+#undef O_PORT
+
+	if (new_conf->cram_hmac_alg[0] != 0) {
+		snprintf(hmac_name, HMAC_NAME_L, "hmac(%s)",
+			new_conf->cram_hmac_alg);
+		tfm = crypto_alloc_hash(hmac_name, 0, CRYPTO_ALG_ASYNC);
+		if (IS_ERR(tfm)) {
+			tfm = NULL;
+			retcode = CRAMAlgNotAvail;
+			goto fail;
+		}
+
+		if (crypto_tfm_alg_type(crypto_hash_tfm(tfm))
+		    != CRYPTO_ALG_TYPE_HASH) {
+			retcode = CRAMAlgNotDigest;
+			goto fail;
+		}
+	}
+
+	if (new_conf->integrity_alg[0]) {
+		integrity_w_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
+		if (IS_ERR(integrity_w_tfm)) {
+			integrity_w_tfm = NULL;
+			retcode = IntegrityAlgNotAvail;
+			goto fail;
+		}
+
+		if (crypto_tfm_alg_type(crypto_hash_tfm(integrity_w_tfm)) != CRYPTO_ALG_TYPE_DIGEST) {
+			retcode = IntegrityAlgNotDigest;
+			goto fail;
+		}
+
+		integrity_r_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
+		if (IS_ERR(integrity_r_tfm)) {
+			integrity_r_tfm = NULL;
+			retcode = IntegrityAlgNotAvail;
+			goto fail;
+		}
+	}
+
+	ns = new_conf->max_epoch_size/8;
+	if (mdev->tl_hash_s != ns) {
+		new_tl_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL);
+		if (!new_tl_hash) {
+			retcode = KMallocFailed;
+			goto fail;
+		}
+	}
+
+	ns = new_conf->max_buffers/8;
+	if (new_conf->two_primaries && (mdev->ee_hash_s != ns)) {
+		new_ee_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL);
+		if (!new_ee_hash) {
+			retcode = KMallocFailed;
+			goto fail;
+		}
+	}
+
+	((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
+
+#if 0
+	/* for the connection loss logic in drbd_recv
+	 * I _need_ the resulting timeo in jiffies to be
+	 * non-zero and different
+	 *
+	 * XXX maybe rather store the value scaled to jiffies?
+	 * Note: MAX_SCHEDULE_TIMEOUT/HZ*HZ != MAX_SCHEDULE_TIMEOUT
+	 *	 and HZ > 10; which is unlikely to change...
+	 * Thus, if interrupted by a signal,
+	 * sock_{send,recv}msg returns -EINTR,
+	 * if the timeout expires, -EAGAIN.
+	 */
+	/* unlikely: someone disabled the timeouts ...
+	 * just put some huge values in there. */
+	if (!new_conf->ping_int)
+		new_conf->ping_int = MAX_SCHEDULE_TIMEOUT/HZ;
+	if (!new_conf->timeout)
+		new_conf->timeout = MAX_SCHEDULE_TIMEOUT/HZ*10;
+	if (new_conf->ping_int*10 < new_conf->timeout)
+		new_conf->timeout = new_conf->ping_int*10/6;
+	if (new_conf->ping_int*10 == new_conf->timeout)
+		new_conf->ping_int = new_conf->ping_int+1;
+#endif
+
+	if (integrity_w_tfm) {
+		i = crypto_hash_digestsize(integrity_w_tfm);
+		int_dig_out = kmalloc(i, GFP_KERNEL);
+		if (!int_dig_out) {
+			retcode = KMallocFailed;
+			goto fail;
+		}
+		int_dig_in = kmalloc(i, GFP_KERNEL);
+		if (!int_dig_in) {
+			retcode = KMallocFailed;
+			goto fail;
+		}
+		int_dig_vv = kmalloc(i, GFP_KERNEL);
+		if (!int_dig_vv) {
+			retcode = KMallocFailed;
+			goto fail;
+		}
+	}
+
+	if (!mdev->bitmap) {
+		if (drbd_bm_init(mdev)) {
+			retcode = KMallocFailed;
+			goto fail;
+		}
+	}
+
+	D_ASSERT(mdev->net_conf == NULL);
+	mdev->net_conf = new_conf;
+
+	mdev->send_cnt = 0;
+	mdev->recv_cnt = 0;
+
+	if (new_tl_hash) {
+		kfree(mdev->tl_hash);
+		mdev->tl_hash_s = mdev->net_conf->max_epoch_size/8;
+		mdev->tl_hash = new_tl_hash;
+	}
+
+	if (new_ee_hash) {
+		kfree(mdev->ee_hash);
+		mdev->ee_hash_s = mdev->net_conf->max_buffers/8;
+		mdev->ee_hash = new_ee_hash;
+	}
+
+	crypto_free_hash(mdev->cram_hmac_tfm);
+	mdev->cram_hmac_tfm = tfm;
+
+	crypto_free_hash(mdev->integrity_w_tfm);
+	mdev->integrity_w_tfm = integrity_w_tfm;
+
+	crypto_free_hash(mdev->integrity_r_tfm);
+	mdev->integrity_r_tfm = integrity_r_tfm;
+
+	kfree(mdev->int_dig_out);
+	kfree(mdev->int_dig_in);
+	kfree(mdev->int_dig_vv);
+	mdev->int_dig_out = int_dig_out;
+	mdev->int_dig_in = int_dig_in;
+	mdev->int_dig_vv = int_dig_vv;
+
+	retcode = _drbd_request_state(mdev, NS(conn, Unconnected), ChgStateVerbose);
+	if (retcode >= SS_Success)
+		drbd_thread_start(&mdev->worker);
+
+	drbd_kobject_uevent(mdev);
+	reply->ret_code = retcode;
+	return 0;
+
+fail:
+	kfree(int_dig_out);
+	kfree(int_dig_in);
+	kfree(int_dig_vv);
+	crypto_free_hash(tfm);
+	crypto_free_hash(integrity_w_tfm);
+	crypto_free_hash(integrity_r_tfm);
+	kfree(new_tl_hash);
+	kfree(new_ee_hash);
+	kfree(new_conf);
+
+	reply->ret_code = retcode;
+	return 0;
+}
+
+STATIC int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			      struct drbd_nl_cfg_reply *reply)
+{
+	int retcode;
+
+	retcode = _drbd_request_state(mdev, NS(conn, Disconnecting), ChgOrdered);
+
+	if (retcode == SS_NothingToDo)
+		goto done;
+	else if (retcode == SS_AlreadyStandAlone)
+		goto done;
+	else if (retcode == SS_PrimaryNOP) {
+		/* Our state checking code wants to see the peer outdated. */
+		retcode = drbd_request_state(mdev, NS2(conn, Disconnecting,
+						       pdsk, Outdated));
+	} else if (retcode == SS_CW_FailedByPeer) {
+		/* The peer probably wants to see us outdated. */
+		retcode = _drbd_request_state(mdev, NS2(conn, Disconnecting,
+							disk, Outdated),
+					      ChgOrdered);
+		if (retcode == SS_IsDiskLess || retcode == SS_LowerThanOutdated) {
+			drbd_force_state(mdev, NS(conn, Disconnecting));
+			retcode = SS_Success;
+		}
+	}
+
+	if (retcode < SS_Success)
+		goto fail;
+
+	if (wait_event_interruptible(mdev->state_wait,
+				     mdev->state.conn != Disconnecting)) {
+		/* Do not test for mdev->state.conn == StandAlone, since
+		   someone else might connect us in the meantime! */
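+		/* We only report the interruption to user space (GotSignal
+		 * maps to EINTR); the Disconnecting request issued above
+		 * still completes in the background. */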
+		retcode = GotSignal;
+		goto fail;
+	}
+
+ done:
+	retcode = NoError;
+ fail:
+	drbd_md_sync(mdev);
+	reply->ret_code = retcode;
+	return 0;
+}
+
+void resync_after_online_grow(struct drbd_conf *mdev)
+{
+	int iass; /* I am sync source */
+
+	INFO("Resync of new storage after online grow\n");
+	if (mdev->state.role != mdev->state.peer)
+		iass = (mdev->state.role == Primary);
+	else
+		iass = test_bit(DISCARD_CONCURRENT, &mdev->flags);
+
+	if (iass)
+		drbd_start_resync(mdev, SyncSource);
+	else
+		_drbd_request_state(mdev, NS(conn, WFSyncUUID), ChgStateVerbose + ChgSerialize);
+}
+
+STATIC int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			  struct drbd_nl_cfg_reply *reply)
+{
+	struct resize rs;
+	int retcode = NoError;
+	int ldsc = 0; /* local disk size changed */
+	enum determin_dev_size_enum dd;
+
+	memset(&rs, 0, sizeof(struct resize));
+	if (!resize_from_tags(mdev, nlp->tag_list, &rs)) {
+		retcode = UnknownMandatoryTag;
+		goto fail;
+	}
+
+	if (mdev->state.conn > Connected) {
+		retcode = NoResizeDuringResync;
+		goto fail;
+	}
+
+	if (mdev->state.role == Secondary &&
+	    mdev->state.peer == Secondary) {
+		retcode = APrimaryNodeNeeded;
+		goto fail;
+	}
+
+	if (!inc_local(mdev)) {
+		retcode = HaveNoDiskConfig;
+		goto fail;
+	}
+
+	if (mdev->bc->known_size != drbd_get_capacity(mdev->bc->backing_bdev)) {
+		mdev->bc->known_size = drbd_get_capacity(mdev->bc->backing_bdev);
+		ldsc = 1;
+	}
+
+	mdev->bc->dc.disk_size = (sector_t)rs.resize_size;
+	dd = drbd_determin_dev_size(mdev);
+	drbd_md_sync(mdev);
+	dec_local(mdev);
+	if (dd == dev_size_error) {
+		retcode = VMallocFailed;
+		goto fail;
+	}
+
+	if (mdev->state.conn == Connected && (dd != unchanged || ldsc)) {
+		drbd_send_uuids(mdev);
+		drbd_send_sizes(mdev);
+		if (dd == grew)
+			resync_after_online_grow(mdev);
+	}
+
+ fail:
+	reply->ret_code = retcode;
+	return 0;
+}
+
+STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			       struct drbd_nl_cfg_reply *reply)
+{
+	int retcode = NoError;
+	int err;
+	int ovr; /* online verify running */
+	int rsr; /* re-sync running */
+	struct drbd_conf *odev;
+	struct crypto_hash *verify_tfm = NULL;
+	struct crypto_hash *csums_tfm = NULL;
+	struct syncer_conf sc;
+	cpumask_t n_cpu_mask = CPU_MASK_NONE;
+
+	memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
+
+	if (nlp->flags & DRBD_NL_SET_DEFAULTS) {
+		memset(&sc, 0, sizeof(struct syncer_conf));
+		sc.rate = DRBD_RATE_DEF;
+		sc.after = DRBD_AFTER_DEF;
+		sc.al_extents = DRBD_AL_EXTENTS_DEF;
+	}
+
+	if (!syncer_conf_from_tags(mdev, nlp->tag_list, &sc)) {
+		retcode = UnknownMandatoryTag;
+		goto fail;
+	}
+
+	if (sc.after != -1) {
+		if (sc.after < -1 || minor_to_mdev(sc.after) == NULL) {
+			retcode = SyncAfterInvalid;
+			goto fail;
+		}
+		odev = minor_to_mdev(sc.after); /* check for loops in the sync-after chain */
+		while (1) {
+			if (odev == mdev) {
+				retcode = SyncAfterCycle;
+				goto fail;
+			}
+			if (odev->sync_conf.after == -1)
+				break; /* no cycles. */
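+			/* a sane chain (e.g. 2 -> 1 -> 0) ends at a device
+			 * with after == -1; walking back onto mdev itself
+			 * would mean the sync-after dependencies loop */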
+			odev = minor_to_mdev(odev->sync_conf.after);
+		}
+	}
+
+	/* re-sync running */
+	rsr = ( mdev->state.conn == SyncSource ||
+		mdev->state.conn == SyncTarget ||
+		mdev->state.conn == PausedSyncS ||
+		mdev->state.conn == PausedSyncT );
+
+	if (rsr && strcmp(sc.csums_alg, mdev->sync_conf.csums_alg)) {
+		retcode = CSUMSResyncRunning;
+		goto fail;
+	}
+
+	if (!rsr && sc.csums_alg[0]) {
+		csums_tfm = crypto_alloc_hash(sc.csums_alg, 0, CRYPTO_ALG_ASYNC);
+		if (IS_ERR(csums_tfm)) {
+			csums_tfm = NULL;
+			retcode = CSUMSAlgNotAvail;
+			goto fail;
+		}
+
+		if (crypto_tfm_alg_type(crypto_hash_tfm(csums_tfm)) != CRYPTO_ALG_TYPE_DIGEST) {
+			retcode = CSUMSAlgNotDigest;
+			goto fail;
+		}
+	}
+
+	/* online verify running */
+	ovr = (mdev->state.conn == VerifyS || mdev->state.conn == VerifyT);
+
+	if (ovr) {
+		if (strcmp(sc.verify_alg, mdev->sync_conf.verify_alg)) {
+			retcode = VERIFYIsRunning;
+			goto fail;
+		}
+	}
+
+	if (!ovr && sc.verify_alg[0]) {
+		verify_tfm = crypto_alloc_hash(sc.verify_alg, 0, CRYPTO_ALG_ASYNC);
+		if (IS_ERR(verify_tfm)) {
+			verify_tfm = NULL;
+			retcode = VERIFYAlgNotAvail;
+			goto fail;
+		}
+
+		if (crypto_tfm_alg_type(crypto_hash_tfm(verify_tfm)) != CRYPTO_ALG_TYPE_DIGEST) {
+			retcode = VERIFYAlgNotDigest;
+			goto fail;
+		}
+	}
+
+	if (sc.cpu_mask[0] != 0) {
+		err = __bitmap_parse(sc.cpu_mask, 32, 0, (unsigned long *)&n_cpu_mask, NR_CPUS);
+		if (err) {
+			drbd_WARN("__bitmap_parse() failed with %d\n", err);
+			retcode = CPUMaskParseFailed;
+			goto fail;
+		}
+	}
+
+	ERR_IF (sc.rate < 1) sc.rate = 1;
+	ERR_IF (sc.al_extents < 7) sc.al_extents = 127; /* arbitrary minimum */
+#define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT)
+	if (sc.al_extents > AL_MAX) {
+		ERR("sc.al_extents > %d\n", AL_MAX);
+		sc.al_extents = AL_MAX;
+	}
+#undef AL_MAX
+
+	spin_lock(&mdev->peer_seq_lock);
+	/* lock against receive_SyncParam() */
+	mdev->sync_conf = sc;
+
+	if (!rsr) {
+		crypto_free_hash(mdev->csums_tfm);
+		mdev->csums_tfm = csums_tfm;
+		csums_tfm = NULL;
+	}
+
+	if (!ovr) {
+		crypto_free_hash(mdev->verify_tfm);
+		mdev->verify_tfm = verify_tfm;
+		verify_tfm = NULL;
+	}
+	spin_unlock(&mdev->peer_seq_lock);
+
+	if (inc_local(mdev)) {
+		wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
+		drbd_al_shrink(mdev);
+		err = drbd_check_al_size(mdev);
+		lc_unlock(mdev->act_log);
+		wake_up(&mdev->al_wait);
+
+		dec_local(mdev);
+		drbd_md_sync(mdev);
+
+		if (err) {
+			retcode = KMallocFailed;
+			goto fail;
+		}
+	}
+
+	if (mdev->state.conn >= Connected)
+		drbd_send_sync_param(mdev, &sc);
+
+	drbd_alter_sa(mdev, sc.after);
+
+	if (!cpus_equal(mdev->cpu_mask, n_cpu_mask)) {
+		mdev->cpu_mask = n_cpu_mask;
+		mdev->cpu_mask = drbd_calc_cpu_mask(mdev);
+		mdev->receiver.reset_cpu_mask = 1;
+		mdev->asender.reset_cpu_mask = 1;
+		mdev->worker.reset_cpu_mask = 1;
+	}
+
+	drbd_kobject_uevent(mdev);
+fail:
+	crypto_free_hash(csums_tfm);
+	crypto_free_hash(verify_tfm);
+	reply->ret_code = retcode;
+	return 0;
+}
+
+STATIC int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			      struct drbd_nl_cfg_reply *reply)
+{
+	int retcode;
+
+	retcode = _drbd_request_state(mdev, NS(conn, StartingSyncT), ChgOrdered);
+
+	if (retcode < SS_Success && retcode != SS_NeedConnection)
+		retcode = drbd_request_state(mdev, NS(conn, StartingSyncT));
+
+	while (retcode == SS_NeedConnection) {
+		spin_lock_irq(&mdev->req_lock);
+		if (mdev->state.conn < Connected)
+			retcode = _drbd_set_state(_NS(mdev, disk, Inconsistent), ChgStateVerbose, NULL);
+		spin_unlock_irq(&mdev->req_lock);
+
+		if (retcode != SS_NeedConnection)
+			break;
+
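+		/* retry: with the disk now Inconsistent (or a connection
+		 * established in the meantime) the request may succeed */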
+		retcode = drbd_request_state(mdev, NS(conn, StartingSyncT));
+	}
+
+	reply->ret_code = retcode;
+	return 0;
+}
+
+STATIC int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+				   struct drbd_nl_cfg_reply *reply)
+{
+	reply->ret_code = drbd_request_state(mdev, NS(conn, StartingSyncS));
+
+	return 0;
+}
+
+STATIC int drbd_nl_pause_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			      struct drbd_nl_cfg_reply *reply)
+{
+	int retcode = NoError;
+
+	if (drbd_request_state(mdev, NS(user_isp, 1)) == SS_NothingToDo)
+		retcode = PauseFlagAlreadySet;
+
+	reply->ret_code = retcode;
+	return 0;
+}
+
+STATIC int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			       struct drbd_nl_cfg_reply *reply)
+{
+	int retcode = NoError;
+
+	if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NothingToDo)
+		retcode = PauseFlagAlreadyClear;
+
+	reply->ret_code = retcode;
+	return 0;
+}
+
+STATIC int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			      struct drbd_nl_cfg_reply *reply)
+{
+	reply->ret_code = drbd_request_state(mdev, NS(susp, 1));
+
+	return 0;
+}
+
+STATIC int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			     struct drbd_nl_cfg_reply *reply)
+{
+	reply->ret_code = drbd_request_state(mdev, NS(susp, 0));
+	return 0;
+}
+
+STATIC int drbd_nl_outdate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			   struct drbd_nl_cfg_reply *reply)
+{
+	reply->ret_code = drbd_request_state(mdev, NS(disk, Outdated));
+	return 0;
+}
+
+STATIC int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			      struct drbd_nl_cfg_reply *reply)
+{
+	unsigned short *tl;
+
+	tl = reply->tag_list;
+
+	if (inc_local(mdev)) {
+		tl = disk_conf_to_tags(mdev, &mdev->bc->dc, tl);
+		dec_local(mdev);
+	}
+
+	if (inc_net(mdev)) {
+		tl = net_conf_to_tags(mdev, mdev->net_conf, tl);
+		dec_net(mdev);
+	}
+	tl = syncer_conf_to_tags(mdev, &mdev->sync_conf, tl);
+
+	*tl++ = TT_END; /* Close the tag list */
+
+	return (int)((char *)tl - (char *)reply->tag_list);
+}
+
+STATIC int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			     struct drbd_nl_cfg_reply *reply)
+{
+	unsigned short *tl = reply->tag_list;
+	union drbd_state_t s = mdev->state;
+	unsigned long rs_left;
+	unsigned int res;
+
+	tl = get_state_to_tags(mdev, (struct get_state *)&s, tl);
+
+	/* no local ref, no bitmap, no syncer progress. */
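+	/* i.e. progress is only sampled while a resync is actually running
+	 * (conn between SyncSource and PausedSyncT, tested below) and while
+	 * we can still get a local reference */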
+	if (s.conn >= SyncSource && s.conn <= PausedSyncT) {
+		if (inc_local(mdev)) {
+			drbd_get_syncer_progress(mdev, &rs_left, &res);
+			*tl++ = T_sync_progress;
+			*tl++ = sizeof(int);
+			memcpy(tl, &res, sizeof(int));
+			tl = (unsigned short *)((char *)tl + sizeof(int));
+			dec_local(mdev);
+		}
+	}
+	*tl++ = TT_END; /* Close the tag list */
+
+	return (int)((char *)tl - (char *)reply->tag_list);
+}
+
+STATIC int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			     struct drbd_nl_cfg_reply *reply)
+{
+	unsigned short *tl;
+
+	tl = reply->tag_list;
+
+	if (inc_local(mdev)) {
+		/* This is a hand crafted add tag ;) */
+		*tl++ = T_uuids;
+		*tl++ = UUID_SIZE*sizeof(u64);
+		memcpy(tl, mdev->bc->md.uuid, UUID_SIZE*sizeof(u64));
+		tl = (unsigned short *)((char *)tl + UUID_SIZE*sizeof(u64));
+		*tl++ = T_uuids_flags;
+		*tl++ = sizeof(int);
+		memcpy(tl, &mdev->bc->md.flags, sizeof(int));
+		tl = (unsigned short *)((char *)tl + sizeof(int));
+		dec_local(mdev);
+	}
+	*tl++ = TT_END; /* Close the tag list */
+
+	return (int)((char *)tl - (char *)reply->tag_list);
+}
+
+/**
+ * drbd_nl_get_timeout_flag:
+ * Is used by drbdsetup to find out which timeout value to use.
+ */
+STATIC int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+				    struct drbd_nl_cfg_reply *reply)
+{
+	unsigned short *tl;
+	char rv;
+
+	tl = reply->tag_list;
+
+	rv = mdev->state.pdsk == Outdated ? UT_PeerOutdated :
+	     test_bit(USE_DEGR_WFC_T, &mdev->flags) ? UT_Degraded : UT_Default;
+
+	/* This is a hand crafted add tag ;) */
+	*tl++ = T_use_degraded;
+	*tl++ = sizeof(char);
+	*((char *)tl) = rv;
+	tl = (unsigned short *)((char *)tl + sizeof(char));
+	*tl++ = TT_END;
+
+	return (int)((char *)tl - (char *)reply->tag_list);
+}
+
+STATIC int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			    struct drbd_nl_cfg_reply *reply)
+{
+	reply->ret_code = drbd_request_state(mdev, NS(conn, VerifyS));
+
+	return 0;
+}
+
+
+STATIC int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			      struct drbd_nl_cfg_reply *reply)
+{
+	int retcode = NoError;
+	int err;
+
+	struct new_c_uuid args;
+
+	memset(&args, 0, sizeof(struct new_c_uuid));
+	if (!new_c_uuid_from_tags(mdev, nlp->tag_list, &args)) {
+		reply->ret_code = UnknownMandatoryTag;
+		return 0;
+	}
+
+	mutex_lock(&mdev->state_mutex); /* Protects us against concurrent state changes. */
+
+	if (mdev->state.conn >= Connected) {
+		retcode = MayNotBeConnected;
+		goto out;
+	}
+
+	if (!inc_local(mdev)) {
+		retcode = HaveNoDiskConfig;
+		goto out;
+	}
+
+	drbd_uuid_set(mdev, Bitmap, 0); /* Rotate Bitmap to History 1, etc... */
+	drbd_uuid_new_current(mdev); /* New current, previous to Bitmap */
+
+	if (args.clear_bm) {
+		err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, "clear_n_write from new_c_uuid");
+		if (err) {
+			ERR("Writing bitmap failed with %d\n", err);
+			retcode = MDIOError;
+		}
+	}
+
+	drbd_md_sync(mdev);
+	dec_local(mdev);
+out:
+	mutex_unlock(&mdev->state_mutex);
+
+	reply->ret_code = retcode;
+	return 0;
+}
+
+STATIC struct drbd_conf *ensure_mdev(struct drbd_nl_cfg_req *nlp)
+{
+	struct drbd_conf *mdev;
+
+	if (nlp->drbd_minor >= minor_count)
+		return NULL;
+
+	mdev = minor_to_mdev(nlp->drbd_minor);
+
+	if (!mdev && (nlp->flags & DRBD_NL_CREATE_DEVICE)) {
+		struct gendisk *disk = NULL;
+		mdev = drbd_new_device(nlp->drbd_minor);
+
+		spin_lock_irq(&drbd_pp_lock);
+		if (minor_table[nlp->drbd_minor] == NULL) {
+			minor_table[nlp->drbd_minor] = mdev;
+			disk = mdev->vdisk;
+			mdev = NULL;
+		} /* else: we lost the race */
+		spin_unlock_irq(&drbd_pp_lock);
+
+		if (disk) /* we won the race above */
+			/* in case we ever add a drbd_delete_device(),
+			 * don't forget the del_gendisk! */
+			add_disk(disk);
+		else /* we lost the race above */
+			drbd_free_mdev(mdev);
+
+		mdev = minor_to_mdev(nlp->drbd_minor);
+	}
+
+	return mdev;
+}
+
+struct cn_handler_struct {
+	int (*function)(struct drbd_conf *,
+			struct drbd_nl_cfg_req *,
+			struct drbd_nl_cfg_reply *);
+	int reply_body_size;
+};
+
+static struct cn_handler_struct cnd_table[] = {
+	[ P_primary ]		= { &drbd_nl_primary,		0 },
+	[ P_secondary ]		= { &drbd_nl_secondary,		0 },
+	[ P_disk_conf ]		= { &drbd_nl_disk_conf,		0 },
+	[ P_detach ]		= { &drbd_nl_detach,		0 },
+	[ P_net_conf ]		= { &drbd_nl_net_conf,		0 },
+	[ P_disconnect ]	= { &drbd_nl_disconnect,	0 },
+	[ P_resize ]		= { &drbd_nl_resize,		0 },
+	[ P_syncer_conf ]	= { &drbd_nl_syncer_conf,	0 },
+	[ P_invalidate ]	= { &drbd_nl_invalidate,	0 },
+	[ P_invalidate_peer ]	= { &drbd_nl_invalidate_peer,	0 },
+	[ P_pause_sync ]	= { &drbd_nl_pause_sync,	0 },
+	[ P_resume_sync ]	= { &drbd_nl_resume_sync,	0 },
+	[ P_suspend_io ]	= { &drbd_nl_suspend_io,	0 },
+	[ P_resume_io ]		= { &drbd_nl_resume_io,		0 },
+	[ P_outdate ]		= { &drbd_nl_outdate,		0 },
+	[ P_get_config ]	= { &drbd_nl_get_config,
+				    sizeof(struct syncer_conf_tag_len_struct) +
+				    sizeof(struct disk_conf_tag_len_struct) +
+				    sizeof(struct net_conf_tag_len_struct) },
+	[ P_get_state ]		= { &drbd_nl_get_state,
+				    sizeof(struct get_state_tag_len_struct) +
+				    sizeof(struct sync_progress_tag_len_struct) },
+	[ P_get_uuids ]		= { &drbd_nl_get_uuids,
+				    sizeof(struct get_uuids_tag_len_struct) },
+	[ P_get_timeout_flag ]	= { &drbd_nl_get_timeout_flag,
+				    sizeof(struct get_timeout_flag_tag_len_struct) },
+	[ P_start_ov ]		= { &drbd_nl_start_ov,		0 },
+	[ P_new_c_uuid ]	= { &drbd_nl_new_c_uuid,	0 },
+};
+
+STATIC void drbd_connector_callback(void *data)
+{
+	struct cn_msg *req = data;
+	struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data;
+	struct cn_handler_struct *cm;
+	struct cn_msg *cn_reply;
+	struct drbd_nl_cfg_reply *reply;
+	struct drbd_conf *mdev;
+	int retcode, rr;
+	int reply_size = sizeof(struct cn_msg)
+		+ sizeof(struct drbd_nl_cfg_reply)
+		+ sizeof(short int);
+
+	if (!try_module_get(THIS_MODULE)) {
+		printk(KERN_ERR "drbd: try_module_get() failed!\n");
+		return;
+	}
+
+	mdev = ensure_mdev(nlp);
+	if (!mdev) {
+		retcode = MinorNotKnown;
+		goto fail;
+	}
+
+	TRACE(TraceTypeNl, TraceLvlSummary, nl_trace_packet(data););
+
+	if (nlp->packet_type >= P_nl_after_last_packet) {
+		retcode = UnknownNetLinkPacket;
+		goto fail;
+	}
+
+	cm = cnd_table + nlp->packet_type;
+
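+	/* cnd_table is indexed by packet type via designated initializers;
+	 * packet numbers without a handler leave a NULL function pointer */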
+	/* This may happen if packet number is 0: */
+	if (cm->function == NULL) {
+		retcode = UnknownNetLinkPacket;
+		goto fail;
+	}
+
+	reply_size += cm->reply_body_size;
+
+	cn_reply = kmalloc(reply_size, GFP_KERNEL);
+	if (!cn_reply) {
+		retcode = KMallocFailed;
+		goto fail;
+	}
+	reply = (struct drbd_nl_cfg_reply *) cn_reply->data;
+
+	reply->packet_type =
+		cm->reply_body_size ? nlp->packet_type : P_nl_after_last_packet;
+	reply->minor = nlp->drbd_minor;
+	reply->ret_code = NoError; /* Might be modified by cm->function. */
+	/* reply->tag_list; might be modified by cm->function. */
+
+	rr = cm->function(mdev, nlp, reply);
+
+	cn_reply->id = req->id;
+	cn_reply->seq = req->seq;
+	cn_reply->ack = req->ack + 1;
+	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr;
+	cn_reply->flags = 0;
+
+	TRACE(TraceTypeNl, TraceLvlSummary, nl_trace_reply(cn_reply););
+
+	rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL);
+	if (rr && rr != -ESRCH)
+		printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
+
+	kfree(cn_reply);
+	module_put(THIS_MODULE);
+	return;
+ fail:
+	drbd_nl_send_reply(req, retcode);
+	module_put(THIS_MODULE);
+}
+
+static atomic_t drbd_nl_seq = ATOMIC_INIT(2); /* two. */
+
+static inline unsigned short *
+__tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data,
+	      int len, int nul_terminated)
+{
+	int l = tag_descriptions[tag_number(tag)].max_len;
+	l = (len < l) ? len : l;
+	*tl++ = tag;
+	*tl++ = len;
+	memcpy(tl, data, len);
+	/* TODO
+	 * maybe we need to add some padding to the data stream.
+	 * otherwise we may get strange effects on architectures
+	 * that require certain data types to be strictly aligned,
+	 * because now the next "unsigned short" may be misaligned. */
+	tl = (unsigned short *)((char *)tl + len);
+	if (nul_terminated)
+		*((char *)tl - 1) = 0;
+	return tl;
+}
+
+static inline unsigned short *
+tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, int len)
+{
+	return __tl_add_blob(tl, tag, data, len, 0);
+}
+
+static inline unsigned short *
+tl_add_str(unsigned short *tl, enum drbd_tags tag, const char *str)
+{
+	return __tl_add_blob(tl, tag, str, strlen(str)+1, 0);
+}
+
+static inline unsigned short *
+tl_add_int(unsigned short *tl, enum drbd_tags tag, const void *val)
+{
+	switch (tag_type(tag)) {
+	case TT_INTEGER:
+		*tl++ = tag;
+		*tl++ = sizeof(int);
+		*(int *)tl = *(int *)val;
+		tl = (unsigned short *)((char *)tl + sizeof(int));
+		break;
+	case TT_INT64:
+		*tl++ = tag;
+		*tl++ = sizeof(u64);
+		*(u64 *)tl = *(u64 *)val;
+		tl = (unsigned short *)((char *)tl + sizeof(u64));
+		break;
+	default:
+		/* someone did something stupid. */
+		;
+	}
+	return tl;
+}
+
+void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state_t state)
+{
+	char buffer[sizeof(struct cn_msg)+
+		    sizeof(struct drbd_nl_cfg_reply)+
+		    sizeof(struct get_state_tag_len_struct)+
+		    sizeof(short int)];
+	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
+	struct drbd_nl_cfg_reply *reply =
+		(struct drbd_nl_cfg_reply *)cn_reply->data;
+	unsigned short *tl = reply->tag_list;
+
+	/* drbd_WARN("drbd_bcast_state() got called\n"); */
+
+	tl = get_state_to_tags(mdev, (struct get_state *)&state, tl);
+	*tl++ = TT_END; /* Close the tag list */
+
+	cn_reply->id.idx = CN_IDX_DRBD;
+	cn_reply->id.val = CN_VAL_DRBD;
+
+	cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
+	cn_reply->ack = 0; /* not used here. */
+	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
+		(int)((char *)tl - (char *)reply->tag_list);
+	cn_reply->flags = 0;
+
+	reply->packet_type = P_get_state;
+	reply->minor = mdev_to_minor(mdev);
+	reply->ret_code = NoError;
+
+	TRACE(TraceTypeNl, TraceLvlSummary, nl_trace_reply(cn_reply););
+
+	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL);
+}
+
+void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name)
+{
+	char buffer[sizeof(struct cn_msg)+
+		    sizeof(struct drbd_nl_cfg_reply)+
+		    sizeof(struct call_helper_tag_len_struct)+
+		    sizeof(short int)];
+	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
+	struct drbd_nl_cfg_reply *reply =
+		(struct drbd_nl_cfg_reply *)cn_reply->data;
+	unsigned short *tl = reply->tag_list;
+	int str_len;
+
+	/* drbd_WARN("drbd_bcast_ev_helper() got called\n"); */
+
+	str_len = strlen(helper_name)+1;
+	*tl++ = T_helper;
+	*tl++ = str_len;
+	memcpy(tl, helper_name, str_len);
+	tl = (unsigned short *)((char *)tl + str_len);
+	*tl++ = TT_END; /* Close the tag list */
+
+	cn_reply->id.idx = CN_IDX_DRBD;
+	cn_reply->id.val = CN_VAL_DRBD;
+
+	cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
+	cn_reply->ack = 0; /* not used here. */
+	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
+		(int)((char *)tl - (char *)reply->tag_list);
+	cn_reply->flags = 0;
+
+	reply->packet_type = P_call_helper;
+	reply->minor = mdev_to_minor(mdev);
+	reply->ret_code = NoError;
+
+	TRACE(TraceTypeNl, TraceLvlSummary, nl_trace_reply(cn_reply););
+
+	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL);
+}
+
+void drbd_bcast_ee(struct drbd_conf *mdev,
+		const char *reason, const int dgs,
+		const char *seen_hash, const char *calc_hash,
+		const struct Tl_epoch_entry *e)
+{
+	struct cn_msg *cn_reply;
+	struct drbd_nl_cfg_reply *reply;
+	struct bio_vec *bvec;
+	unsigned short *tl;
+	int i;
+
+	if (!e)
+		return;
+	if (!reason || !reason[0])
+		return;
+
+	/* apparently we have to memcpy twice, first to prepare the data for the
+	 * struct cn_msg, then within cn_netlink_send from the cn_msg to the
+	 * netlink skb. */
+	cn_reply = kmalloc(
+		sizeof(struct cn_msg)+
+		sizeof(struct drbd_nl_cfg_reply)+
+		sizeof(struct dump_ee_tag_len_struct)+
+		sizeof(short int),
+		GFP_KERNEL);
+
+	if (!cn_reply) {
+		ERR("could not kmalloc buffer for drbd_bcast_ee, sector %llu, size %u\n",
+		    (unsigned long long)e->sector, e->size);
+		return;
+	}
+
+	reply = (struct drbd_nl_cfg_reply *)cn_reply->data;
+	tl = reply->tag_list;
+
+	tl = tl_add_str(tl, T_dump_ee_reason, reason);
+	tl = tl_add_blob(tl, T_seen_digest, seen_hash, dgs);
+	tl = tl_add_blob(tl, T_calc_digest, calc_hash, dgs);
+	tl = tl_add_int(tl, T_ee_sector, &e->sector);
+	tl = tl_add_int(tl, T_ee_block_id, &e->block_id);
+
+	*tl++ = T_ee_data;
+	*tl++ = e->size;
+
+	__bio_for_each_segment(bvec, e->private_bio, i, 0) {
+		void *d = kmap(bvec->bv_page);
+		memcpy(tl, d + bvec->bv_offset, bvec->bv_len);
+		kunmap(bvec->bv_page);
+		tl = (unsigned short *)((char *)tl + bvec->bv_len);
+	}
+	*tl++ = TT_END; /* Close the tag list */
+
+	cn_reply->id.idx = CN_IDX_DRBD;
+	cn_reply->id.val = CN_VAL_DRBD;
+
+	cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
+	cn_reply->ack = 0; /* not used here. */
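+	/* len must cover the reply header plus all tag bytes appended above,
+	 * so the receiver can walk the stream up to TT_END */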
+	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
+		(int)((char *)tl - (char *)reply->tag_list);
+	cn_reply->flags = 0;
+
+	reply->packet_type = P_dump_ee;
+	reply->minor = mdev_to_minor(mdev);
+	reply->ret_code = NoError;
+
+	TRACE(TraceTypeNl, TraceLvlSummary, nl_trace_reply(cn_reply););
+
+	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL);
+	kfree(cn_reply);
+}
+
+void drbd_bcast_sync_progress(struct drbd_conf *mdev)
+{
+	char buffer[sizeof(struct cn_msg)+
+		    sizeof(struct drbd_nl_cfg_reply)+
+		    sizeof(struct sync_progress_tag_len_struct)+
+		    sizeof(short int)];
+	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
+	struct drbd_nl_cfg_reply *reply =
+		(struct drbd_nl_cfg_reply *)cn_reply->data;
+	unsigned short *tl = reply->tag_list;
+	unsigned long rs_left;
+	unsigned int res;
+
+	/* no local ref, no bitmap, no syncer progress, no broadcast. */
+	if (!inc_local(mdev))
+		return;
+	drbd_get_syncer_progress(mdev, &rs_left, &res);
+	dec_local(mdev);
+
+	*tl++ = T_sync_progress;
+	*tl++ = sizeof(int);
+	memcpy(tl, &res, sizeof(int));
+	tl = (unsigned short *)((char *)tl + sizeof(int));
+	*tl++ = TT_END; /* Close the tag list */
+
+	cn_reply->id.idx = CN_IDX_DRBD;
+	cn_reply->id.val = CN_VAL_DRBD;
+
+	cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
+	cn_reply->ack = 0; /* not used here. */
+	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
+		(int)((char *)tl - (char *)reply->tag_list);
+	cn_reply->flags = 0;
+
+	reply->packet_type = P_sync_progress;
+	reply->minor = mdev_to_minor(mdev);
+	reply->ret_code = NoError;
+
+	TRACE(TraceTypeNl, TraceLvlSummary, nl_trace_reply(cn_reply););
+
+	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL);
+}
+
+int __init drbd_nl_init(void)
+{
+	static struct cb_id cn_id_drbd;
+	int err, try = 10;
+
+	cn_id_drbd.val = CN_VAL_DRBD;
+	do {
+		cn_id_drbd.idx = cn_idx;
+		err = cn_add_callback(&cn_id_drbd, "cn_drbd", &drbd_connector_callback);
+		if (!err)
+			break;
+		cn_idx = (cn_idx + CN_IDX_STEP);
+	} while (try--);
+
+	if (err) {
+		printk(KERN_ERR "drbd: cn_drbd failed to register\n");
+		return err;
+	}
+
+	return 0;
+}
+
+void drbd_nl_cleanup(void)
+{
+	static struct cb_id cn_id_drbd;
+
+	cn_id_drbd.idx = cn_idx;
+	cn_id_drbd.val = CN_VAL_DRBD;
+
+	cn_del_callback(&cn_id_drbd);
+}
+
+void drbd_nl_send_reply(struct cn_msg *req, int ret_code)
+{
+	char buffer[sizeof(struct cn_msg)+sizeof(struct drbd_nl_cfg_reply)];
+	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
+	struct drbd_nl_cfg_reply *reply =
+		(struct drbd_nl_cfg_reply *)cn_reply->data;
+	int rr;
+
+	cn_reply->id = req->id;
+
+	cn_reply->seq = req->seq;
+	cn_reply->ack = req->ack + 1;
+	cn_reply->len = sizeof(struct drbd_nl_cfg_reply);
+	cn_reply->flags = 0;
+
+	reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor;
+	reply->ret_code = ret_code;
+
+	TRACE(TraceTypeNl, TraceLvlSummary, nl_trace_reply(cn_reply););
+
+	rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL);
+	if (rr && rr != -ESRCH)
+		printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
+}
+
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
new file mode 100644
index 000000000000..a494853e35ce
--- /dev/null
+++ b/include/linux/drbd.h
@@ -0,0 +1,372 @@
+/*
+  drbd.h
+  Kernel module for 2.6.x Kernels
+
+  This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+  Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+  Copyright (C) 2001-2008, Philipp Reisner .
+  Copyright (C) 2001-2008, Lars Ellenberg .
+
+  drbd is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2, or (at your option)
+  any later version.
+
+  drbd is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with drbd; see the file COPYING.  If not, write to
+  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+#ifndef DRBD_H
+#define DRBD_H
+#include
+#include
+
+#include
+
+#ifdef __KERNEL__
+#include
+#include
+#else
+#include
+#include
+#include
+
+/* Although the Linux source code distinguishes between
+   generic endianness and the bitfields' endianness, there is no
+   architecture as of Linux-2.6.24-rc4 where the bitfields' endianness
+   does not match the generic endianness. */
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define __LITTLE_ENDIAN_BITFIELD
+#elif __BYTE_ORDER == __BIG_ENDIAN
+#define __BIG_ENDIAN_BITFIELD
+#else
+# error "sorry, weird endianness on this box"
+#endif
+
+#endif
+
+
+enum io_error_handler {
+	PassOn, /* FIXME should this rather be named "Ignore"? */
+	CallIOEHelper,
+	Detach
+};
+
+enum fencing_policy {
+	DontCare,
+	Resource,
+	Stonith
+};
+
+enum disconnect_handler {
+	Reconnect,
+	DropNetConf,
+	FreezeIO
+};
+
+enum after_sb_handler {
+	Disconnect,
+	DiscardYoungerPri,
+	DiscardOlderPri,
+	DiscardZeroChg,
+	DiscardLeastChg,
+	DiscardLocal,
+	DiscardRemote,
+	Consensus,
+	DiscardSecondary,
+	CallHelper,
+	Violently
+};
+
+/* KEEP the order, do not delete or insert!
+ * Or change the API_VERSION, too. */
+enum ret_codes {
+	RetCodeBase = 100,
+	NoError,	/* 101 ... */
+	LAAlreadyInUse,
+	OAAlreadyInUse,
+	LDNameInvalid,
+	MDNameInvalid,
+	LDAlreadyInUse,
+	LDNoBlockDev,
+	MDNoBlockDev,
+	LDOpenFailed,
+	MDOpenFailed,
+	LDDeviceTooSmall,
+	MDDeviceTooSmall,
+	LDNoConfig,
+	LDMounted,
+	MDMounted,
+	LDMDInvalid,
+	LDDeviceTooLarge,
+	MDIOError,
+	MDInvalid,
+	CRAMAlgNotAvail,
+	CRAMAlgNotDigest,
+	KMallocFailed,
+	DiscardNotAllowed,
+	HaveDiskConfig,
+	HaveNetConfig,
+	UnknownMandatoryTag,
+	MinorNotKnown,
+	StateNotAllowed,
+	GotSignal, /* EINTR */
+	NoResizeDuringResync,
+	APrimaryNodeNeeded,
+	SyncAfterInvalid,
+	SyncAfterCycle,
+	PauseFlagAlreadySet,
+	PauseFlagAlreadyClear,
+	DiskLowerThanOutdated, /* obsolete, now SS_LowerThanOutdated */
+	UnknownNetLinkPacket,
+	HaveNoDiskConfig,
+	ProtocolCRequired,
+	VMallocFailed,
+	IntegrityAlgNotAvail,
+	IntegrityAlgNotDigest,
+	CPUMaskParseFailed,
+	CSUMSAlgNotAvail,
+	CSUMSAlgNotDigest,
+	VERIFYAlgNotAvail,
+	VERIFYAlgNotDigest,
+	CSUMSResyncRunning,
+	VERIFYIsRunning,
+	DataOfWrongCurrent,
+	MayNotBeConnected,
+
+	/* insert new ones above this line */
+	AfterLastRetCode,
+};
+
+#define DRBD_PROT_A 1
+#define DRBD_PROT_B 2
+#define DRBD_PROT_C 3
+
+enum drbd_role {
+	Unknown = 0,
+	Primary = 1,	/* role */
+	Secondary = 2,	/* role */
+	role_mask = 3,
+};
+
+/* The order of these constants is important.
+ * The lower ones (<WFReportParams) indicate that there is no socket!
+ * >=WFReportParams ==> There is a socket
+ */
+enum drbd_conns {
+	StandAlone,
+	Disconnecting,	/* Transient state on the way to StandAlone. */
+	Unconnected,	/* >= Unconnected -> inc_net() succeeds */
+
+	/* These transient states are all used on the way
+	 * from >= Connected to Unconnected.
+	 * The 'disconnect reason' states;
+	 * I do not allow changing between them. */
+	Timeout,
+	BrokenPipe,
+	NetworkFailure,
+	ProtocolError,
+	TearDown,
+
+	WFConnection,
+	WFReportParams,	/* we have a socket */
+	Connected,	/* we have introduced each other */
+	StartingSyncS,	/* starting full sync by IOCTL. */
+	StartingSyncT,	/* starting full sync by IOCTL. */
+	WFBitMapS,
+	WFBitMapT,
+	WFSyncUUID,
+
+	/* All SyncStates are tested with this comparison
+	 * xx >= SyncSource && xx <= PausedSyncT */
+	SyncSource,
+	SyncTarget,
+	VerifyS,
+	VerifyT,
+	PausedSyncS,
+	PausedSyncT,
+	conn_mask = 31
+};
+
+enum drbd_disk_state {
+	Diskless,
+	Attaching,	/* In the process of reading the meta-data */
+	Failed,		/* Becomes Diskless as soon as we told it the peer */
+			/* when >= Failed it is legal to access mdev->bc */
+	Negotiating,	/* Late attaching state, we need to talk to the peer */
+	Inconsistent,
+	Outdated,
+	DUnknown,	/* Only used for the peer, never for myself */
+	Consistent,	/* Might be Outdated, might be UpToDate ... */
+	UpToDate,	/* Only this disk state allows applications' IO ! */
+	disk_mask = 15
+};
+
+union drbd_state_t {
+/* According to gcc's docs, the order of allocation of bit-fields
+ * within a unit (C90 6.5.2.1, C99 6.7.2.1) is determined by the ABI
+ * (pointed out by Maxim Uvarov).
+ * Even though we transmit as "cpu_to_be32(state)",
+ * the offsets of the bitfields still need to be swapped
+ * on different endianness.
+ */
+	struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+		unsigned role:2 ;	/* 3/4	 primary/secondary/unknown */
+		unsigned peer:2 ;	/* 3/4	 primary/secondary/unknown */
+		unsigned conn:5 ;	/* 17/32 cstates */
+		unsigned disk:4 ;	/* 8/16	 from Diskless to UpToDate */
+		unsigned pdsk:4 ;	/* 8/16	 from Diskless to UpToDate */
+		unsigned susp:1 ;	/* 2/2	 IO suspended no/yes */
+		unsigned aftr_isp:1 ;	/* isp .. imposed sync pause */
+		unsigned peer_isp:1 ;
+		unsigned user_isp:1 ;
+		unsigned _pad:11;	/* 0	 unused */
+#elif defined(__BIG_ENDIAN_BITFIELD)
+		unsigned _pad:11;	/* 0	 unused */
+		unsigned user_isp:1 ;
+		unsigned peer_isp:1 ;
+		unsigned aftr_isp:1 ;	/* isp .. imposed sync pause */
+		unsigned susp:1 ;	/* 2/2	 IO suspended no/yes */
+		unsigned pdsk:4 ;	/* 8/16	 from Diskless to UpToDate */
+		unsigned disk:4 ;	/* 8/16	 from Diskless to UpToDate */
+		unsigned conn:5 ;	/* 17/32 cstates */
+		unsigned peer:2 ;	/* 3/4	 primary/secondary/unknown */
+		unsigned role:2 ;	/* 3/4	 primary/secondary/unknown */
+#else
+# error "this endianness is not supported"
+#endif
+#ifndef DRBD_DEBUG_STATE_CHANGES
+#define DRBD_DEBUG_STATE_CHANGES 0
+#endif
+#if DRBD_DEBUG_STATE_CHANGES
+		unsigned int line;
+		const char *func;
+#endif
+	};
+	unsigned int i;
+};
+
+enum set_st_err {
+	SS_CW_NoNeed = 4,
+	SS_CW_Success = 3,
+	SS_NothingToDo = 2,
+	SS_Success = 1,
+	SS_UnknownError = 0, /* Used to sleep longer in _drbd_request_state */
+	SS_TwoPrimaries = -1,
+	SS_NoUpToDateDisk = -2,
+	SS_BothInconsistent = -4,
+	SS_SyncingDiskless = -5,
+	SS_ConnectedOutdates = -6,
+	SS_PrimaryNOP = -7,
+	SS_ResyncRunning = -8,
+	SS_AlreadyStandAlone = -9,
+	SS_CW_FailedByPeer = -10,
+	SS_IsDiskLess = -11,
+	SS_DeviceInUse = -12,
+	SS_NoNetConfig = -13,
+	SS_NoVerifyAlg = -14,		/* drbd-8.2 only */
+	SS_NeedConnection = -15,	/* drbd-8.2 only */
+	SS_LowerThanOutdated = -16,
+	SS_NotSupported = -17,		/* drbd-8.2 only */
+	SS_InTransientState = -18,	/* Retry after the next state change */
+	SS_ConcurrentStChg = -19,	/* Concurrent cluster side state change! */
+	SS_AfterLastError = -20,	/* Keep this at bottom */
+};
+
+/* from drbd_strings.c */
+extern const char *conns_to_name(enum drbd_conns);
+extern const char *roles_to_name(enum drbd_role);
+extern const char *disks_to_name(enum drbd_disk_state);
+extern const char *set_st_err_name(enum set_st_err);
+
+#ifndef BDEVNAME_SIZE
+# define BDEVNAME_SIZE 32
+#endif
+
+#define SHARED_SECRET_MAX 64
+
+enum MetaDataFlags {
+	__MDF_Consistent,
+	__MDF_PrimaryInd,
+	__MDF_ConnectedInd,
+	__MDF_FullSync,
+	__MDF_WasUpToDate,
+	__MDF_PeerOutDated, /* or worse (e.g. invalid). */
+	__MDF_CrashedPrimary,
+};
+#define MDF_Consistent		(1<<__MDF_Consistent)
+#define MDF_PrimaryInd		(1<<__MDF_PrimaryInd)
+#define MDF_ConnectedInd	(1<<__MDF_ConnectedInd)
+#define MDF_FullSync		(1<<__MDF_FullSync)
+#define MDF_WasUpToDate		(1<<__MDF_WasUpToDate)
+#define MDF_PeerOutDated	(1<<__MDF_PeerOutDated)
+#define MDF_CrashedPrimary	(1<<__MDF_CrashedPrimary)
+
+enum UuidIndex {
+	Current,
+	Bitmap,
+	History_start,
+	History_end,
+	UUID_SIZE,	/* nl-packet: number of dirty bits */
+	UUID_FLAGS,	/* nl-packet: flags */
+	EXT_UUID_SIZE	/* Everything. */
+};
+
+enum UseTimeout {
+	UT_Default	= 0,
+	UT_Degraded	= 1,
+	UT_PeerOutdated = 2,
+};
+
+#define UUID_JUST_CREATED ((__u64)4)
+
+#define DRBD_MAGIC 0x83740267
+#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC)
+
+/* these are of type "int" */
+#define DRBD_MD_INDEX_INTERNAL -1
+#define DRBD_MD_INDEX_FLEX_EXT -2
+#define DRBD_MD_INDEX_FLEX_INT -3
+
+/* Start of the new netlink/connector stuff */
+
+#define DRBD_NL_CREATE_DEVICE 0x01
+#define DRBD_NL_SET_DEFAULTS  0x02
+
+/* The following line should be moved over to linux/connector.h
+ * when the time comes */
+#ifndef CN_IDX_DRBD
+# define CN_IDX_DRBD 0x4
+/* Ubuntu "intrepid ibex" release defined CN_IDX_DRBD as 0x6 */
+#endif
+#define CN_VAL_DRBD 0x1
+
+/* For searching a vacant cn_idx value */
+#define CN_IDX_STEP 6977
+
+struct drbd_nl_cfg_req {
+	int packet_type;
+	unsigned int drbd_minor;
+	int flags;
+	unsigned short tag_list[];
+};
+
+struct drbd_nl_cfg_reply {
+	int packet_type;
+	unsigned int minor;
+	int ret_code; /* enum ret_code or set_st_err_t */
+	unsigned short tag_list[]; /* only used with get_* calls */
+};
+
+#endif
diff --git a/include/linux/drbd_config.h b/include/linux/drbd_config.h
new file mode 100644
index 000000000000..63d063ec7f8b
--- /dev/null
+++ b/include/linux/drbd_config.h
@@ -0,0 +1,43 @@
+/*
+  drbd_config.h
+  DRBD's compile time configuration.
+
+  drbd is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2, or (at your option)
+  any later version.
+
+  drbd is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with drbd; see the file COPYING.  If not, write to
+  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef DRBD_CONFIG_H
+#define DRBD_CONFIG_H
+
+extern const char *drbd_buildtag(void);
+
+#define REL_VERSION "8.3.1"
+#define API_VERSION 88
+#define PRO_VERSION_MIN 86
+#define PRO_VERSION_MAX 90
+
+#ifndef __CHECKER__	/* for a sparse run, we need all STATICs */
+#define DBG_ALL_SYMBOLS /* no static functions, improves quality of OOPS traces */
+#endif
+
+
+/* Define this to enable dynamic tracing controlled by module parameters
+ * at run time. This enables ALL use of dynamic tracing including packet
+ * and bio dumping, etc */
+#define ENABLE_DYNAMIC_TRACE
+
+/* Enable fault insertion code */
+#define DRBD_ENABLE_FAULTS
+
+#endif
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
new file mode 100644
index 000000000000..7390c354b0d2
--- /dev/null
+++ b/include/linux/drbd_limits.h
@@ -0,0 +1,133 @@
+/*
+  drbd_limits.h
+  This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+*/
+
+/*
+ * Our current limitations.
+ * Some of them are hard limits,
+ * some of them are arbitrary range limits, that make it easier to provide
+ * feedback about nonsense settings for certain configurable values.
+ */
+
+#ifndef DRBD_LIMITS_H
+#define DRBD_LIMITS_H 1
+
+#define DEBUG_RANGE_CHECK 0
+
+#define DRBD_MINOR_COUNT_MIN 1
+#define DRBD_MINOR_COUNT_MAX 255
+
+#define DRBD_DIALOG_REFRESH_MIN 0
+#define DRBD_DIALOG_REFRESH_MAX 600
+
+/* valid port number */
+#define DRBD_PORT_MIN 1
+#define DRBD_PORT_MAX 0xffff
+
+/* startup { */
+  /* if you want more than 3.4 days, disable */
+#define DRBD_WFC_TIMEOUT_MIN 0
+#define DRBD_WFC_TIMEOUT_MAX 300000
+#define DRBD_WFC_TIMEOUT_DEF 0
+
+#define DRBD_DEGR_WFC_TIMEOUT_MIN 0
+#define DRBD_DEGR_WFC_TIMEOUT_MAX 300000
+#define DRBD_DEGR_WFC_TIMEOUT_DEF 0
+
+#define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0
+#define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000
+#define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0
+/* }*/
+
+/* net { */
+  /* timeout, unit: centiseconds
+   * more than one minute timeout is not useful */
+#define DRBD_TIMEOUT_MIN 1
+#define DRBD_TIMEOUT_MAX 600
+#define DRBD_TIMEOUT_DEF 60 /* 6 seconds */
+
+  /* active connection retries when WFConnection */
+#define DRBD_CONNECT_INT_MIN 1
+#define DRBD_CONNECT_INT_MAX 120
+#define DRBD_CONNECT_INT_DEF 10 /* seconds */
+
+  /* keep-alive probes when idle */
+#define DRBD_PING_INT_MIN 1
+#define DRBD_PING_INT_MAX 120
+#define DRBD_PING_INT_DEF 10
+
+  /* timeout for the ping packets.*/
+#define DRBD_PING_TIMEO_MIN 1
+#define DRBD_PING_TIMEO_MAX 100
+#define DRBD_PING_TIMEO_DEF 5
+
+  /* max number of write requests between write barriers */
+#define DRBD_MAX_EPOCH_SIZE_MIN 1
+#define DRBD_MAX_EPOCH_SIZE_MAX 20000
+#define DRBD_MAX_EPOCH_SIZE_DEF 2048
+
+  /* I don't think that a tcp send buffer of more than 10M is useful */
+#define DRBD_SNDBUF_SIZE_MIN 0
+#define DRBD_SNDBUF_SIZE_MAX (10<<20)
+#define DRBD_SNDBUF_SIZE_DEF (2*65535)
+
+  /* @4k PageSize -> 128kB - 512MB */
+#define DRBD_MAX_BUFFERS_MIN 32
+#define DRBD_MAX_BUFFERS_MAX 131072
+#define DRBD_MAX_BUFFERS_DEF 2048
+
+  /* @4k PageSize -> 4kB - 512MB */
+#define DRBD_UNPLUG_WATERMARK_MIN 1
+#define DRBD_UNPLUG_WATERMARK_MAX 131072
+#define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16)
+
+  /* 0 is disabled.
+   * 200 should be more than enough even for very short timeouts */
+#define DRBD_KO_COUNT_MIN 0
+#define DRBD_KO_COUNT_MAX 200
+#define DRBD_KO_COUNT_DEF 0
+/* } */
+
+/* syncer { */
+  /* FIXME allow rate to be zero? */
+#define DRBD_RATE_MIN 1
+/* channel bonding 10 GbE, or other hardware */
+#define DRBD_RATE_MAX (4 << 20)
+#define DRBD_RATE_DEF 250 /* kb/second */
+
+  /* less than 7 would hit performance unnecessarily.
+   * 3833 is the largest prime that still fits
+   * into 64 sectors of activity log */
+#define DRBD_AL_EXTENTS_MIN 7
+#define DRBD_AL_EXTENTS_MAX 3833
+#define DRBD_AL_EXTENTS_DEF 127
+
+#define DRBD_AFTER_MIN -1
+#define DRBD_AFTER_MAX 255
+#define DRBD_AFTER_DEF -1
+
+/* } */
+
+/* drbdsetup XY resize -d Z
+ * you are free to reduce the device size to nothing, if you want to.
+ * the upper limit with 64bit kernel, enough ram and flexible meta data
+ * is 16 TB, currently. */
+/* DRBD_MAX_SECTORS */
+#define DRBD_DISK_SIZE_SECT_MIN 0
+#define DRBD_DISK_SIZE_SECT_MAX (16 * (2LLU << 30))
+#define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */
+
+#define DRBD_ON_IO_ERROR_DEF PassOn
+#define DRBD_FENCING_DEF DontCare
+#define DRBD_AFTER_SB_0P_DEF Disconnect
+#define DRBD_AFTER_SB_1P_DEF Disconnect
+#define DRBD_AFTER_SB_2P_DEF Disconnect
+#define DRBD_RR_CONFLICT_DEF Disconnect
+
+#define DRBD_MAX_BIO_BVECS_MIN 0
+#define DRBD_MAX_BIO_BVECS_MAX 128
+#define DRBD_MAX_BIO_BVECS_DEF 0
+
+#undef RANGE
+#endif
diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
new file mode 100644
index 000000000000..6315ce8af647
--- /dev/null
+++ b/include/linux/drbd_nl.h
@@ -0,0 +1,135 @@
+/*
+   PAKET( name,
+	  TYPE ( pn, pr, member )
+	  ...
+   )
+
+   You may never reissue one of the pn arguments
+*/
+
+#if !defined(NL_PACKET) || !defined(NL_STRING) || !defined(NL_INTEGER) || !defined(NL_BIT) || !defined(NL_INT64)
+#error "The macros NL_PACKET, NL_STRING, NL_INTEGER, NL_INT64 and NL_BIT need to be defined"
+#endif
+
+NL_PACKET(primary, 1,
+	NL_BIT( 1, T_MAY_IGNORE, overwrite_peer)
+)
+
+NL_PACKET(secondary, 2, )
+
+NL_PACKET(disk_conf, 3,
+	NL_INT64( 2, T_MAY_IGNORE, disk_size)
+	NL_STRING( 3, T_MANDATORY, backing_dev, 128)
+	NL_STRING( 4, T_MANDATORY, meta_dev, 128)
+	NL_INTEGER( 5, T_MANDATORY, meta_dev_idx)
+	NL_INTEGER( 6, T_MAY_IGNORE, on_io_error)
+	NL_INTEGER( 7, T_MAY_IGNORE, fencing)
+	NL_BIT( 37, T_MAY_IGNORE, use_bmbv)
+	NL_BIT( 53, T_MAY_IGNORE, no_disk_flush)
+	NL_BIT( 54, T_MAY_IGNORE, no_md_flush)
+	  /* 55 max_bio_size was available in 8.2.6rc2 */
+	NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs)
+	NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier)
+	NL_BIT( 58, T_MAY_IGNORE, no_disk_drain)
+)
+
+NL_PACKET(detach, 4, )
+
+NL_PACKET(net_conf, 5,
+	NL_STRING( 8, T_MANDATORY, my_addr, 128)
+	NL_STRING( 9, T_MANDATORY, peer_addr, 128)
+	NL_STRING( 10, T_MAY_IGNORE, shared_secret, SHARED_SECRET_MAX)
+	NL_STRING( 11, T_MAY_IGNORE, cram_hmac_alg, SHARED_SECRET_MAX)
+	NL_STRING( 44, T_MAY_IGNORE, integrity_alg, SHARED_SECRET_MAX)
+	NL_INTEGER( 14, T_MAY_IGNORE, timeout)
+	NL_INTEGER( 15, T_MANDATORY, wire_protocol)
+	NL_INTEGER( 16, T_MAY_IGNORE, try_connect_int)
+	NL_INTEGER( 17, T_MAY_IGNORE, ping_int)
+	NL_INTEGER( 18, T_MAY_IGNORE, max_epoch_size)
+	NL_INTEGER( 19, T_MAY_IGNORE, max_buffers)
+	NL_INTEGER( 20, T_MAY_IGNORE, unplug_watermark)
+	NL_INTEGER( 21, T_MAY_IGNORE, sndbuf_size)
+	NL_INTEGER( 22, T_MAY_IGNORE, ko_count)
+	NL_INTEGER( 24, T_MAY_IGNORE, after_sb_0p)
+	NL_INTEGER( 25, T_MAY_IGNORE, after_sb_1p)
+	NL_INTEGER( 26, T_MAY_IGNORE, after_sb_2p)
+	NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict)
+	NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo)
+	  /* 59 addr_family was available in GIT, never released */
+	NL_BIT( 60, T_MANDATORY, mind_af)
+	NL_BIT( 27, T_MAY_IGNORE, want_lose)
+	NL_BIT( 28, T_MAY_IGNORE, two_primaries)
+	NL_BIT( 41, T_MAY_IGNORE, always_asbp)
+	NL_BIT( 61, T_MAY_IGNORE, no_cork)
+	NL_BIT( 62, T_MANDATORY, auto_sndbuf_size)
+)
+
+NL_PACKET(disconnect, 6, )
+
+NL_PACKET(resize, 7,
+	NL_INT64( 29, T_MAY_IGNORE, resize_size)
+)
+
+NL_PACKET(syncer_conf, 8,
+	NL_INTEGER( 30, T_MAY_IGNORE, rate)
+	NL_INTEGER( 31, T_MAY_IGNORE, after)
+	NL_INTEGER( 32, T_MAY_IGNORE, al_extents)
+	NL_STRING( 52, T_MAY_IGNORE, verify_alg, SHARED_SECRET_MAX)
+	NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32)
+	NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX)
+	NL_BIT( 65, T_MAY_IGNORE, use_rle_encoding)
+)
+
+NL_PACKET(invalidate, 9, )
+NL_PACKET(invalidate_peer, 10, )
+NL_PACKET(pause_sync, 11, )
+NL_PACKET(resume_sync, 12, )
+NL_PACKET(suspend_io, 13, )
+NL_PACKET(resume_io, 14, )
+NL_PACKET(outdate, 15, )
+NL_PACKET(get_config, 16, )
+NL_PACKET(get_state, 17,
+	NL_INTEGER( 33, T_MAY_IGNORE, state_i)
+)
+
+NL_PACKET(get_uuids, 18,
+	NL_STRING( 34, T_MAY_IGNORE, uuids, (UUID_SIZE*sizeof(__u64)))
+	NL_INTEGER( 35, T_MAY_IGNORE, uuids_flags)
+)
+
+NL_PACKET(get_timeout_flag, 19,
+	NL_BIT( 36, T_MAY_IGNORE, use_degraded)
+)
+
+NL_PACKET(call_helper, 20,
+	NL_STRING( 38, T_MAY_IGNORE, helper, 32)
+)
+
+/* Tag nr 42 already allocated in drbd-8.1 development. */
+
+NL_PACKET(sync_progress, 23,
+	NL_INTEGER( 43, T_MAY_IGNORE, sync_progress)
+)
+
+NL_PACKET(dump_ee, 24,
+	NL_STRING( 45, T_MAY_IGNORE, dump_ee_reason, 32)
+	NL_STRING( 46, T_MAY_IGNORE, seen_digest, SHARED_SECRET_MAX)
+	NL_STRING( 47, T_MAY_IGNORE, calc_digest, SHARED_SECRET_MAX)
+	NL_INT64( 48, T_MAY_IGNORE, ee_sector)
+	NL_INT64( 49, T_MAY_IGNORE, ee_block_id)
+	NL_STRING( 50, T_MAY_IGNORE, ee_data, 32 << 10)
+)
+
+NL_PACKET(start_ov, 25,
+)
+
+NL_PACKET(new_c_uuid, 26,
+	NL_BIT( 63, T_MANDATORY, clear_bm)
+)
+
+#undef NL_PACKET
+#undef NL_INTEGER
+#undef NL_INT64
+#undef NL_BIT
+#undef NL_STRING
+
diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h
new file mode 100644
index 000000000000..fcdff8410e99
--- /dev/null
+++ b/include/linux/drbd_tag_magic.h
@@ -0,0 +1,83 @@
+#ifndef DRBD_TAG_MAGIC_H
+#define DRBD_TAG_MAGIC_H
+
+#define TT_END     0
+#define TT_REMOVED 0xE000
+
+/* declare packet_type enums */
+enum packet_types {
+#define NL_PACKET(name, number, fields) P_ ## name = number,
+#define NL_INTEGER(pn, pr, member)
+#define NL_INT64(pn, pr, member)
+#define NL_BIT(pn, pr, member)
+#define NL_STRING(pn, pr, member, len)
+#include "drbd_nl.h"
+	P_nl_after_last_packet,
+};
+
+/* These structs are used to deduce the size of the tag lists: */
+#define NL_PACKET(name, number, fields) \
+	struct name ## _tag_len_struct { fields };
+#define NL_INTEGER(pn, pr, member) \
+	int member; int tag_and_len ## member;
+#define NL_INT64(pn, pr, member) \
+	__u64 member; int tag_and_len ## member;
+#define NL_BIT(pn, pr, member) \
+	unsigned char member:1; int tag_and_len ## member;
+#define NL_STRING(pn, pr, member, len) \
+	unsigned char member[len]; int member ## _len; \
+	int tag_and_len ## member;
+#include "linux/drbd_nl.h"
+
+/* declare tag-list-sizes */
+static const int tag_list_sizes[] = {
+#define NL_PACKET(name, number, fields) 2 fields ,
+#define NL_INTEGER(pn, pr, member)	+ 4 + 4
+#define NL_INT64(pn, pr, member)	+ 4 + 8
+#define NL_BIT(pn, pr, member)		+ 4 + 1
+#define NL_STRING(pn, pr, member, len)	+ 4 + (len)
+#include "drbd_nl.h"
+};
+
+/* The two highest bits are used for the tag type */
+#define TT_MASK      0xC000
+#define TT_INTEGER   0x0000
+#define TT_INT64     0x4000
+#define TT_BIT       0x8000
+#define TT_STRING    0xC000
+/* The next bit indicates if processing of the tag is mandatory */
+#define T_MANDATORY  0x2000
+#define T_MAY_IGNORE 0x0000
+#define TN_MASK      0x1fff
+/* The remaining 13 bits are used to enumerate the tags */
+
+#define tag_type(T)   ((T) & TT_MASK)
+#define tag_number(T) ((T) & TN_MASK)
+
+/* declare tag enums */
+#define NL_PACKET(name, number, fields) fields
+enum drbd_tags {
+#define NL_INTEGER(pn, pr, member)	T_ ## member = pn | TT_INTEGER | pr ,
+#define NL_INT64(pn, pr, member)	T_ ## member = pn | TT_INT64   | pr ,
+#define NL_BIT(pn, pr, member)		T_ ## member = pn | TT_BIT     | pr ,
+#define NL_STRING(pn, pr, member, len)	T_ ## member = pn | TT_STRING  | pr ,
+#include "drbd_nl.h"
+};
+
+struct tag {
+	const char *name;
+	int type_n_flags;
+	int max_len;
+};
+
+/* declare tag names */
+#define NL_PACKET(name, number, fields) fields
+static const struct tag tag_descriptions[] = {
+#define NL_INTEGER(pn, pr, member)	[ pn ] = { #member, TT_INTEGER | pr, sizeof(int) },
+#define NL_INT64(pn, pr, member)	[ pn ] = { #member, TT_INT64   | pr, sizeof(__u64) },
+#define NL_BIT(pn, pr, member)		[ pn ] = { #member, TT_BIT     | pr, sizeof(int) },
+#define NL_STRING(pn, pr, member, len)	[ pn ] = { #member, TT_STRING  | pr, (len) },
+#include "drbd_nl.h"
+};
+
+#endif
--
cgit v1.2.3

From 00b2b2a36da967ff4e8103a55ae9cd6ff15fbc10 Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Mon, 30 Mar 2009 18:47:14 +0200
Subject: DRBD: internal_data_structures

The big "struct drbd_conf". It actually describes one DRBD device.

Signed-off-by: Philipp Reisner
Signed-off-by: Lars Ellenberg
---
 drivers/block/drbd/drbd_int.h      | 2320 ++++++++++++++++++++++++++++++++++++
 drivers/block/drbd/drbd_wrappers.h |  117 ++
 2 files changed, 2437 insertions(+)
 create mode 100644 drivers/block/drbd/drbd_int.h
 create mode 100644 drivers/block/drbd/drbd_wrappers.h

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
new file mode 100644
index 000000000000..01e55d959fcd
--- /dev/null
+++ b/drivers/block/drbd/drbd_int.h
@@ -0,0 +1,2320 @@
+/*
+  drbd_int.h
+
+  This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+  Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+  Copyright (C) 1999-2008, Philipp Reisner .
+  Copyright (C) 2002-2008, Lars Ellenberg .
+
+  drbd is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2, or (at your option)
+  any later version.
+
+  drbd is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with drbd; see the file COPYING.  If not, write to
+  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+#ifndef _DRBD_INT_H
+#define _DRBD_INT_H
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "lru_cache.h"
+
+#ifdef __CHECKER__
+# define __protected_by(x)       __attribute__((require_context(x,1,999,"rdwr")))
+# define __protected_read_by(x)  __attribute__((require_context(x,1,999,"read")))
+# define __protected_write_by(x) __attribute__((require_context(x,1,999,"write")))
+# define __must_hold(x)       __attribute__((context(x,1,1), require_context(x,1,999,"call")))
+#else
+# define __protected_by(x)
+# define __protected_read_by(x)
+# define __protected_write_by(x)
+# define __must_hold(x)
+#endif
+
+#define __no_warn(lock, stmt) do { __acquire(lock); stmt; __release(lock); } while (0)
+
+/* module parameter, defined in drbd_main.c */
+extern unsigned int minor_count;
+extern int allow_oos;
+extern unsigned int cn_idx;
+
+#ifdef DRBD_ENABLE_FAULTS
+extern int enable_faults;
+extern int fault_rate;
+extern int fault_devs;
+#endif
+
+extern char usermode_helper[];
+
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+/* I don't remember why XCPU ...
+ * This is used to wake the asender,
+ * and to interrupt the sending task
+ * on disconnect.
+ */
+#define DRBD_SIG SIGXCPU
+
+/* This is used to stop/restart our threads.
+ * Cannot use SIGTERM nor SIGKILL, since these
+ * are sent out by init on runlevel changes.
+ * I chose SIGHUP for now.
+ */
+#define DRBD_SIGKILL SIGHUP
+
+/* All EEs on the free list should have ID_VACANT (== 0)
+ * freshly allocated EEs get !ID_VACANT (== 1)
+ * so if it says "cannot dereference null pointer at address 0x00000001",
+ * it is most likely one of these :( */
+
+#define ID_IN_SYNC      (4711ULL)
+#define ID_OUT_OF_SYNC  (4712ULL)
+
+#define ID_SYNCER (-1ULL)
+#define ID_VACANT 0
+#define is_syncer_block_id(id) ((id) == ID_SYNCER)
+
+struct drbd_conf;
+
+#ifdef DBG_ALL_SYMBOLS
+# define STATIC
+#else
+# define STATIC static
+#endif
+
+/*
+ * Some Message Macros
+ *************************/
+
+#define DUMPP(A)   ERR(#A " = %p in %s:%d\n",   (A), __FILE__, __LINE__);
+#define DUMPLU(A)  ERR(#A " = %lu in %s:%d\n",  (unsigned long)(A), __FILE__, __LINE__);
+#define DUMPLLU(A) ERR(#A " = %llu in %s:%d\n", (unsigned long long)(A), __FILE__, __LINE__);
+#define DUMPLX(A)  ERR(#A " = %lx in %s:%d\n",  (A), __FILE__, __LINE__);
+#define DUMPI(A)   ERR(#A " = %d in %s:%d\n",   (int)(A), __FILE__, __LINE__);
+
+
+#define PRINTK(level, fmt, args...) \
+	printk(level "drbd%d: " fmt, \
+		mdev->minor , ##args)
+
+#define ALERT(fmt, args...)     PRINTK(KERN_ALERT, fmt , ##args)
+#define ERR(fmt, args...)       PRINTK(KERN_ERR, fmt , ##args)
+/* nowadays, WARN() is defined as BUG() without crash in bug.h */
+#define drbd_WARN(fmt, args...) PRINTK(KERN_WARNING, fmt , ##args)
+#define INFO(fmt, args...)      PRINTK(KERN_INFO, fmt , ##args)
+#define DBG(fmt, args...)       PRINTK(KERN_DEBUG, fmt , ##args)
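+/* usage sketch (assuming a device with minor 0):
+ *	INFO("attached\n")  ==>  printk(KERN_INFO "drbd0: attached\n")
+ * i.e. every message is automatically prefixed with its device. */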
+
+#define D_ASSERT(exp)	if (!(exp)) \
+	 ERR("ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__)
+
+#define ERR_IF(exp) if (({ \
+	int _b = (exp) != 0; \
+	if (_b) ERR("%s: (%s) in %s:%d\n", \
+		__func__, #exp, __FILE__, __LINE__); \
+	 _b; \
+	}))
+
+/* Defines to control fault insertion */
+enum {
+	DRBD_FAULT_MD_WR = 0,	/* meta data write */
+	DRBD_FAULT_MD_RD,	/* read  */
+	DRBD_FAULT_RS_WR,	/* resync */
+	DRBD_FAULT_RS_RD,
+	DRBD_FAULT_DT_WR,	/* data */
+	DRBD_FAULT_DT_RD,
+	DRBD_FAULT_DT_RA,	/* data read ahead */
+	DRBD_FAULT_AL_EE,	/* alloc ee */
+
+	DRBD_FAULT_MAX,
+};
+
+#ifdef DRBD_ENABLE_FAULTS
+extern unsigned int
+_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type);
+static inline int
+drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) {
+	return fault_rate &&
+		(enable_faults & (1<<type)) &&
+		_drbd_insert_fault(mdev, type);
+}
+#define FAULT_ACTIVE(_m, _t) (drbd_insert_fault((_m), (_t)))
+#else
+#define FAULT_ACTIVE(_m, _t) (0)
+#endif
+
+extern struct drbd_conf **minor_table;
+extern struct ratelimit_state drbd_ratelimit_state;
+
+/***
+ * on the wire
+ *********************************************************************/
+
+enum Drbd_Packet_Cmd {
+	/* receiver (data socket) */
+	Data		  = 0x00,
+	DataReply	  = 0x01, /* Response to DataRequest */
+	RSDataReply	  = 0x02, /* Response to RSDataRequest */
+	Barrier		  = 0x03,
+	ReportBitMap	  = 0x04,
+	BecomeSyncTarget  = 0x05,
+	BecomeSyncSource  = 0x06,
+	UnplugRemote	  = 0x07, /* Used at various times to hint the peer */
+	DataRequest	  = 0x08, /* Used to ask for a data block */
+	RSDataRequest	  = 0x09, /* Used to ask for a data block for resync */
+	SyncParam	  = 0x0a,
+	ReportProtocol	  = 0x0b,
+	ReportUUIDs	  = 0x0c,
+	ReportSizes	  = 0x0d,
+	ReportState	  = 0x0e,
+	ReportSyncUUID	  = 0x0f,
+	AuthChallenge	  = 0x10,
+	AuthResponse	  = 0x11,
+	StateChgRequest	  = 0x12,
+
+	/* asender (meta socket) */
+	Ping		  = 0x13,
+	PingAck		  = 0x14,
+	RecvAck		  = 0x15, /* Used in protocol B */
+	WriteAck	  = 0x16, /* Used in protocol C */
+	RSWriteAck	  = 0x17, /* Is a WriteAck, additionally call set_in_sync(). */
+	DiscardAck	  = 0x18, /* Used in proto C, two-primaries conflict detection */
+	NegAck		  = 0x19, /* Sent if local disk is unusable */
+	NegDReply	  = 0x1a, /* Local disk is broken... */
+	NegRSDReply	  = 0x1b, /* Local disk is broken... */
+	BarrierAck	  = 0x1c,
+	StateChgReply	  = 0x1d,
+
+	/* "new" commands, no longer fitting into the ordering scheme above */
+	OVRequest	  = 0x1e, /* data socket */
+	OVReply		  = 0x1f,
+	OVResult	  = 0x20, /* meta socket */
+	CsumRSRequest	  = 0x21, /* data socket */
+	RSIsInSync	  = 0x22, /* meta socket */
+	SyncParam89	  = 0x23, /* data socket, protocol version 89 replacement for SyncParam */
+	ReportCBitMap	  = 0x24, /* compressed or otherwise encoded bitmap transfer */
+
+	MAX_CMD		  = 0x25,
+	MayIgnore	  = 0x100, /* Flag to test if (cmd > MayIgnore) ... */
+	MAX_OPT_CMD	  = 0x101,
+
+	/* special command ids for handshake */
+
+	HandShakeM	  = 0xfff1, /* First Packet on the MetaSock */
+	HandShakeS	  = 0xfff2, /* First Packet on the Socket */
+
+	HandShake	  = 0xfffe  /* FIXED for the next century! */
+};
+
+static inline const char *cmdname(enum Drbd_Packet_Cmd cmd)
+{
+	/* THINK may need to become several global tables
+	 * when we want to support more than
+	 * one PRO_VERSION */
+	static const char *cmdnames[] = {
+		[Data]		   = "Data",
+		[DataReply]	   = "DataReply",
+		[RSDataReply]	   = "RSDataReply",
+		[Barrier]	   = "Barrier",
+		[ReportBitMap]	   = "ReportBitMap",
+		[BecomeSyncTarget] = "BecomeSyncTarget",
+		[BecomeSyncSource] = "BecomeSyncSource",
+		[UnplugRemote]	   = "UnplugRemote",
+		[DataRequest]	   = "DataRequest",
+		[RSDataRequest]	   = "RSDataRequest",
+		[SyncParam]	   = "SyncParam",
+		[SyncParam89]	   = "SyncParam89",
+		[ReportProtocol]   = "ReportProtocol",
+		[ReportUUIDs]	   = "ReportUUIDs",
+		[ReportSizes]	   = "ReportSizes",
+		[ReportState]	   = "ReportState",
+		[ReportSyncUUID]   = "ReportSyncUUID",
+		[AuthChallenge]	   = "AuthChallenge",
+		[AuthResponse]	   = "AuthResponse",
+		[Ping]		   = "Ping",
+		[PingAck]	   = "PingAck",
+		[RecvAck]	   = "RecvAck",
+		[WriteAck]	   = "WriteAck",
+		[RSWriteAck]	   = "RSWriteAck",
+		[DiscardAck]	   = "DiscardAck",
+		[NegAck]	   = "NegAck",
+		[NegDReply]	   = "NegDReply",
+		[NegRSDReply]	   = "NegRSDReply",
+		[BarrierAck]	   = "BarrierAck",
+		[StateChgRequest]  = "StateChgRequest",
+		[StateChgReply]	   = "StateChgReply",
+		[OVRequest]	   = "OVRequest",
+		[OVReply]	   = "OVReply",
+		[OVResult]	   = "OVResult",
+		[CsumRSRequest]	   = "CsumRSRequest",
+		[RSIsInSync]	   = "RSIsInSync",
+		[ReportCBitMap]	   = "ReportCBitMap",
+		[MAX_CMD]	   = NULL,
+	};
+
+	if (cmd == HandShakeM)
+		return "HandShakeM";
+	if (cmd == HandShakeS)
+		return "HandShakeS";
+	if (cmd == HandShake)
+		return "HandShake";
+	if (cmd >= MAX_CMD)
+		return "Unknown";
+	return cmdnames[cmd];
+}
+
+/* for sending/receiving the bitmap,
+ * possibly in some encoding scheme */
+struct bm_xfer_ctx {
+	/* "const"
+	 * stores total bits and long words
+	 * of the bitmap, so we don't need to
+	 * call the accessor functions over and again. */
+	unsigned long bm_bits;
+	unsigned long bm_words;
+	/* during xfer, current position within the bitmap */
+	unsigned long bit_offset;
+	unsigned long word_offset;
+
+	/* statistics; index: (h->command == ReportBitMap) */
+	unsigned packets[2];
+	unsigned bytes[2];
+};
+
+extern void INFO_bm_xfer_stats(struct drbd_conf *mdev,
+		const char *direction, struct bm_xfer_ctx *c);
+
+static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c)
+{
+	/* word_offset counts "native long words" (32 or 64 bit),
+	 * aligned at 64 bit.
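+	 * (Worked example, for illustration: on a 32 bit arch,
+	 * bit_offset 100 gives word_offset 100 >> 5 = 3, and the
+	 * "&= ~1" below rounds that down to 2, the last 64 bit
+	 * aligned native long word.)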
+ * Encoded packet may end at an unaligned bit offset. + * In case a fallback clear text packet is transmitted in + * between, we adjust this offset back to the last 64bit + * aligned "native long word", which makes coding and decoding + * the plain text bitmap much more convenient. */ +#if BITS_PER_LONG == 64 + c->word_offset = c->bit_offset >> 6; +#elif BITS_PER_LONG == 32 + c->word_offset = c->bit_offset >> 5; + c->word_offset &= ~(1UL); +#else +# error "unsupported BITS_PER_LONG" +#endif +} + +/* This is the layout for a packet on the wire. + * The byteorder is the network byte order. + * (except block_id and barrier fields. + * these are pointers to local structs + * and have no relevance for the partner, + * which just echoes them as received.) + * + * NOTE that the payload starts at a long aligned offset, + * regardless of 32 or 64 bit arch! + */ +struct Drbd_Header { + u32 magic; + u16 command; + u16 length; /* bytes of data after this header */ + u8 payload[0]; +} __attribute((packed)); +/* 8 bytes. packet FIXED for the next century! */ + +/* + * short commands, packets without payload, plain Drbd_Header: + * Ping + * PingAck + * BecomeSyncTarget + * BecomeSyncSource + * UnplugRemote + */ + +/* + * commands with out-of-struct payload: + * ReportBitMap (no additional fields) + * Data, DataReply (see Drbd_Data_Packet) + * ReportCBitMap (see receive_compressed_bitmap) + */ + +/* these defines must not be changed without changing the protocol version */ +#define DP_HARDBARRIER 1 +#define DP_RW_SYNC 2 +#define DP_MAY_SET_IN_SYNC 4 + +struct Drbd_Data_Packet { + struct Drbd_Header head; + u64 sector; /* 64 bits sector number */ + u64 block_id; /* to identify the request in protocol B&C */ + u32 seq_num; + u32 dp_flags; +} __attribute((packed)); + +/* + * commands which share a struct: + * Drbd_BlockAck_Packet: + * RecvAck (proto B), WriteAck (proto C), + * DiscardAck (proto C, two-primaries conflict detection) + * Drbd_BlockRequest_Packet: + * DataRequest, RSDataRequest + */ +struct Drbd_BlockAck_Packet { + struct Drbd_Header head; + u64 sector; + u64 block_id; + u32 blksize; + u32 seq_num; +} __attribute((packed)); + + +struct Drbd_BlockRequest_Packet { + struct Drbd_Header head; + u64 sector; + u64 block_id; + u32 blksize; + u32 pad; /* to multiple of 8 Byte */ +} __attribute((packed)); + +/* + * commands with their own struct for additional fields: + * HandShake + * Barrier + * BarrierAck + * SyncParam + * ReportParams + */ + +struct Drbd_HandShake_Packet { + struct Drbd_Header head; /* 8 bytes */ + u32 protocol_min; + u32 feature_flags; + u32 protocol_max; + + /* should be more than enough for future enhancements + * for now, feature_flags and the reserverd array shall be zero. + */ + + u32 _pad; + u64 reserverd[7]; +} __attribute((packed)); +/* 80 bytes, FIXED for the next century */ + +struct Drbd_Barrier_Packet { + struct Drbd_Header head; + u32 barrier; /* barrier number _handle_ only */ + u32 pad; /* to multiple of 8 Byte */ +} __attribute((packed)); + +struct Drbd_BarrierAck_Packet { + struct Drbd_Header head; + u32 barrier; + u32 set_size; +} __attribute((packed)); + +struct Drbd_SyncParam_Packet { + struct Drbd_Header head; + u32 rate; + + /* Since protocol version 88 and higher. 
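+	 * The algorithm name travels as a trailing, zero terminated
+	 * string behind the fixed fields; its length is presumably
+	 * derived from head.length on the receiving side.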
*/ + char verify_alg[0]; +} __attribute((packed)); + +struct Drbd_SyncParam89_Packet { + struct Drbd_Header head; + u32 rate; + /* protocol version 89: */ + char verify_alg[SHARED_SECRET_MAX]; + char csums_alg[SHARED_SECRET_MAX]; +} __attribute((packed)); + +struct Drbd_Protocol_Packet { + struct Drbd_Header head; + u32 protocol; + u32 after_sb_0p; + u32 after_sb_1p; + u32 after_sb_2p; + u32 want_lose; + u32 two_primaries; + + /* Since protocol version 87 and higher. */ + char integrity_alg[0]; + +} __attribute((packed)); + +struct Drbd_GenCnt_Packet { + struct Drbd_Header head; + u64 uuid[EXT_UUID_SIZE]; +} __attribute((packed)); + +struct Drbd_SyncUUID_Packet { + struct Drbd_Header head; + u64 uuid; +} __attribute((packed)); + +struct Drbd_Sizes_Packet { + struct Drbd_Header head; + u64 d_size; /* size of disk */ + u64 u_size; /* user requested size */ + u64 c_size; /* current exported size */ + u32 max_segment_size; /* Maximal size of a BIO */ + u32 queue_order_type; +} __attribute((packed)); + +struct Drbd_State_Packet { + struct Drbd_Header head; + u32 state; +} __attribute((packed)); + +struct Drbd_Req_State_Packet { + struct Drbd_Header head; + u32 mask; + u32 val; +} __attribute((packed)); + +struct Drbd_RqS_Reply_Packet { + struct Drbd_Header head; + u32 retcode; +} __attribute((packed)); + +struct Drbd06_Parameter_P { + u64 size; + u32 state; + u32 blksize; + u32 protocol; + u32 version; + u32 gen_cnt[5]; + u32 bit_map_gen[5]; +} __attribute((packed)); + +struct Drbd_Discard_Packet { + struct Drbd_Header head; + u64 block_id; + u32 seq_num; + u32 pad; +} __attribute((packed)); + +/* Valid values for the encoding field. + * Bump proto version when changing this. */ +enum Drbd_bitmap_code { + RLE_VLI_Bytes = 0, + RLE_VLI_BitsFibD_0_1 = 1, + RLE_VLI_BitsFibD_1_1 = 2, + RLE_VLI_BitsFibD_1_2 = 3, + RLE_VLI_BitsFibD_2_3 = 4, + RLE_VLI_BitsFibD_3_5 = 5, +}; + +struct Drbd_Compressed_Bitmap_Packet { + struct Drbd_Header head; + /* (encoding & 0x0f): actual encoding, see enum Drbd_bitmap_code + * (encoding & 0x80): polarity (set/unset) of first runlength + * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits + * used to pad up to head.length bytes + */ + u8 encoding; + + u8 code[0]; +} __attribute((packed)); + +static inline enum Drbd_bitmap_code +DCBP_get_code(struct Drbd_Compressed_Bitmap_Packet *p) +{ + return (enum Drbd_bitmap_code)(p->encoding & 0x0f); +} + +static inline void +DCBP_set_code(struct Drbd_Compressed_Bitmap_Packet *p, enum Drbd_bitmap_code code) +{ + BUG_ON(code & ~0xf); + p->encoding = (p->encoding & ~0xf) | code; +} + +static inline int +DCBP_get_start(struct Drbd_Compressed_Bitmap_Packet *p) +{ + return (p->encoding & 0x80) != 0; +} + +static inline void +DCBP_set_start(struct Drbd_Compressed_Bitmap_Packet *p, int set) +{ + p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0); +} + +static inline int +DCBP_get_pad_bits(struct Drbd_Compressed_Bitmap_Packet *p) +{ + return (p->encoding >> 4) & 0x7; +} + +static inline void +DCBP_set_pad_bits(struct Drbd_Compressed_Bitmap_Packet *p, int n) +{ + BUG_ON(n & ~0x7); + p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4); +} + +/* one bitmap packet, including the Drbd_Header, + * should fit within one _architecture independend_ page. + * so we need to use the fixed size 4KiB page size + * most architechtures have used for a long time. 
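+ * With the 8 byte struct Drbd_Header that works out to 4088 payload
+ * bytes per ReportBitMap packet, i.e. 511 longs on 64 bit
+ * or 1022 longs on 32 bit.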
+ */ +#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct Drbd_Header)) +#define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long)) +#define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct Drbd_Compressed_Bitmap_Packet)) +#if (PAGE_SIZE < 4096) +/* drbd_send_bitmap / receive_bitmap would break horribly */ +#error "PAGE_SIZE too small" +#endif + +union Drbd_Polymorph_Packet { + struct Drbd_Header head; + struct Drbd_HandShake_Packet HandShake; + struct Drbd_Data_Packet Data; + struct Drbd_BlockAck_Packet BlockAck; + struct Drbd_Barrier_Packet Barrier; + struct Drbd_BarrierAck_Packet BarrierAck; + struct Drbd_SyncParam89_Packet SyncParam89; + struct Drbd_Protocol_Packet Protocol; + struct Drbd_Sizes_Packet Sizes; + struct Drbd_GenCnt_Packet GenCnt; + struct Drbd_State_Packet State; + struct Drbd_Req_State_Packet ReqState; + struct Drbd_RqS_Reply_Packet RqSReply; + struct Drbd_BlockRequest_Packet BlockRequest; +} __attribute((packed)); + +/**********************************************************************/ +enum Drbd_thread_state { + None, + Running, + Exiting, + Restarting +}; + +struct Drbd_thread { + spinlock_t t_lock; + struct task_struct *task; + struct completion stop; + enum Drbd_thread_state t_state; + int (*function) (struct Drbd_thread *); + struct drbd_conf *mdev; + int reset_cpu_mask; +}; + +static inline enum Drbd_thread_state get_t_state(struct Drbd_thread *thi) +{ + /* THINK testing the t_state seems to be uncritical in all cases + * (but thread_{start,stop}), so we can read it *without* the lock. + * --lge */ + + smp_rmb(); + return thi->t_state; +} + + +/* + * Having this as the first member of a struct provides sort of "inheritance". + * "derived" structs can be "drbd_queue_work()"ed. + * The callback should know and cast back to the descendant struct. + * drbd_request and Tl_epoch_entry are descendants of drbd_work. + */ +struct drbd_work; +typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel); +struct drbd_work { + struct list_head list; + drbd_work_cb cb; +}; + +struct drbd_barrier; +struct drbd_request { + struct drbd_work w; + struct drbd_conf *mdev; + struct bio *private_bio; + struct hlist_node colision; + sector_t sector; + unsigned int size; + unsigned int epoch; /* barrier_nr */ + + /* barrier_nr: used to check on "completion" whether this req was in + * the current epoch, and we therefore have to close it, + * starting a new epoch... + */ + + /* up to here, the struct layout is identical to Tl_epoch_entry; + * we might be able to use that to our advantage... */ + + struct list_head tl_requests; /* ring list in the transfer log */ + struct bio *master_bio; /* master bio pointer */ + unsigned long rq_state; /* see comments above _req_mod() */ + int seq_num; + unsigned long start_time; +}; + +struct drbd_barrier { + struct drbd_work w; + struct list_head requests; /* requests before */ + struct drbd_barrier *next; /* pointer to the next barrier */ + unsigned int br_number; /* the barriers identifier. */ + int n_req; /* number of requests attached before this barrier */ +}; + +struct drbd_request; + +/* These Tl_epoch_entries may be in one of 6 lists: + active_ee .. data packet being written + sync_ee .. syncer block being written + done_ee .. block written, need to send WriteAck + read_ee .. [RS]DataRequest being read +*/ + +struct drbd_epoch { + struct list_head list; + unsigned int barrier_nr; + atomic_t epoch_size; /* increased on every request added. */ + atomic_t active; /* increased on every req. 
added, and dec on every finished. */ + unsigned long flags; +}; + +/* drbd_epoch flag bits */ +enum { + DE_BARRIER_IN_NEXT_EPOCH_ISSUED, + DE_BARRIER_IN_NEXT_EPOCH_DONE, + DE_CONTAINS_A_BARRIER, + DE_HAVE_BARRIER_NUMBER, + DE_IS_FINISHING, +}; + +struct Tl_epoch_entry { + struct drbd_work w; + struct drbd_conf *mdev; + struct bio *private_bio; + struct hlist_node colision; + sector_t sector; + unsigned int size; + struct drbd_epoch *epoch; + + /* up to here, the struct layout is identical to drbd_request; + * we might be able to use that to our advantage... */ + + unsigned int flags; + u64 block_id; +}; + +struct digest_info { + int digest_size; + void *digest; +}; + +/* ee flag bits */ +enum { + __EE_CALL_AL_COMPLETE_IO, + __EE_CONFLICT_PENDING, + __EE_MAY_SET_IN_SYNC, + __EE_IS_BARRIER, +}; +#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) +#define EE_CONFLICT_PENDING (1<<__EE_CONFLICT_PENDING) +#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) +#define EE_IS_BARRIER (1<<__EE_IS_BARRIER) + +/* global flag bits */ +enum { + CREATE_BARRIER, /* next Data is preceeded by a Barrier */ + SIGNAL_ASENDER, /* whether asender wants to be interrupted */ + SEND_PING, /* whether asender should send a ping asap */ + WORK_PENDING, /* completion flag for drbd_disconnect */ + STOP_SYNC_TIMER, /* tell timer to cancel itself */ + UNPLUG_QUEUED, /* only relevant with kernel 2.4 */ + UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */ + MD_DIRTY, /* current uuids and flags not yet on disk */ + DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */ + USE_DEGR_WFC_T, /* degr-wfc-timeout instead of wfc-timeout. */ + CLUSTER_ST_CHANGE, /* Cluster wide state change going on... */ + CL_ST_CHG_SUCCESS, + CL_ST_CHG_FAIL, + CRASHED_PRIMARY, /* This node was a crashed primary. + * Gets cleared when the state.conn + * goes into Connected state. */ + WRITE_BM_AFTER_RESYNC, /* A kmalloc() during resync failed */ + NO_BARRIER_SUPP, /* underlying block device doesn't implement barriers */ + CONSIDER_RESYNC, + + MD_NO_BARRIER, /* meta data device does not support barriers, + so don't even try */ + SUSPEND_IO, /* suspend application io */ + BITMAP_IO, /* suspend application io; + once no more io in flight, start bitmap io */ + BITMAP_IO_QUEUED, /* Started bitmap IO */ + RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ + NET_CONGESTED, /* The data socket is congested */ +}; + +struct drbd_bitmap; /* opaque for drbd_conf */ + +/* TODO sort members for performance + * MAYBE group them further */ + +/* THINK maybe we actually want to use the default "event/%s" worker threads + * or similar in linux 2.6, which uses per cpu data and threads. + * + * To be general, this might need a spin_lock member. + * For now, please use the mdev->req_lock to protect list_head, + * see drbd_queue_work below. + */ +struct drbd_work_queue { + struct list_head q; + struct semaphore s; /* producers up it, worker down()s it */ + spinlock_t q_lock; /* to protect the list. 
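+			     * Producers append under q_lock and up() the
+			     * semaphore; the worker down()s it and pops
+			     * entries under the same lock -- see the
+			     * drbd_queue_work() helpers below.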
*/ +}; + +struct drbd_socket { + struct drbd_work_queue work; + struct mutex mutex; + struct socket *socket; + /* this way we get our + * send/receive buffers off the stack */ + union Drbd_Polymorph_Packet sbuf; + union Drbd_Polymorph_Packet rbuf; +}; + +struct drbd_md { + u64 md_offset; /* sector offset to 'super' block */ + + u64 la_size_sect; /* last agreed size, unit sectors */ + u64 uuid[UUID_SIZE]; + u64 device_uuid; + u32 flags; + u32 md_size_sect; + + s32 al_offset; /* signed relative sector offset to al area */ + s32 bm_offset; /* signed relative sector offset to bitmap */ + + /* u32 al_nr_extents; important for restoring the AL + * is stored into sync_conf.al_extents, which in turn + * gets applied to act_log->nr_elements + */ +}; + +/* for sync_conf and other types... */ +#define NL_PACKET(name, number, fields) struct name { fields }; +#define NL_INTEGER(pn,pr,member) int member; +#define NL_INT64(pn,pr,member) __u64 member; +#define NL_BIT(pn,pr,member) unsigned member:1; +#define NL_STRING(pn,pr,member,len) unsigned char member[len]; int member ## _len; +#include "linux/drbd_nl.h" + +struct drbd_backing_dev { + struct block_device *backing_bdev; + struct block_device *md_bdev; + struct file *lo_file; + struct file *md_file; + struct drbd_md md; + struct disk_conf dc; /* The user provided config... */ + sector_t known_size; /* last known size of that backing device */ +}; + +struct drbd_md_io { + struct drbd_conf *mdev; + struct completion event; + int error; +}; + +struct bm_io_work { + struct drbd_work w; + char *why; + int (*io_fn)(struct drbd_conf *mdev); + void (*done)(struct drbd_conf *mdev, int rv); +}; + +enum write_ordering_e { + WO_none, + WO_drain_io, + WO_bdev_flush, + WO_bio_barrier +}; + +struct drbd_conf { + /* things that are stored as / read from meta data on disk */ + unsigned long flags; + + /* configured by drbdsetup */ + struct net_conf *net_conf; /* protected by inc_net() and dec_net() */ + struct syncer_conf sync_conf; + struct drbd_backing_dev *bc __protected_by(local); + + sector_t p_size; /* partner's disk size */ + struct request_queue *rq_queue; + struct block_device *this_bdev; + struct gendisk *vdisk; + + struct drbd_socket data; /* data/barrier/cstate/parameter packets */ + struct drbd_socket meta; /* ping/ack (metadata) packets */ + int agreed_pro_version; /* actually used protocol version */ + unsigned long last_received; /* in jiffies, either socket */ + unsigned int ko_count; + struct drbd_work resync_work, + unplug_work, + md_sync_work; + struct timer_list resync_timer; + struct timer_list md_sync_timer; + + /* Used after attach while negotiating new disk state. */ + union drbd_state_t new_state_tmp; + + union drbd_state_t state; + wait_queue_head_t misc_wait; + wait_queue_head_t state_wait; /* upon each state change. 
*/ + unsigned int send_cnt; + unsigned int recv_cnt; + unsigned int read_cnt; + unsigned int writ_cnt; + unsigned int al_writ_cnt; + unsigned int bm_writ_cnt; + atomic_t ap_bio_cnt; /* Requests we need to complete */ + atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */ + atomic_t rs_pending_cnt; /* RS request/data packets on the wire */ + atomic_t unacked_cnt; /* Need to send replys for */ + atomic_t local_cnt; /* Waiting for local completion */ + atomic_t net_cnt; /* Users of net_conf */ + spinlock_t req_lock; + struct drbd_barrier *unused_spare_barrier; /* for pre-allocation */ + struct drbd_barrier *newest_barrier; + struct drbd_barrier *oldest_barrier; + struct list_head out_of_sequence_requests; + struct hlist_head *tl_hash; + unsigned int tl_hash_s; + + /* blocks to sync in this run [unit BM_BLOCK_SIZE] */ + unsigned long rs_total; + /* number of sync IOs that failed in this run */ + unsigned long rs_failed; + /* Syncer's start time [unit jiffies] */ + unsigned long rs_start; + /* cumulated time in PausedSyncX state [unit jiffies] */ + unsigned long rs_paused; + /* block not up-to-date at mark [unit BM_BLOCK_SIZE] */ + unsigned long rs_mark_left; + /* marks's time [unit jiffies] */ + unsigned long rs_mark_time; + /* skipped because csum was equeal [unit BM_BLOCK_SIZE] */ + unsigned long rs_same_csum; + sector_t ov_position; + /* Start sector of out of sync range. */ + sector_t ov_last_oos_start; + /* size of out-of-sync range in sectors. */ + sector_t ov_last_oos_size; + unsigned long ov_left; + struct crypto_hash *csums_tfm; + struct crypto_hash *verify_tfm; + + struct Drbd_thread receiver; + struct Drbd_thread worker; + struct Drbd_thread asender; + struct drbd_bitmap *bitmap; + unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */ + + /* Used to track operations of resync... */ + struct lru_cache *resync; + /* Number of locked elements in resync LRU */ + unsigned int resync_locked; + /* resync extent number waiting for application requests */ + unsigned int resync_wenr; + + int open_cnt; + u64 *p_uuid; + struct drbd_epoch *current_epoch; + spinlock_t epoch_lock; + unsigned int epochs; + enum write_ordering_e write_ordering; + struct list_head active_ee; /* IO in progress */ + struct list_head sync_ee; /* IO in progress */ + struct list_head done_ee; /* send ack */ + struct list_head read_ee; /* IO in progress */ + struct list_head net_ee; /* zero-copy network send in progress */ + struct hlist_head *ee_hash; /* is proteced by req_lock! */ + unsigned int ee_hash_s; + + /* this one is protected by ee_lock, single thread */ + struct Tl_epoch_entry *last_write_w_barrier; + + int next_barrier_nr; + struct hlist_head *app_reads_hash; /* is proteced by req_lock */ + struct list_head resync_reads; + atomic_t pp_in_use; + wait_queue_head_t ee_wait; + struct page *md_io_page; /* one page buffer for md_io */ + struct page *md_io_tmpp; /* for hardsect != 512 [s390 only?] 
*/ + struct mutex md_io_mutex; /* protects the md_io_buffer */ + spinlock_t al_lock; + wait_queue_head_t al_wait; + struct lru_cache *act_log; /* activity log */ + unsigned int al_tr_number; + int al_tr_cycle; + int al_tr_pos; /* position of the next transaction in the journal */ + struct crypto_hash *cram_hmac_tfm; + struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */ + struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */ + void *int_dig_out; + void *int_dig_in; + void *int_dig_vv; + wait_queue_head_t seq_wait; + atomic_t packet_seq; + unsigned int peer_seq; + spinlock_t peer_seq_lock; + unsigned int minor; + unsigned long comm_bm_set; /* communicated number of set bits. */ + cpumask_t cpu_mask; + struct bm_io_work bm_io_work; + u64 ed_uuid; /* UUID of the exposed data */ + struct mutex state_mutex; + char congestion_reason; /* Why we where congested... */ +}; + +static inline struct drbd_conf *minor_to_mdev(unsigned int minor) +{ + struct drbd_conf *mdev; + + mdev = minor < minor_count ? minor_table[minor] : NULL; + + return mdev; +} + +static inline unsigned int mdev_to_minor(struct drbd_conf *mdev) +{ + return mdev->minor; +} + +/* returns 1 if it was successfull, + * returns 0 if there was no data socket. + * so wherever you are going to use the data.socket, e.g. do + * if (!drbd_get_data_sock(mdev)) + * return 0; + * CODE(); + * drbd_put_data_sock(mdev); + */ +static inline int drbd_get_data_sock(struct drbd_conf *mdev) +{ + mutex_lock(&mdev->data.mutex); + /* drbd_disconnect() could have called drbd_free_sock() + * while we were waiting in down()... */ + if (unlikely(mdev->data.socket == NULL)) { + mutex_unlock(&mdev->data.mutex); + return 0; + } + return 1; +} + +static inline void drbd_put_data_sock(struct drbd_conf *mdev) +{ + mutex_unlock(&mdev->data.mutex); +} + +/* + * function declarations + *************************/ + +/* drbd_main.c */ + +enum chg_state_flags { + ChgStateHard = 1, + ChgStateVerbose = 2, + ChgWaitComplete = 4, + ChgSerialize = 8, + ChgOrdered = ChgWaitComplete + ChgSerialize, +}; + +extern void drbd_init_set_defaults(struct drbd_conf *mdev); +extern int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, + union drbd_state_t mask, union drbd_state_t val); +extern void drbd_force_state(struct drbd_conf *, union drbd_state_t, + union drbd_state_t); +extern int _drbd_request_state(struct drbd_conf *, union drbd_state_t, + union drbd_state_t, enum chg_state_flags); +extern int __drbd_set_state(struct drbd_conf *, union drbd_state_t, + enum chg_state_flags, struct completion *done); +extern void print_st_err(struct drbd_conf *, union drbd_state_t, + union drbd_state_t, int); +extern int drbd_thread_start(struct Drbd_thread *thi); +extern void _drbd_thread_stop(struct Drbd_thread *thi, int restart, int wait); +#ifdef CONFIG_SMP +extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev); +extern cpumask_t drbd_calc_cpu_mask(struct drbd_conf *mdev); +#else +#define drbd_thread_current_set_cpu(A) ({}) +#define drbd_calc_cpu_mask(A) CPU_MASK_ALL +#endif +extern void drbd_free_resources(struct drbd_conf *mdev); +extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, + unsigned int set_size); +extern void tl_clear(struct drbd_conf *mdev); +extern void _tl_add_barrier(struct drbd_conf *, struct drbd_barrier *); +extern void drbd_free_sock(struct drbd_conf *mdev); +extern int drbd_send(struct drbd_conf *mdev, struct socket *sock, + void *buf, size_t size, unsigned msg_flags); +extern int 
drbd_send_protocol(struct drbd_conf *mdev); +extern int _drbd_send_uuids(struct drbd_conf *mdev); +extern int drbd_send_uuids(struct drbd_conf *mdev); +extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val); +extern int drbd_send_sizes(struct drbd_conf *mdev); +extern int _drbd_send_state(struct drbd_conf *mdev); +extern int drbd_send_state(struct drbd_conf *mdev); +extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, + enum Drbd_Packet_Cmd cmd, struct Drbd_Header *h, + size_t size, unsigned msg_flags); +#define USE_DATA_SOCKET 1 +#define USE_META_SOCKET 0 +extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, + enum Drbd_Packet_Cmd cmd, struct Drbd_Header *h, + size_t size); +extern int drbd_send_cmd2(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, + char *data, size_t size); +extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc); +extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, + u32 set_size); +extern int drbd_send_ack(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, + struct Tl_epoch_entry *e); +extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, + struct Drbd_BlockRequest_Packet *rp); +extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, + struct Drbd_Data_Packet *dp); +extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, + sector_t sector, int blksize, u64 block_id); +extern int _drbd_send_page(struct drbd_conf *mdev, struct page *page, + int offset, size_t size); +extern int drbd_send_block(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, + struct Tl_epoch_entry *e); +extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); +extern int _drbd_send_barrier(struct drbd_conf *mdev, + struct drbd_barrier *barrier); +extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd, + sector_t sector, int size, u64 block_id); +extern int drbd_send_drequest_csum(struct drbd_conf *mdev, + sector_t sector,int size, + void *digest, int digest_size, + enum Drbd_Packet_Cmd cmd); +extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size); + +extern int drbd_send_bitmap(struct drbd_conf *mdev); +extern int _drbd_send_bitmap(struct drbd_conf *mdev); +extern int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode); +extern void drbd_free_bc(struct drbd_backing_dev *bc); +extern int drbd_io_error(struct drbd_conf *mdev, int forcedetach); +extern void drbd_mdev_cleanup(struct drbd_conf *mdev); + +/* drbd_meta-data.c (still in drbd_main.c) */ +extern void drbd_md_sync(struct drbd_conf *mdev); +extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev); +/* maybe define them below as inline? 
*/
+extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
+extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
+extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local);
+extern void _drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local);
+extern void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local);
+extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local);
+extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local);
+extern int drbd_md_test_flag(struct drbd_backing_dev *, int);
+extern void drbd_md_mark_dirty(struct drbd_conf *mdev);
+extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
+				 int (*io_fn)(struct drbd_conf *),
+				 void (*done)(struct drbd_conf *, int),
+				 char *why);
+extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
+extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
+extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why);
+
+
+/* Meta data layout
+   We reserve a 128MB Block (4k aligned)
+   * either at the end of the backing device
+   * or on a separate meta data device. */
+
+#define MD_RESERVED_SECT (128LU << 11)  /* 128 MB, unit sectors */
+/* The following numbers are sectors */
+#define MD_AL_OFFSET 8	    /* 8 Sectors after start of meta area */
+#define MD_AL_MAX_SIZE 64   /* = 32 kb LOG  ~ 3776 extents ~ 14 GB Storage */
+/* Allows up to about 3.8TB */
+#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE)
+
+/* Since the smallest IO unit is usually 512 byte */
+#define MD_HARDSECT_B	 9
+#define MD_HARDSECT	 (1<<MD_HARDSECT_B)
+
+/* activity log */
+#define AL_EXTENTS_PT ((MD_HARDSECT-12)/8-1) /* 61 ; Extents per 512B sector */
+#define AL_EXTENT_SIZE_B 22		 /* One extent represents 4M Storage */
+#define AL_EXTENT_SIZE (1<<AL_EXTENT_SIZE_B)
+
+#if BITS_PER_LONG == 32
+#define LN2_BPL 5
+#define cpu_to_lel(A) cpu_to_le32(A)
+#define lel_to_cpu(A) le32_to_cpu(A)
+#elif BITS_PER_LONG == 64
+#define LN2_BPL 6
+#define cpu_to_lel(A) cpu_to_le64(A)
+#define lel_to_cpu(A) le64_to_cpu(A)
+#else
+#error "LN2 of BITS_PER_LONG unknown!"
+#endif
+
+/* resync bitmap */
+/* 16MB sized 'bitmap extent' to track syncer usage */
+struct bm_extent {
+	struct lc_element lce;
+	int rs_left; /* number of bits set (out of sync) in this extent. */
+	int rs_failed; /* number of failed resync requests in this extent. */
+	unsigned long flags;
+};
+
+#define BME_NO_WRITES  0  /* bm_extent.flags: no more requests on this one! */
+#define BME_LOCKED     1  /* bm_extent.flags: syncer active on this one. */
+
+/* drbd_bitmap.c */
+/*
+ * We need to store one bit for a block.
+ * Example: 1GB disk @ 4096 byte blocks ==> we need 32 KB bitmap.
+ * Bit 0 ==> local node thinks this block is binary identical on both nodes
+ * Bit 1 ==> local node thinks this block needs to be synced.
+ */
+
+#define BM_BLOCK_SIZE_B  12			 /* 4k per bit */
+#define BM_BLOCK_SIZE	 (1<<BM_BLOCK_SIZE_B)
+/* (9+3) : 512 bytes @ 8 bits; representable storage
+ * in one sector of the bitmap */
+#define BM_EXT_SIZE_B	 (9+3+BM_BLOCK_SIZE_B)
+#define BM_EXT_SIZE	 (1<<BM_EXT_SIZE_B)
+
+/* bit to sector conversion */
+#define BM_SECT_TO_BIT(x)   ((x)>>(BM_BLOCK_SIZE_B-9))
+#define BM_BIT_TO_SECT(x)   ((sector_t)(x)<<(BM_BLOCK_SIZE_B-9))
+#define BM_SECT_PER_BIT     BM_BIT_TO_SECT(1)
+
+/* bit to represented kilo byte conversion */
+#define Bit2KB(bits) ((bits)<<(BM_BLOCK_SIZE_B-10))
+
+/* in which _bitmap_ extent (resp. sector) the bit for a certain
+ * _storage_ sector is located */
+#define BM_SECT_TO_EXT(x)   ((x)>>(BM_EXT_SIZE_B-9))
+
+/* how many _storage_ sectors we have per bitmap sector */
+#define BM_EXT_TO_SECT(x)   ((sector_t)(x) << (BM_EXT_SIZE_B-9))
+#define BM_SECT_PER_EXT     BM_EXT_TO_SECT(1)
+
+/* in one sector of the bitmap, we have this many activity_log extents. */
+#define AL_EXT_PER_BM_SECT  (1 << (BM_EXT_SIZE_B - AL_EXTENT_SIZE_B))
+#define BM_WORDS_PER_AL_EXT (1 << (AL_EXTENT_SIZE_B-BM_BLOCK_SIZE_B-LN2_BPL))
+
+#define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SIZE_B - BM_BLOCK_SIZE_B)
+#define BM_BLOCKS_PER_BM_EXT_MASK  ((1<<BM_BLOCKS_PER_BM_EXT_B) - 1)
+
+#ifdef ENABLE_DYNAMIC_TRACE
+extern unsigned int trace_level;
+extern unsigned int trace_type;
+extern unsigned int trace_devs;
+
+static inline int
+is_trace(unsigned int type, unsigned int level) {
+	return (trace_level >= level) && (type & trace_type);
+}
+static inline int
+is_mdev_trace(struct drbd_conf *mdev, unsigned int type, unsigned int level) {
+	return is_trace(type, level) &&
+		((1 << mdev_to_minor(mdev)) & trace_devs);
+}
+
+#define MTRACE(type, lvl, code...) \
+do { \
+	if (unlikely(is_mdev_trace(mdev, type, lvl))) { \
+		code \
+	} \
+} while (0)
+
+#define TRACE(type, lvl, code...) \
+do { \
+	if (unlikely(is_trace(type, lvl))) { \
+		code \
+	} \
+} while (0)
+
+/* Buffer printing support
+ * dbg_print_flags: used for Flags arg to drbd_print_buffer
+ * - DBGPRINT_BUFFADDR; if set, each line starts with the
+ *   virtual address of the line being output. If clear,
+ *   each line starts with the offset from the beginning
+ *   of the buffer.
*/ +enum dbg_print_flags { + DBGPRINT_BUFFADDR = 0x0001, +}; + +extern void drbd_print_uuid(struct drbd_conf *mdev, unsigned int idx); + +extern void drbd_print_buffer(const char *prefix, unsigned int flags, int size, + const void *buffer, const void *buffer_va, + unsigned int length); + +/* Bio printing support */ +extern void _dump_bio(const char *pfx, struct drbd_conf *mdev, struct bio *bio, int complete, struct drbd_request *r); + +static inline void dump_bio(struct drbd_conf *mdev, + struct bio *bio, int complete, struct drbd_request *r) +{ + MTRACE(TraceTypeRq, TraceLvlSummary, + _dump_bio("Rq", mdev, bio, complete, r); + ); +} + +static inline void dump_internal_bio(const char *pfx, struct drbd_conf *mdev, struct bio *bio, int complete) +{ + MTRACE(TraceTypeIntRq, TraceLvlSummary, + _dump_bio(pfx, mdev, bio, complete, NULL); + ); +} + +/* Packet dumping support */ +extern void _dump_packet(struct drbd_conf *mdev, struct socket *sock, + int recv, union Drbd_Polymorph_Packet *p, + char *file, int line); + +static inline void +dump_packet(struct drbd_conf *mdev, struct socket *sock, + int recv, union Drbd_Polymorph_Packet *p, char *file, int line) +{ + MTRACE(TraceTypePacket, TraceLvlSummary, + _dump_packet(mdev, sock, recv, p, file, line); + ); +} + +#else + +#define MTRACE(ignored...) ((void)0) +#define TRACE(ignored...) ((void)0) + +#define dump_bio(ignored...) ((void)0) +#define dump_internal_bio(ignored...) ((void)0) +#define dump_packet(ignored...) ((void)0) +#endif + +/* drbd_req */ +extern int drbd_make_request_26(struct request_queue *q, struct bio *bio); +extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req); +extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec); +extern int is_valid_ar_handle(struct drbd_request *, sector_t); + + +/* drbd_nl.c */ +extern void drbd_suspend_io(struct drbd_conf *mdev); +extern void drbd_resume_io(struct drbd_conf *mdev); +extern char *ppsize(char *buf, unsigned long long size); +extern sector_t drbd_new_dev_size(struct drbd_conf *, + struct drbd_backing_dev *); +enum determin_dev_size_enum { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; +extern enum determin_dev_size_enum drbd_determin_dev_size(struct drbd_conf *) __must_hold(local); +extern void resync_after_online_grow(struct drbd_conf *); +extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local); +extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, + int force); +enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev); +extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); + +/* drbd_worker.c */ +extern int drbd_worker(struct Drbd_thread *thi); +extern void drbd_alter_sa(struct drbd_conf *mdev, int na); +extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side); +extern void resume_next_sg(struct drbd_conf *mdev); +extern void suspend_other_sg(struct drbd_conf *mdev); +extern int drbd_resync_finished(struct drbd_conf *mdev); +/* maybe rather drbd_main.c ? 
*/ +extern int drbd_md_sync_page_io(struct drbd_conf *mdev, + struct drbd_backing_dev *bdev, sector_t sector, int rw); +extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int); + +static inline void ov_oos_print(struct drbd_conf *mdev) +{ + if (mdev->ov_last_oos_size) { + ERR("Out of sync: start=%llu, size=%lu (sectors)\n", + (unsigned long long)mdev->ov_last_oos_start, + (unsigned long)mdev->ov_last_oos_size); + } + mdev->ov_last_oos_size=0; +} + + +void drbd_csum(struct drbd_conf *, struct crypto_hash *, struct bio *, void *); +/* worker callbacks */ +extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int); +extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int); +extern int w_e_end_data_req(struct drbd_conf *, struct drbd_work *, int); +extern int w_e_end_rsdata_req(struct drbd_conf *, struct drbd_work *, int); +extern int w_e_end_csum_rs_req(struct drbd_conf *, struct drbd_work *, int); +extern int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int); +extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int); +extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); +extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int); +extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); +extern int w_io_error(struct drbd_conf *, struct drbd_work *, int); +extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); +extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int); +extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); +extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int); +extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int); +extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int); +extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int); + +extern void resync_timer_fn(unsigned long data); + +/* drbd_receiver.c */ +extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); +extern struct Tl_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, + u64 id, + sector_t sector, + unsigned int data_size, + gfp_t gfp_mask) __must_hold(local); +extern void drbd_free_ee(struct drbd_conf *mdev, struct Tl_epoch_entry *e); +extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev, + struct list_head *head); +extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, + struct list_head *head); +extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); +extern void _drbd_clear_done_ee(struct drbd_conf *mdev); + +/* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to + * mess with get_fs/set_fs, we know we are KERNEL_DS always. 
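+ * The cork helpers below are meant to be used in pairs, e.g.
+ * drbd_tcp_cork(sock); ... queue a burst of small packets ...;
+ * drbd_tcp_uncork(sock); so the stack may coalesce them into
+ * fewer segments.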
*/ +static inline int drbd_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, int optlen) +{ + int err; + if (level == SOL_SOCKET) + err = sock_setsockopt(sock, level, optname, optval, optlen); + else + err = sock->ops->setsockopt(sock, level, optname, optval, + optlen); + return err; +} + +static inline void drbd_tcp_cork(struct socket *sock) +{ + int __user val = 1; + (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, + (char __user *)&val, sizeof(val)); +} + +static inline void drbd_tcp_uncork(struct socket *sock) +{ + int __user val = 0; + (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, + (char __user *)&val, sizeof(val)); +} + +static inline void drbd_tcp_nodelay(struct socket *sock) +{ + int __user val = 1; + (void) drbd_setsockopt(sock, SOL_TCP, TCP_NODELAY, + (char __user *)&val, sizeof(val)); +} + +static inline void drbd_tcp_quickack(struct socket *sock) +{ + int __user val = 1; + (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK, + (char __user *)&val, sizeof(val)); +} + +void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo); + +/* drbd_proc.c */ +extern struct proc_dir_entry *drbd_proc; +extern struct file_operations drbd_proc_fops; +extern const char *conns_to_name(enum drbd_conns s); +extern const char *roles_to_name(enum drbd_role s); + +/* drbd_actlog.c */ +extern void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector); +extern void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector); +extern void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector); +extern int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector); +extern int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector); +extern void drbd_rs_cancel_all(struct drbd_conf *mdev); +extern int drbd_rs_del_all(struct drbd_conf *mdev); +extern void drbd_rs_failed_io(struct drbd_conf *mdev, + sector_t sector, int size); +extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *); +extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, + int size, const char *file, const unsigned int line); +#define drbd_set_in_sync(mdev, sector, size) \ + __drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__) +extern void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, + int size, const char *file, const unsigned int line); +#define drbd_set_out_of_sync(mdev, sector, size) \ + __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__) +extern void drbd_al_apply_to_bm(struct drbd_conf *mdev); +extern void drbd_al_to_on_disk_bm(struct drbd_conf *mdev); +extern void drbd_al_shrink(struct drbd_conf *mdev); + + +/* drbd_nl.c */ + +void drbd_nl_cleanup(void); +int __init drbd_nl_init(void); +void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state_t); +void drbd_bcast_sync_progress(struct drbd_conf *mdev); +void drbd_bcast_ee(struct drbd_conf *mdev, + const char *reason, const int dgs, + const char* seen_hash, const char* calc_hash, + const struct Tl_epoch_entry* e); + + +/** DRBD State macros: + * These macros are used to express state changes in easily readable form. + * + * The NS macros expand to a mask and a value, that can be bit ored onto the + * current state as soon as the spinlock (req_lock) was taken. + * + * The _NS macros are used for state functions that get called with the + * spinlock. These macros expand directly to the new state value. 
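+ *
+ * For illustration, NS(conn, Connected) expands to the argument pair
+ *   ({ union drbd_state_t mask; mask.i = 0; mask.conn = conn_mask; mask; }),
+ *   ({ union drbd_state_t val;  val.i = 0;  val.conn = Connected;  val; })
+ * as consumed by e.g. drbd_request_state(mdev, NS(conn, Connected)).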
+ * + * Besides the basic forms NS() and _NS() additional _?NS[23] are defined + * to express state changes that affect more than one aspect of the state. + * + * E.g. NS2(conn, Connected, peer, Secondary) + * Means that the network connection was established and that the peer + * is in secondary role. + */ +#define peer_mask role_mask +#define pdsk_mask disk_mask +#define susp_mask 1 +#define user_isp_mask 1 +#define aftr_isp_mask 1 + +#define NS(T, S) \ + ({ union drbd_state_t mask; mask.i = 0; mask.T = T##_mask; mask; }), \ + ({ union drbd_state_t val; val.i = 0; val.T = (S); val; }) +#define NS2(T1, S1, T2, S2) \ + ({ union drbd_state_t mask; mask.i = 0; mask.T1 = T1##_mask; \ + mask.T2 = T2##_mask; mask; }), \ + ({ union drbd_state_t val; val.i = 0; val.T1 = (S1); \ + val.T2 = (S2); val; }) +#define NS3(T1, S1, T2, S2, T3, S3) \ + ({ union drbd_state_t mask; mask.i = 0; mask.T1 = T1##_mask; \ + mask.T2 = T2##_mask; mask.T3 = T3##_mask; mask; }), \ + ({ union drbd_state_t val; val.i = 0; val.T1 = (S1); \ + val.T2 = (S2); val.T3 = (S3); val; }) + +#define _NS(D, T, S) \ + D, ({ union drbd_state_t __ns; __ns.i = D->state.i; __ns.T = (S); __ns; }) +#define _NS2(D, T1, S1, T2, S2) \ + D, ({ union drbd_state_t __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ + __ns.T2 = (S2); __ns; }) +#define _NS3(D, T1, S1, T2, S2, T3, S3) \ + D, ({ union drbd_state_t __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ + __ns.T2 = (S2); __ns.T3 = (S3); __ns; }) + +/* + * inline helper functions + *************************/ + +static inline void drbd_state_lock(struct drbd_conf *mdev) +{ + wait_event(mdev->misc_wait, + !test_and_set_bit(CLUSTER_ST_CHANGE, &mdev->flags)); +} + +static inline void drbd_state_unlock(struct drbd_conf *mdev) +{ + clear_bit(CLUSTER_ST_CHANGE, &mdev->flags); + wake_up(&mdev->misc_wait); +} + +static inline int _drbd_set_state(struct drbd_conf *mdev, + union drbd_state_t ns, enum chg_state_flags flags, + struct completion *done) +{ + int rv; + + read_lock(&global_state_lock); + rv = __drbd_set_state(mdev, ns, flags, done); + read_unlock(&global_state_lock); + + return rv; +} + +static inline int drbd_request_state(struct drbd_conf *mdev, + union drbd_state_t mask, + union drbd_state_t val) +{ + return _drbd_request_state(mdev, mask, val, ChgStateVerbose + ChgOrdered); +} + +/** + * drbd_chk_io_error: Handles the on_io_error setting, should be called from + * all io completion handlers. See also drbd_io_error(). + */ +static inline void __drbd_chk_io_error(struct drbd_conf *mdev, int forcedetach) +{ + switch (mdev->bc->dc.on_io_error) { + case PassOn: + if (!forcedetach) { + if (printk_ratelimit()) + ERR("Local IO failed. Passing error on...\n"); + break; + } + /* NOTE fall through to detach case if forcedetach set */ + case Detach: + case CallIOEHelper: + if (mdev->state.disk > Failed) { + _drbd_set_state(_NS(mdev, disk, Failed), ChgStateHard, NULL); + ERR("Local IO failed. 
Detaching...\n");
+		}
+		break;
+	}
+}
+
+static inline void drbd_chk_io_error(struct drbd_conf *mdev,
+	int error, int forcedetach)
+{
+	if (error) {
+		unsigned long flags;
+		spin_lock_irqsave(&mdev->req_lock, flags);
+		__drbd_chk_io_error(mdev, forcedetach);
+		spin_unlock_irqrestore(&mdev->req_lock, flags);
+	}
+}
+
+/* Returns the first sector number of our meta data,
+ * which, for internal meta data, happens to be the maximum capacity
+ * we could agree upon with our peer
+ */
+static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
+{
+	switch (bdev->dc.meta_dev_idx) {
+	case DRBD_MD_INDEX_INTERNAL:
+	case DRBD_MD_INDEX_FLEX_INT:
+		return bdev->md.md_offset + bdev->md.bm_offset;
+	case DRBD_MD_INDEX_FLEX_EXT:
+	default:
+		return bdev->md.md_offset;
+	}
+}
+
+/* returns the last sector number of our meta data,
+ * to be able to catch out of band md access */
+static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
+{
+	switch (bdev->dc.meta_dev_idx) {
+	case DRBD_MD_INDEX_INTERNAL:
+	case DRBD_MD_INDEX_FLEX_INT:
+		return bdev->md.md_offset + MD_AL_OFFSET - 1;
+	case DRBD_MD_INDEX_FLEX_EXT:
+	default:
+		return bdev->md.md_offset + bdev->md.md_size_sect;
+	}
+}
+
+/* Returns the number of 512 byte sectors of the device */
+static inline sector_t drbd_get_capacity(struct block_device *bdev)
+{
+	/* return bdev ? get_capacity(bdev->bd_disk) : 0; */
+	return bdev ? bdev->bd_inode->i_size >> 9 : 0;
+}
+
+/* returns the capacity we announce to our peer.
+ * we clip ourselves at the various MAX_SECTORS, because if we don't,
+ * the current implementation will oops sooner or later */
+static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
+{
+	sector_t s;
+	switch (bdev->dc.meta_dev_idx) {
+	case DRBD_MD_INDEX_INTERNAL:
+	case DRBD_MD_INDEX_FLEX_INT:
+		s = drbd_get_capacity(bdev->backing_bdev)
+			? min_t(sector_t, DRBD_MAX_SECTORS_FLEX,
+				drbd_md_first_sector(bdev))
+			: 0;
+		break;
+	case DRBD_MD_INDEX_FLEX_EXT:
+		s = min_t(sector_t, DRBD_MAX_SECTORS_FLEX,
+				drbd_get_capacity(bdev->backing_bdev));
+		/* clip at maximum size the meta device can support */
+		s = min_t(sector_t, s,
+			BM_EXT_TO_SECT(bdev->md.md_size_sect
+				     - bdev->md.bm_offset));
+		break;
+	default:
+		s = min_t(sector_t, DRBD_MAX_SECTORS,
+				drbd_get_capacity(bdev->backing_bdev));
+	}
+	return s;
+}
+
+/* returns the sector number of our meta data 'super' block */
+static inline sector_t drbd_md_ss__(struct drbd_conf *mdev,
+				    struct drbd_backing_dev *bdev)
+{
+	switch (bdev->dc.meta_dev_idx) {
+	default: /* external, some index */
+		return MD_RESERVED_SECT * bdev->dc.meta_dev_idx;
+	case DRBD_MD_INDEX_INTERNAL:
+		/* with drbd08, internal meta data is always "flexible" */
+	case DRBD_MD_INDEX_FLEX_INT:
+		/* sizeof(struct md_on_disk_07) == 4k
+		 * position: last 4k aligned block of 4k size */
+		if (!bdev->backing_bdev) {
+			if (__ratelimit(&drbd_ratelimit_state)) {
+				ERR("bdev->backing_bdev==NULL\n");
+				dump_stack();
+			}
+			return 0;
+		}
+		return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL)
+			- MD_AL_OFFSET;
+	case DRBD_MD_INDEX_FLEX_EXT:
+		return 0;
+	}
+}
+
+static inline void
+_drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
+{
+	list_add_tail(&w->list, &q->q);
+	up(&q->s);
+}
+
+static inline void
+drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&q->q_lock, flags);
+	list_add(&w->list, &q->q);
+	up(&q->s); /* within the spinlock,
+		      see comment near end of drbd_worker() */
+	spin_unlock_irqrestore(&q->q_lock, flags);
+}
+
+static inline void
+drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&q->q_lock, flags);
+	list_add_tail(&w->list, &q->q);
+	up(&q->s); /* within the spinlock,
+		      see comment near end of drbd_worker() */
+	spin_unlock_irqrestore(&q->q_lock, flags);
+}
+
+static inline void wake_asender(struct drbd_conf *mdev)
+{
+	if (test_bit(SIGNAL_ASENDER, &mdev->flags))
+		force_sig(DRBD_SIG, mdev->asender.task);
+}
+
+static inline void request_ping(struct drbd_conf *mdev)
+{
+	set_bit(SEND_PING, &mdev->flags);
+	wake_asender(mdev);
+}
+
+static inline int drbd_send_short_cmd(struct drbd_conf *mdev,
+	enum Drbd_Packet_Cmd cmd)
+{
+	struct Drbd_Header h;
+	return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h));
+}
+
+static inline int drbd_send_ping(struct drbd_conf *mdev)
+{
+	struct Drbd_Header h;
+	return drbd_send_cmd(mdev, USE_META_SOCKET, Ping, &h, sizeof(h));
+}
+
+static inline int drbd_send_ping_ack(struct drbd_conf *mdev)
+{
+	struct Drbd_Header h;
+	return drbd_send_cmd(mdev, USE_META_SOCKET, PingAck, &h, sizeof(h));
+}
+
+static inline void drbd_thread_stop(struct Drbd_thread *thi)
+{
+	_drbd_thread_stop(thi, FALSE, TRUE);
+}
+
+static inline void drbd_thread_stop_nowait(struct Drbd_thread *thi)
+{
+	_drbd_thread_stop(thi, FALSE, FALSE);
+}
+
+static inline void drbd_thread_restart_nowait(struct Drbd_thread *thi)
+{
+	_drbd_thread_stop(thi, TRUE, FALSE);
+}
+
+/* counts how many answer packets we expect from our peer,
+ * for either explicit application requests,
+ * or implicit barrier packets as necessary.
+ * increased:
+ *  w_send_barrier
+ *  _req_mod(req, queue_for_net_write or queue_for_net_read);
+ *    it is much easier and equally valid to count what we queue for the
+ *    worker, even before it actually was queued or sent.
+ * (drbd_make_request_common; recovery path on read io-error) + * decreased: + * got_BarrierAck (respective tl_clear, tl_clear_barrier) + * _req_mod(req, data_received) + * [from receive_DataReply] + * _req_mod(req, write_acked_by_peer or recv_acked_by_peer or neg_acked) + * [from got_BlockAck (WriteAck, RecvAck)] + * for some reason it is NOT decreased in got_NegAck, + * but in the resulting cleanup code from report_params. + * we should try to remember the reason for that... + * _req_mod(req, send_failed or send_canceled) + * _req_mod(req, connection_lost_while_pending) + * [from tl_clear_barrier] + */ +static inline void inc_ap_pending(struct drbd_conf *mdev) +{ + atomic_inc(&mdev->ap_pending_cnt); +} + +#define ERR_IF_CNT_IS_NEGATIVE(which) \ + if (atomic_read(&mdev->which) < 0) \ + ERR("in %s:%d: " #which " = %d < 0 !\n", \ + __func__ , __LINE__ , \ + atomic_read(&mdev->which)) + +#define dec_ap_pending(mdev) do { \ + typecheck(struct drbd_conf *, mdev); \ + if (atomic_dec_and_test(&mdev->ap_pending_cnt)) \ + wake_up(&mdev->misc_wait); \ + ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt); } while (0) + +/* counts how many resync-related answers we still expect from the peer + * increase decrease + * SyncTarget sends RSDataRequest (and expects RSDataReply) + * SyncSource sends RSDataReply (and expects WriteAck whith ID_SYNCER) + * (or NegAck with ID_SYNCER) + */ +static inline void inc_rs_pending(struct drbd_conf *mdev) +{ + atomic_inc(&mdev->rs_pending_cnt); +} + +#define dec_rs_pending(mdev) do { \ + typecheck(struct drbd_conf *, mdev); \ + atomic_dec(&mdev->rs_pending_cnt); \ + ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt); } while (0) + +/* counts how many answers we still need to send to the peer. + * increased on + * receive_Data unless protocol A; + * we need to send a RecvAck (proto B) + * or WriteAck (proto C) + * receive_RSDataReply (recv_resync_read) we need to send a WriteAck + * receive_DataRequest (receive_RSDataRequest) we need to send back Data + * receive_Barrier_* we need to send a BarrierAck + */ +static inline void inc_unacked(struct drbd_conf *mdev) +{ + atomic_inc(&mdev->unacked_cnt); +} + +#define dec_unacked(mdev) do { \ + typecheck(struct drbd_conf *, mdev); \ + atomic_dec(&mdev->unacked_cnt); \ + ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0) + +#define sub_unacked(mdev, n) do { \ + typecheck(struct drbd_conf *, mdev); \ + atomic_sub(n, &mdev->unacked_cnt); \ + ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0) + + +static inline void dec_net(struct drbd_conf *mdev) +{ + if (atomic_dec_and_test(&mdev->net_cnt)) + wake_up(&mdev->misc_wait); +} + +/** + * inc_net: Returns TRUE when it is ok to access mdev->net_conf. You + * should call dec_net() when finished looking at mdev->net_conf. + */ +static inline int inc_net(struct drbd_conf *mdev) +{ + int have_net_conf; + + atomic_inc(&mdev->net_cnt); + have_net_conf = mdev->state.conn >= Unconnected; + if (!have_net_conf) + dec_net(mdev); + return have_net_conf; +} + +/** + * inc_local: Returns TRUE when local IO is possible. If it returns + * TRUE you should call dec_local() after IO is completed. 
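+ *
+ * Typical usage is the pattern drbd_kick_lo() below also follows:
+ *   if (inc_local(mdev)) {
+ *           ... access mdev->bc ...
+ *           dec_local(mdev);
+ *   }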
+ */ +#define inc_local_if_state(M,MINS) __cond_lock(local, _inc_local_if_state(M,MINS)) +#define inc_local(M) __cond_lock(local, _inc_local_if_state(M,Inconsistent)) + +static inline void dec_local(struct drbd_conf *mdev) +{ + __release(local); + if (atomic_dec_and_test(&mdev->local_cnt)) + wake_up(&mdev->misc_wait); + D_ASSERT(atomic_read(&mdev->local_cnt) >= 0); +} + +#ifndef __CHECKER__ +static inline int _inc_local_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) +{ + int io_allowed; + + atomic_inc(&mdev->local_cnt); + io_allowed = (mdev->state.disk >= mins); + if (!io_allowed) + dec_local(mdev); + return io_allowed; +} +#else +extern int _inc_local_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins); +#endif + +/* you must have an "inc_local" reference */ +static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, + unsigned long *bits_left, unsigned int *per_mil_done) +{ + /* + * this is to break it at compile time when we change that + * (we may feel 4TB maximum storage per drbd is not enough) + */ + typecheck(unsigned long, mdev->rs_total); + + /* note: both rs_total and rs_left are in bits, i.e. in + * units of BM_BLOCK_SIZE. + * for the percentage, we don't care. */ + + *bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; + /* >> 10 to prevent overflow, + * +1 to prevent division by zero */ + if (*bits_left > mdev->rs_total) { + /* doh. maybe a logic bug somewhere. + * may also be just a race condition + * between this and a disconnect during sync. + * for now, just prevent in-kernel buffer overflow. + */ + smp_rmb(); + drbd_WARN("cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n", + conns_to_name(mdev->state.conn), + *bits_left, mdev->rs_total, mdev->rs_failed); + *per_mil_done = 0; + } else { + /* make sure the calculation happens in long context */ + unsigned long tmp = 1000UL - + (*bits_left >> 10)*1000UL + / ((mdev->rs_total >> 10) + 1UL); + *per_mil_done = tmp; + } +} + + +/* this throttles on-the-fly application requests + * according to max_buffers settings; + * maybe re-implement using semaphores? */ +static inline int drbd_get_max_buffers(struct drbd_conf *mdev) +{ + int mxb = 1000000; /* arbitrary limit on open requests */ + if (inc_net(mdev)) { + mxb = mdev->net_conf->max_buffers; + dec_net(mdev); + } + return mxb; +} + +static inline int drbd_state_is_stable(union drbd_state_t s) +{ + + /* DO NOT add a default clause, we want the compiler to warn us + * for any newly introduced state we may have forgotten to add here */ + + switch ((enum drbd_conns)s.conn) { + /* new io only accepted when there is no connection, ... */ + case StandAlone: + case WFConnection: + /* ... or there is a well established connection. */ + case Connected: + case SyncSource: + case SyncTarget: + case VerifyS: + case VerifyT: + case PausedSyncS: + case PausedSyncT: + /* maybe stable, look at the disk state */ + break; + + /* no new io accepted during tansitional states + * like handshake or teardown */ + case Disconnecting: + case Unconnected: + case Timeout: + case BrokenPipe: + case NetworkFailure: + case ProtocolError: + case TearDown: + case WFReportParams: + case StartingSyncS: + case StartingSyncT: + case WFBitMapS: + case WFBitMapT: + case WFSyncUUID: + case conn_mask: + /* not "stable" */ + return 0; + } + + switch ((enum drbd_disk_state)s.disk) { + case Diskless: + case Inconsistent: + case Outdated: + case Consistent: + case UpToDate: + /* disk state is stable as well. 
*/ + break; + + /* no new io accepted during tansitional states */ + case Attaching: + case Failed: + case Negotiating: + case DUnknown: + case disk_mask: + /* not "stable" */ + return 0; + } + + return 1; +} + +static inline int __inc_ap_bio_cond(struct drbd_conf *mdev) +{ + int mxb = drbd_get_max_buffers(mdev); + + if (mdev->state.susp) + return 0; + if (test_bit(SUSPEND_IO, &mdev->flags)) + return 0; + + /* to avoid potential deadlock or bitmap corruption, + * in various places, we only allow new application io + * to start during "stable" states. */ + + /* no new io accepted when attaching or detaching the disk */ + if (!drbd_state_is_stable(mdev->state)) + return 0; + + /* since some older kernels don't have atomic_add_unless, + * and we are within the spinlock anyways, we have this workaround. */ + if (atomic_read(&mdev->ap_bio_cnt) > mxb) + return 0; + if (test_bit(BITMAP_IO, &mdev->flags)) + return 0; + return 1; +} + +/* I'd like to use wait_event_lock_irq, + * but I'm not sure when it got introduced, + * and not sure when it has 3 or 4 arguments */ +static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two) +{ + /* compare with after_state_ch, + * os.conn != WFBitMapS && ns.conn == WFBitMapS */ + DEFINE_WAIT(wait); + + /* we wait here + * as long as the device is suspended + * until the bitmap is no longer on the fly during connection + * handshake as long as we would exeed the max_buffer limit. + * + * to avoid races with the reconnect code, + * we need to atomic_inc within the spinlock. */ + + spin_lock_irq(&mdev->req_lock); + while (!__inc_ap_bio_cond(mdev)) { + prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE); + spin_unlock_irq(&mdev->req_lock); + schedule(); + finish_wait(&mdev->misc_wait, &wait); + spin_lock_irq(&mdev->req_lock); + } + atomic_add(one_or_two, &mdev->ap_bio_cnt); + spin_unlock_irq(&mdev->req_lock); +} + +static inline void dec_ap_bio(struct drbd_conf *mdev) +{ + int mxb = drbd_get_max_buffers(mdev); + int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt); + + D_ASSERT(ap_bio >= 0); + /* this currently does wake_up for every dec_ap_bio! + * maybe rather introduce some type of hysteresis? + * e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */ + if (ap_bio < mxb) + wake_up(&mdev->misc_wait); + if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) { + if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) + drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); + } +} + +static inline void drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) +{ + mdev->ed_uuid = val; + + MTRACE(TraceTypeUuid, TraceLvlMetrics, + INFO(" exposed data uuid now %016llX\n", + (unsigned long long)val); + ); +} + +static inline int seq_cmp(u32 a, u32 b) +{ + /* we assume wrap around at 32bit. + * for wrap around at 24bit (old atomic_t), + * we'd have to + * a <<= 8; b <<= 8; + */ + return (s32)(a) - (s32)(b); +} +#define seq_lt(a, b) (seq_cmp((a), (b)) < 0) +#define seq_gt(a, b) (seq_cmp((a), (b)) > 0) +#define seq_ge(a, b) (seq_cmp((a), (b)) >= 0) +#define seq_le(a, b) (seq_cmp((a), (b)) <= 0) +/* CAUTION: please no side effects in arguments! */ +#define seq_max(a, b) ((u32)(seq_gt((a), (b)) ? 
(a) : (b))) + +static inline void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq) +{ + unsigned int m; + spin_lock(&mdev->peer_seq_lock); + m = seq_max(mdev->peer_seq, new_seq); + mdev->peer_seq = m; + spin_unlock(&mdev->peer_seq_lock); + if (m == new_seq) + wake_up(&mdev->seq_wait); +} + +static inline void drbd_update_congested(struct drbd_conf *mdev) +{ + struct sock *sk = mdev->data.socket->sk; + if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) + set_bit(NET_CONGESTED, &mdev->flags); +} + +static inline int drbd_queue_order_type(struct drbd_conf *mdev) +{ + /* sorry, we currently have no working implementation + * of distributed TCQ stuff */ +#ifndef QUEUE_ORDERED_NONE +#define QUEUE_ORDERED_NONE 0 +#endif + return QUEUE_ORDERED_NONE; +} + +static inline void drbd_blk_run_queue(struct request_queue *q) +{ + if (q && q->unplug_fn) + q->unplug_fn(q); +} + +static inline void drbd_kick_lo(struct drbd_conf *mdev) +{ + if (inc_local(mdev)) { + drbd_blk_run_queue(bdev_get_queue(mdev->bc->backing_bdev)); + dec_local(mdev); + } +} + +static inline void drbd_md_flush(struct drbd_conf *mdev) +{ + int r; + + if (test_bit(MD_NO_BARRIER, &mdev->flags)) + return; + + r = blkdev_issue_flush(mdev->bc->md_bdev, NULL); + if (r) { + set_bit(MD_NO_BARRIER, &mdev->flags); + ERR("meta data flush failed with status %d, disabling md-flushes\n", r); + } +} + +#endif diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h new file mode 100644 index 000000000000..e1dc3af1ddff --- /dev/null +++ b/drivers/block/drbd/drbd_wrappers.h @@ -0,0 +1,117 @@ +#include +#include + + +/* see get_sb_bdev and bd_claim */ +extern char *drbd_sec_holder; + +static inline sector_t drbd_get_hardsect(struct block_device *bdev) +{ + return bdev->bd_disk->queue->hardsect_size; +} + +/* sets the number of 512 byte sectors of our virtual device */ +static inline void drbd_set_my_capacity(struct drbd_conf *mdev, + sector_t size) +{ + /* set_capacity(mdev->this_bdev->bd_disk, size); */ + set_capacity(mdev->vdisk, size); + mdev->this_bdev->bd_inode->i_size = (loff_t)size << 9; +} + +#define drbd_bio_uptodate(bio) bio_flagged(bio, BIO_UPTODATE) + +static inline int drbd_bio_has_active_page(struct bio *bio) +{ + struct bio_vec *bvec; + int i; + + __bio_for_each_segment(bvec, bio, i, 0) { + if (page_count(bvec->bv_page) > 1) + return 1; + } + + return 0; +} + +/* bi_end_io handlers */ +extern void drbd_md_io_complete(struct bio *bio, int error); +extern void drbd_endio_read_sec(struct bio *bio, int error); +extern void drbd_endio_write_sec(struct bio *bio, int error); +extern void drbd_endio_pri(struct bio *bio, int error); + +/* how to get to the kobj of a gendisk. 
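+ * (compat illustration: with disk_to_kobj() defined below, callers
+ * can write, independent of kernel version,
+ *   kobject_uevent(disk_to_kobj(mdev->vdisk), KOBJ_CHANGE);
+ * as drbd_kobject_uevent() below does)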
+ * see also upstream commits + * edfaa7c36574f1bf09c65ad602412db9da5f96bf + * ed9e1982347b36573cd622ee5f4e2a7ccd79b3fd + * 548b10eb2959c96cef6fc29fc96e0931eeb53bc5 + */ +#ifndef dev_to_disk +# define disk_to_kobj(disk) (&(disk)->kobj) +#else +# ifndef disk_to_dev +# define disk_to_dev(disk) (&(disk)->dev) +# endif +# define disk_to_kobj(disk) (&disk_to_dev(disk)->kobj) +#endif +static inline void drbd_kobject_uevent(struct drbd_conf *mdev) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10) +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,15) + kobject_uevent(disk_to_kobj(mdev->vdisk), KOBJ_CHANGE, NULL); +#else + kobject_uevent(disk_to_kobj(mdev->vdisk), KOBJ_CHANGE); + /* rhel4 / sles9 and older don't have this at all, + * which means user space (udev) won't get events about possible changes of + * corresponding resource + disk names after the initial drbd minor creation. + */ +#endif +#endif +} + + +/* + * used to submit our private bio + */ +static inline void drbd_generic_make_request(struct drbd_conf *mdev, + int fault_type, struct bio *bio) +{ + __release(local); + if (!bio->bi_bdev) { + printk(KERN_ERR "drbd%d: drbd_generic_make_request: " + "bio->bi_bdev == NULL\n", + mdev_to_minor(mdev)); + dump_stack(); + bio_endio(bio, -ENODEV); + return; + } + + if (FAULT_ACTIVE(mdev, fault_type)) + bio_endio(bio, -EIO); + else + generic_make_request(bio); +} + +static inline void drbd_plug_device(struct drbd_conf *mdev) +{ + struct request_queue *q; + q = bdev_get_queue(mdev->this_bdev); + + spin_lock_irq(q->queue_lock); + +/* XXX the check on !blk_queue_plugged is redundant, + * implicitly checked in blk_plug_device */ + + if (!blk_queue_plugged(q)) { + blk_plug_device(q); + del_timer(&q->unplug_timer); + /* unplugging should not happen automatically... */ + } + spin_unlock_irq(q->queue_lock); +} + +#ifndef __CHECKER__ +# undef __cond_lock +# define __cond_lock(x,c) (c) +#endif + -- cgit v1.2.3 From 8a4ebc3fbd90072ac870064a930fe612eea6d9a1 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 30 Mar 2009 18:47:15 +0200 Subject: DRBD: main The DRBD state engine, and lots of other stuff, that does not have its own source file. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4034 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 4034 insertions(+) create mode 100644 drivers/block/drbd/drbd_main.c diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c new file mode 100644 index 000000000000..9ca38a06fdf1 --- /dev/null +++ b/drivers/block/drbd/drbd_main.c @@ -0,0 +1,4034 @@ +/* + drbd.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 1999-2008, Philipp Reisner . + Copyright (C) 2002-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define __KERNEL_SYSCALLS__ +#include +#include + +#include +#include +#include "drbd_int.h" +#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */ + +#include "drbd_vli.h" + +struct after_state_chg_work { + struct drbd_work w; + union drbd_state_t os; + union drbd_state_t ns; + enum chg_state_flags flags; + struct completion *done; +}; + +int drbdd_init(struct Drbd_thread *); +int drbd_worker(struct Drbd_thread *); +int drbd_asender(struct Drbd_thread *); + +int drbd_init(void); +static int drbd_open(struct block_device *bdev, fmode_t mode); +static int drbd_release(struct gendisk *gd, fmode_t mode); +STATIC int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused); +STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state_t os, + union drbd_state_t ns, enum chg_state_flags flags); +STATIC int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); +STATIC void md_sync_timer_fn(unsigned long data); +STATIC int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); + +MODULE_AUTHOR("Philipp Reisner , " + "Lars Ellenberg "); +MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION); +MODULE_LICENSE("GPL"); +MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (1-255)"); +MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR); + +#include +/* allow_open_on_secondary */ +MODULE_PARM_DESC(allow_oos, "DONT USE!"); +/* thanks to these macros, if compiled into the kernel (not-module), + * this becomes the boot parameter drbd.minor_count */ +module_param(minor_count, uint, 0444); +module_param(allow_oos, bool, 0); +module_param(cn_idx, uint, 0444); + +#ifdef DRBD_ENABLE_FAULTS +int enable_faults; +int fault_rate; +static int fault_count; +int fault_devs; +/* bitmap of enabled faults */ +module_param(enable_faults, int, 0664); +/* fault rate % value - applies to all enabled faults */ +module_param(fault_rate, int, 0664); +/* count of faults inserted */ +module_param(fault_count, int, 0664); +/* bitmap of devices to insert faults on */ +module_param(fault_devs, int, 0644); +#endif + +/* module parameter, defined */ +unsigned int minor_count = 32; +int allow_oos; +unsigned int cn_idx = CN_IDX_DRBD; + +#ifdef ENABLE_DYNAMIC_TRACE +int trace_type; /* Bitmap of trace types to enable */ +int trace_level; /* Current trace level */ +int trace_devs; /* Bitmap of devices to trace */ +int proc_details; /* Detail level in proc drbd*/ + +module_param(trace_level, int, 0644); +module_param(trace_type, int, 0644); +module_param(trace_devs, int, 0644); +module_param(proc_details, int, 0644); +#endif + +/* Module parameter for setting the user mode helper program + * to run. Default is /sbin/drbdadm */ +char usermode_helper[80] = "/sbin/drbdadm"; + +module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644); + +/* in 2.6.x, our device mapping and config info contains our virtual gendisks + * as member "struct gendisk *vdisk;" + */ +struct drbd_conf **minor_table; + +struct kmem_cache *drbd_request_cache; +struct kmem_cache *drbd_ee_cache; +mempool_t *drbd_request_mempool; +mempool_t *drbd_ee_mempool; + +/* I do not use a standard mempool, because: + 1) I want to hand out the preallocated objects first. + 2) I want to be able to interrupt sleeping allocation with a signal. 
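+   As an aside, a sketch of the idea (editorial illustration only;
+   the actual alloc/free helpers live elsewhere in drbd): popping one
+   page off the pool under the lock would look roughly like
+	spin_lock(&drbd_pp_lock);
+	page = drbd_pp_pool;
+	if (page) {
+		drbd_pp_pool = (struct page *)page_private(page);
+		drbd_pp_vacant--;
+	}
+	spin_unlock(&drbd_pp_lock);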
+ Note: This is a single linked list, the next pointer is the private + member of struct page. + */ +struct page *drbd_pp_pool; +spinlock_t drbd_pp_lock; +int drbd_pp_vacant; +wait_queue_head_t drbd_pp_wait; + +DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5); + +STATIC struct block_device_operations drbd_ops = { + .owner = THIS_MODULE, + .open = drbd_open, + .release = drbd_release, +}; + +#define ARRY_SIZE(A) (sizeof(A)/sizeof(A[0])) + +#ifdef __CHECKER__ +/* When checking with sparse, and this is an inline function, sparse will + give tons of false positives. When this is a real functions sparse works. + */ +int _inc_local_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) +{ + int io_allowed; + + atomic_inc(&mdev->local_cnt); + io_allowed = (mdev->state.disk >= mins); + if (!io_allowed) { + if (atomic_dec_and_test(&mdev->local_cnt)) + wake_up(&mdev->misc_wait); + } + return io_allowed; +} + +#endif + +/************************* The transfer log start */ +STATIC int tl_init(struct drbd_conf *mdev) +{ + struct drbd_barrier *b; + + b = kmalloc(sizeof(struct drbd_barrier), GFP_KERNEL); + if (!b) + return 0; + INIT_LIST_HEAD(&b->requests); + INIT_LIST_HEAD(&b->w.list); + b->next = NULL; + b->br_number = 4711; + b->n_req = 0; + b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ + + mdev->oldest_barrier = b; + mdev->newest_barrier = b; + INIT_LIST_HEAD(&mdev->out_of_sequence_requests); + + mdev->tl_hash = NULL; + mdev->tl_hash_s = 0; + + return 1; +} + +STATIC void tl_cleanup(struct drbd_conf *mdev) +{ + D_ASSERT(mdev->oldest_barrier == mdev->newest_barrier); + D_ASSERT(list_empty(&mdev->out_of_sequence_requests)); + kfree(mdev->oldest_barrier); + mdev->oldest_barrier = NULL; + kfree(mdev->unused_spare_barrier); + mdev->unused_spare_barrier = NULL; + kfree(mdev->tl_hash); + mdev->tl_hash = NULL; + mdev->tl_hash_s = 0; +} + +/** + * _tl_add_barrier: Adds a barrier to the TL. + */ +void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_barrier *new) +{ + struct drbd_barrier *newest_before; + + INIT_LIST_HEAD(&new->requests); + INIT_LIST_HEAD(&new->w.list); + new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ + new->next = NULL; + new->n_req = 0; + + newest_before = mdev->newest_barrier; + /* never send a barrier number == 0, because that is special-cased + * when using TCQ for our write ordering code */ + new->br_number = (newest_before->br_number+1) ?: 1; + if (mdev->newest_barrier != new) { + mdev->newest_barrier->next = new; + mdev->newest_barrier = new; + } +} + +/* when we receive a barrier ack */ +void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, + unsigned int set_size) +{ + struct drbd_barrier *b, *nob; /* next old barrier */ + struct list_head *le, *tle; + struct drbd_request *r; + + spin_lock_irq(&mdev->req_lock); + + b = mdev->oldest_barrier; + + /* first some paranoia code */ + if (b == NULL) { + ERR("BAD! BarrierAck #%u received, but no epoch in tl!?\n", + barrier_nr); + goto bail; + } + if (b->br_number != barrier_nr) { + ERR("BAD! BarrierAck #%u received, expected #%u!\n", + barrier_nr, b->br_number); + goto bail; + } + if (b->n_req != set_size) { + ERR("BAD! 
BarrierAck #%u received with n_req=%u, expected n_req=%u!\n",
+ barrier_nr, set_size, b->n_req);
+ goto bail;
+ }
+
+ /* Clean up list of requests processed during current epoch */
+ list_for_each_safe(le, tle, &b->requests) {
+ r = list_entry(le, struct drbd_request, tl_requests);
+ _req_mod(r, barrier_acked, 0);
+ }
+ /* There could be requests on the list waiting for completion
+ of the write to the local disk. To avoid corruption of the
+ slab's data structures we have to remove the list's head.
+
+ Also there could have been a barrier ack out of sequence, overtaking
+ the write acks - which would be a bug and would violate write ordering.
+ To avoid deadlocking in case we lose the connection while such requests
+ are still pending, we need some way to find them for the
+ _req_mod(connection_lost_while_pending).
+
+ These have been list_move'd to the out_of_sequence_requests list in
+ _req_mod(, barrier_acked,) above.
+ */
+ list_del_init(&b->requests);
+
+ nob = b->next;
+ if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
+ _tl_add_barrier(mdev, b);
+ if (nob)
+ mdev->oldest_barrier = nob;
+ /* if nob == NULL, b was the only barrier, and becomes the new
+ barrier. Therefore mdev->oldest_barrier already points to b */
+ } else {
+ D_ASSERT(nob != NULL);
+ mdev->oldest_barrier = nob;
+ kfree(b);
+ }
+
+ spin_unlock_irq(&mdev->req_lock);
+ dec_ap_pending(mdev);
+
+ return;
+
+bail:
+ spin_unlock_irq(&mdev->req_lock);
+ drbd_force_state(mdev, NS(conn, ProtocolError));
+}
+
+
+/* called by drbd_disconnect (exiting receiver thread)
+ * or from some after_state_ch */
+void tl_clear(struct drbd_conf *mdev)
+{
+ struct drbd_barrier *b, *tmp;
+ struct list_head *le, *tle;
+ struct drbd_request *r;
+ int new_initial_bnr = net_random();
+
+ spin_lock_irq(&mdev->req_lock);
+
+ b = mdev->oldest_barrier;
+ while (b) {
+ list_for_each_safe(le, tle, &b->requests) {
+ r = list_entry(le, struct drbd_request, tl_requests);
+ _req_mod(r, connection_lost_while_pending, 0);
+ }
+ tmp = b->next;
+
+ /* there could still be requests on that ring list,
+ * in case local io is still pending */
+ list_del(&b->requests);
+
+ /* dec_ap_pending corresponding to queue_barrier.
+ * the newest barrier may not have been queued yet,
+ * in which case w.cb is still NULL. */
+ if (b->w.cb != NULL)
+ dec_ap_pending(mdev);
+
+ if (b == mdev->newest_barrier) {
+ /* recycle, but reinit! */
+ D_ASSERT(tmp == NULL);
+ INIT_LIST_HEAD(&b->requests);
+ INIT_LIST_HEAD(&b->w.list);
+ b->w.cb = NULL;
+ b->br_number = new_initial_bnr;
+ b->n_req = 0;
+
+ mdev->oldest_barrier = b;
+ break;
+ }
+ kfree(b);
+ b = tmp;
+ }
+
+ /* we expect this list to be empty. */
+ D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
+
+ /* but just in case, clean it up anyways! */
+ list_for_each_safe(le, tle, &mdev->out_of_sequence_requests) {
+ r = list_entry(le, struct drbd_request, tl_requests);
+ _req_mod(r, connection_lost_while_pending, 0);
+ }
+
+ /* ensure bit indicating barrier is required is clear */
+ clear_bit(CREATE_BARRIER, &mdev->flags);
+
+ spin_unlock_irq(&mdev->req_lock);
+}
+
+/**
+ * drbd_io_error: Handles the on_io_error setting, should be called in the
+ * unlikely(!drbd_bio_uptodate(e->bio)) case from kernel thread context.
+ * See also drbd_chk_io_error.
+ *
+ * NOTE: we set ourselves FAILED here if on_io_error is Detach or Panic OR
+ * if the forcedetach flag is set. This flag is set when failures
+ * occur writing the meta data portion of the disk as they are
+ * not recoverable.
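+ *
+ * Typical call site, as per the comment above (for illustration):
+ *   if (unlikely(!drbd_bio_uptodate(e->bio)))
+ *           drbd_io_error(mdev, FALSE);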
+ */ +int drbd_io_error(struct drbd_conf *mdev, int forcedetach) +{ + enum io_error_handler eh; + unsigned long flags; + int send; + int ok = 1; + + eh = PassOn; + if (inc_local_if_state(mdev, Failed)) { + eh = mdev->bc->dc.on_io_error; + dec_local(mdev); + } + + if (!forcedetach && eh == PassOn) + return 1; + + spin_lock_irqsave(&mdev->req_lock, flags); + send = (mdev->state.disk == Failed); + if (send) + _drbd_set_state(_NS(mdev, disk, Diskless), ChgStateHard, NULL); + spin_unlock_irqrestore(&mdev->req_lock, flags); + + if (!send) + return ok; + + if (mdev->state.conn >= Connected) { + ok = drbd_send_state(mdev); + if (ok) + drbd_WARN("Notified peer that my disk is broken.\n"); + else + ERR("Sending state in drbd_io_error() failed\n"); + } + + /* Make sure we try to flush meta-data to disk - we come + * in here because of a local disk error so it might fail + * but we still need to try -- both because the error might + * be in the data portion of the disk and because we need + * to ensure the md-sync-timer is stopped if running. */ + drbd_md_sync(mdev); + + /* Releasing the backing device is done in after_state_ch() */ + + if (eh == CallIOEHelper) + drbd_khelper(mdev, "local-io-error"); + + return ok; +} + +/** + * cl_wide_st_chg: + * Returns TRUE if this state change should be preformed as a cluster wide + * transaction. Of course it returns 0 as soon as the connection is lost. + */ +STATIC int cl_wide_st_chg(struct drbd_conf *mdev, + union drbd_state_t os, union drbd_state_t ns) +{ + return (os.conn >= Connected && ns.conn >= Connected && + ((os.role != Primary && ns.role == Primary) || + (os.conn != StartingSyncT && ns.conn == StartingSyncT) || + (os.conn != StartingSyncS && ns.conn == StartingSyncS) || + (os.disk != Diskless && ns.disk == Diskless))) || + (os.conn >= Connected && ns.conn == Disconnecting) || + (os.conn == Connected && ns.conn == VerifyS); +} + +int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, + union drbd_state_t mask, union drbd_state_t val) +{ + unsigned long flags; + union drbd_state_t os, ns; + int rv; + + spin_lock_irqsave(&mdev->req_lock, flags); + os = mdev->state; + ns.i = (os.i & ~mask.i) | val.i; + rv = _drbd_set_state(mdev, ns, f, NULL); + ns = mdev->state; + spin_unlock_irqrestore(&mdev->req_lock, flags); + + return rv; +} + +void drbd_force_state(struct drbd_conf *mdev, + union drbd_state_t mask, union drbd_state_t val) +{ + drbd_change_state(mdev, ChgStateHard, mask, val); +} + +int is_valid_state(struct drbd_conf *mdev, union drbd_state_t ns); +int is_valid_state_transition(struct drbd_conf *, + union drbd_state_t, union drbd_state_t); +int drbd_send_state_req(struct drbd_conf *, + union drbd_state_t, union drbd_state_t); + +STATIC enum set_st_err _req_st_cond(struct drbd_conf *mdev, + union drbd_state_t mask, union drbd_state_t val) +{ + union drbd_state_t os, ns; + unsigned long flags; + int rv; + + if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags)) + return SS_CW_Success; + + if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags)) + return SS_CW_FailedByPeer; + + rv = 0; + spin_lock_irqsave(&mdev->req_lock, flags); + os = mdev->state; + ns.i = (os.i & ~mask.i) | val.i; + if (!cl_wide_st_chg(mdev, os, ns)) + rv = SS_CW_NoNeed; + if (!rv) { + rv = is_valid_state(mdev, ns); + if (rv == SS_Success) { + rv = is_valid_state_transition(mdev, ns, os); + if (rv == SS_Success) + rv = 0; /* cont waiting, otherwise fail. 
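+ * (a return value of 0 keeps the wait_event()
+ * in drbd_req_state() asleep:
+ *   wait_event(mdev->state_wait,
+ *      (rv = _req_st_cond(mdev, mask, val)));
+ * only a nonzero result, e.g. SS_CW_Success,
+ * wakes it up.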
*/ + } + } + spin_unlock_irqrestore(&mdev->req_lock, flags); + + return rv; +} + +/** + * _drbd_request_state: + * This function is the most gracefull way to change state. For some state + * transition this function even does a cluster wide transaction. + * It has a cousin named drbd_request_state(), which is always verbose. + */ +STATIC int drbd_req_state(struct drbd_conf *mdev, + union drbd_state_t mask, union drbd_state_t val, + enum chg_state_flags f) +{ + struct completion done; + unsigned long flags; + union drbd_state_t os, ns; + int rv; + + init_completion(&done); + + if (f & ChgSerialize) + mutex_lock(&mdev->state_mutex); + + spin_lock_irqsave(&mdev->req_lock, flags); + os = mdev->state; + ns.i = (os.i & ~mask.i) | val.i; + + if (cl_wide_st_chg(mdev, os, ns)) { + rv = is_valid_state(mdev, ns); + if (rv == SS_Success) + rv = is_valid_state_transition(mdev, ns, os); + spin_unlock_irqrestore(&mdev->req_lock, flags); + + if (rv < SS_Success) { + if (f & ChgStateVerbose) + print_st_err(mdev, os, ns, rv); + goto abort; + } + + drbd_state_lock(mdev); + if (!drbd_send_state_req(mdev, mask, val)) { + drbd_state_unlock(mdev); + rv = SS_CW_FailedByPeer; + if (f & ChgStateVerbose) + print_st_err(mdev, os, ns, rv); + goto abort; + } + + wait_event(mdev->state_wait, + (rv = _req_st_cond(mdev, mask, val))); + + if (rv < SS_Success) { + /* nearly dead code. */ + drbd_state_unlock(mdev); + if (f & ChgStateVerbose) + print_st_err(mdev, os, ns, rv); + goto abort; + } + spin_lock_irqsave(&mdev->req_lock, flags); + os = mdev->state; + ns.i = (os.i & ~mask.i) | val.i; + rv = _drbd_set_state(mdev, ns, f, &done); + drbd_state_unlock(mdev); + } else { + rv = _drbd_set_state(mdev, ns, f, &done); + } + + spin_unlock_irqrestore(&mdev->req_lock, flags); + + if (f & ChgWaitComplete && rv == SS_Success) { + D_ASSERT(current != mdev->worker.task); + wait_for_completion(&done); + } + +abort: + if (f & ChgSerialize) + mutex_unlock(&mdev->state_mutex); + + return rv; +} + +/** + * _drbd_request_state: + * This function is the most gracefull way to change state. For some state + * transition this function even does a cluster wide transaction. + * It has a cousin named drbd_request_state(), which is always verbose. + */ +int _drbd_request_state(struct drbd_conf *mdev, union drbd_state_t mask, + union drbd_state_t val, enum chg_state_flags f) +{ + int rv; + + wait_event(mdev->state_wait, + (rv = drbd_req_state(mdev, mask, val, f)) != SS_InTransientState); + + return rv; +} + +STATIC void print_st(struct drbd_conf *mdev, char *name, union drbd_state_t ns) +{ + ERR(" %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n", + name, + conns_to_name(ns.conn), + roles_to_name(ns.role), + roles_to_name(ns.peer), + disks_to_name(ns.disk), + disks_to_name(ns.pdsk), + ns.susp ? 's' : 'r', + ns.aftr_isp ? 'a' : '-', + ns.peer_isp ? 'p' : '-', + ns.user_isp ? 'u' : '-' + ); +} + +void print_st_err(struct drbd_conf *mdev, + union drbd_state_t os, union drbd_state_t ns, int err) +{ + if (err == SS_InTransientState) + return; + ERR("State change failed: %s\n", set_st_err_name(err)); + print_st(mdev, " state", os); + print_st(mdev, "wanted", ns); +} + + +#define peers_to_name roles_to_name +#define pdsks_to_name disks_to_name + +#define susps_to_name(A) ((A) ? "1" : "0") +#define aftr_isps_to_name(A) ((A) ? "1" : "0") +#define peer_isps_to_name(A) ((A) ? "1" : "0") +#define user_isps_to_name(A) ((A) ? 
"1" : "0") + +#define PSC(A) \ + ({ if (ns.A != os.A) { \ + pbp += sprintf(pbp, #A "( %s -> %s ) ", \ + A##s_to_name(os.A), \ + A##s_to_name(ns.A)); \ + } }) + +int is_valid_state(struct drbd_conf *mdev, union drbd_state_t ns) +{ + /* See drbd_state_sw_errors in drbd_strings.c */ + + enum fencing_policy fp; + int rv = SS_Success; + + fp = DontCare; + if (inc_local(mdev)) { + fp = mdev->bc->dc.fencing; + dec_local(mdev); + } + + if (inc_net(mdev)) { + if (!mdev->net_conf->two_primaries && + ns.role == Primary && ns.peer == Primary) + rv = SS_TwoPrimaries; + dec_net(mdev); + } + + if (rv <= 0) + /* already found a reason to abort */; + else if (ns.role == Secondary && mdev->open_cnt) + rv = SS_DeviceInUse; + + else if (ns.role == Primary && ns.conn < Connected && ns.disk < UpToDate) + rv = SS_NoUpToDateDisk; + + else if (fp >= Resource && + ns.role == Primary && ns.conn < Connected && ns.pdsk >= DUnknown) + rv = SS_PrimaryNOP; + + else if (ns.role == Primary && ns.disk <= Inconsistent && ns.pdsk <= Inconsistent) + rv = SS_NoUpToDateDisk; + + else if (ns.conn > Connected && ns.disk < UpToDate && ns.pdsk < UpToDate) + rv = SS_BothInconsistent; + + else if (ns.conn > Connected && (ns.disk == Diskless || ns.pdsk == Diskless)) + rv = SS_SyncingDiskless; + + else if ((ns.conn == Connected || + ns.conn == WFBitMapS || + ns.conn == SyncSource || + ns.conn == PausedSyncS) && + ns.disk == Outdated) + rv = SS_ConnectedOutdates; + + else if ((ns.conn == VerifyS || ns.conn == VerifyT) && + (mdev->sync_conf.verify_alg[0] == 0)) + rv = SS_NoVerifyAlg; + + else if ((ns.conn == VerifyS || ns.conn == VerifyT) && + mdev->agreed_pro_version < 88) + rv = SS_NotSupported; + + return rv; +} + +int is_valid_state_transition(struct drbd_conf *mdev, + union drbd_state_t ns, union drbd_state_t os) +{ + int rv = SS_Success; + + if ((ns.conn == StartingSyncT || ns.conn == StartingSyncS) && + os.conn > Connected) + rv = SS_ResyncRunning; + + if (ns.conn == Disconnecting && os.conn == StandAlone) + rv = SS_AlreadyStandAlone; + + if (ns.disk > Attaching && os.disk == Diskless) + rv = SS_IsDiskLess; + + if (ns.conn == WFConnection && os.conn < Unconnected) + rv = SS_NoNetConfig; + + if (ns.disk == Outdated && os.disk < Outdated && os.disk != Attaching) + rv = SS_LowerThanOutdated; + + if (ns.conn == Disconnecting && os.conn == Unconnected) + rv = SS_InTransientState; + + if (ns.conn == os.conn && ns.conn == WFReportParams) + rv = SS_InTransientState; + + if ((ns.conn == VerifyS || ns.conn == VerifyT) && os.conn < Connected) + rv = SS_NeedConnection; + + if ((ns.conn == VerifyS || ns.conn == VerifyT) && + ns.conn != os.conn && os.conn > Connected) + rv = SS_ResyncRunning; + + if ((ns.conn == StartingSyncS || ns.conn == StartingSyncT) && + os.conn < Connected) + rv = SS_NeedConnection; + + return rv; +} + +int __drbd_set_state(struct drbd_conf *mdev, + union drbd_state_t ns, enum chg_state_flags flags, + struct completion *done) +{ + union drbd_state_t os; + int rv = SS_Success; + int warn_sync_abort = 0; + enum fencing_policy fp; + struct after_state_chg_work *ascw; + + + os = mdev->state; + + fp = DontCare; + if (inc_local(mdev)) { + fp = mdev->bc->dc.fencing; + dec_local(mdev); + } + + /* Early state sanitising. 
*/ + + /* Dissalow Network errors to configure a device's network part */ + if ((ns.conn >= Timeout && ns.conn <= TearDown) && + os.conn <= Disconnecting) + ns.conn = os.conn; + + /* After a network error (+TearDown) only Unconnected or Disconnecting can follow */ + if (os.conn >= Timeout && os.conn <= TearDown && + ns.conn != Unconnected && ns.conn != Disconnecting) + ns.conn = os.conn; + + /* After Disconnecting only StandAlone may follow */ + if (os.conn == Disconnecting && ns.conn != StandAlone) + ns.conn = os.conn; + + if (ns.conn < Connected) { + ns.peer_isp = 0; + ns.peer = Unknown; + if (ns.pdsk > DUnknown || ns.pdsk < Inconsistent) + ns.pdsk = DUnknown; + } + + if (ns.conn <= Disconnecting && ns.disk == Diskless) + ns.pdsk = DUnknown; + + if (os.conn > Connected && ns.conn > Connected && + (ns.disk <= Failed || ns.pdsk <= Failed)) { + warn_sync_abort = 1; + ns.conn = Connected; + } + + if (ns.conn >= Connected && + ((ns.disk == Consistent || ns.disk == Outdated) || + (ns.disk == Negotiating && ns.conn == WFBitMapT))) { + switch (ns.conn) { + case WFBitMapT: + case PausedSyncT: + ns.disk = Outdated; + break; + case Connected: + case WFBitMapS: + case SyncSource: + case PausedSyncS: + ns.disk = UpToDate; + break; + case SyncTarget: + ns.disk = Inconsistent; + drbd_WARN("Implicit set disk state Inconsistent!\n"); + break; + } + if (os.disk == Outdated && ns.disk == UpToDate) + drbd_WARN("Implicit set disk from Outdate to UpToDate\n"); + } + + if (ns.conn >= Connected && + (ns.pdsk == Consistent || ns.pdsk == Outdated)) { + switch (ns.conn) { + case Connected: + case WFBitMapT: + case PausedSyncT: + case SyncTarget: + ns.pdsk = UpToDate; + break; + case WFBitMapS: + case PausedSyncS: + ns.pdsk = Outdated; + break; + case SyncSource: + ns.pdsk = Inconsistent; + drbd_WARN("Implicit set pdsk Inconsistent!\n"); + break; + } + if (os.pdsk == Outdated && ns.pdsk == UpToDate) + drbd_WARN("Implicit set pdsk from Outdate to UpToDate\n"); + } + + /* Connection breaks down before we finished "Negotiating" */ + if (ns.conn < Connected && ns.disk == Negotiating && + inc_local_if_state(mdev, Negotiating)) { + if (mdev->ed_uuid == mdev->bc->md.uuid[Current]) { + ns.disk = mdev->new_state_tmp.disk; + ns.pdsk = mdev->new_state_tmp.pdsk; + } else { + ALERT("Connection lost while negotiating, no data!\n"); + ns.disk = Diskless; + ns.pdsk = DUnknown; + } + dec_local(mdev); + } + + if (fp == Stonith && + (ns.role == Primary && + ns.conn < Connected && + ns.pdsk > Outdated)) + ns.susp = 1; + + if (ns.aftr_isp || ns.peer_isp || ns.user_isp) { + if (ns.conn == SyncSource) + ns.conn = PausedSyncS; + if (ns.conn == SyncTarget) + ns.conn = PausedSyncT; + } else { + if (ns.conn == PausedSyncS) + ns.conn = SyncSource; + if (ns.conn == PausedSyncT) + ns.conn = SyncTarget; + } + + if (ns.i == os.i) + return SS_NothingToDo; + + if (!(flags & ChgStateHard)) { + /* pre-state-change checks ; only look at ns */ + /* See drbd_state_sw_errors in drbd_strings.c */ + + rv = is_valid_state(mdev, ns); + if (rv < SS_Success) { + /* If the old state was illegal as well, then let + this happen...*/ + + if (is_valid_state(mdev, os) == rv) { + ERR("Considering state change from bad state. 
" + "Error would be: '%s'\n", + set_st_err_name(rv)); + print_st(mdev, "old", os); + print_st(mdev, "new", ns); + rv = is_valid_state_transition(mdev, ns, os); + } + } else + rv = is_valid_state_transition(mdev, ns, os); + } + + if (rv < SS_Success) { + if (flags & ChgStateVerbose) + print_st_err(mdev, os, ns, rv); + return rv; + } + + if (warn_sync_abort) + drbd_WARN("Resync aborted.\n"); + + { + char *pbp, pb[300]; + pbp = pb; + *pbp = 0; + PSC(role); + PSC(peer); + PSC(conn); + PSC(disk); + PSC(pdsk); + PSC(susp); + PSC(aftr_isp); + PSC(peer_isp); + PSC(user_isp); + INFO("%s\n", pb); + } + + mdev->state.i = ns.i; + wake_up(&mdev->misc_wait); + wake_up(&mdev->state_wait); + + /** post-state-change actions **/ + if (os.conn >= SyncSource && ns.conn <= Connected) { + set_bit(STOP_SYNC_TIMER, &mdev->flags); + mod_timer(&mdev->resync_timer, jiffies); + } + + if ((os.conn == PausedSyncT || os.conn == PausedSyncS) && + (ns.conn == SyncTarget || ns.conn == SyncSource)) { + INFO("Syncer continues.\n"); + mdev->rs_paused += (long)jiffies-(long)mdev->rs_mark_time; + if (ns.conn == SyncTarget) { + if (!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags)) + mod_timer(&mdev->resync_timer, jiffies); + /* This if (!test_bit) is only needed for the case + that a device that has ceased to used its timer, + i.e. it is already in drbd_resync_finished() gets + paused and resumed. */ + } + } + + if ((os.conn == SyncTarget || os.conn == SyncSource) && + (ns.conn == PausedSyncT || ns.conn == PausedSyncS)) { + INFO("Resync suspended\n"); + mdev->rs_mark_time = jiffies; + if (ns.conn == PausedSyncT) + set_bit(STOP_SYNC_TIMER, &mdev->flags); + } + + if (os.conn == Connected && + (ns.conn == VerifyS || ns.conn == VerifyT)) { + mdev->ov_position = 0; + mdev->ov_left = + mdev->rs_total = + mdev->rs_mark_left = drbd_bm_bits(mdev); + mdev->rs_start = + mdev->rs_mark_time = jiffies; + mdev->ov_last_oos_size = 0; + mdev->ov_last_oos_start = 0; + + if (ns.conn == VerifyS) + mod_timer(&mdev->resync_timer, jiffies); + } + + if (inc_local(mdev)) { + u32 mdf = mdev->bc->md.flags & ~(MDF_Consistent|MDF_PrimaryInd| + MDF_ConnectedInd|MDF_WasUpToDate| + MDF_PeerOutDated|MDF_CrashedPrimary); + + if (test_bit(CRASHED_PRIMARY, &mdev->flags)) + mdf |= MDF_CrashedPrimary; + if (mdev->state.role == Primary || + (mdev->state.pdsk < Inconsistent && mdev->state.peer == Primary)) + mdf |= MDF_PrimaryInd; + if (mdev->state.conn > WFReportParams) + mdf |= MDF_ConnectedInd; + if (mdev->state.disk > Inconsistent) + mdf |= MDF_Consistent; + if (mdev->state.disk > Outdated) + mdf |= MDF_WasUpToDate; + if (mdev->state.pdsk <= Outdated && mdev->state.pdsk >= Inconsistent) + mdf |= MDF_PeerOutDated; + if (mdf != mdev->bc->md.flags) { + mdev->bc->md.flags = mdf; + drbd_md_mark_dirty(mdev); + } + if (os.disk < Consistent && ns.disk >= Consistent) + drbd_set_ed_uuid(mdev, mdev->bc->md.uuid[Current]); + dec_local(mdev); + } + + /* Peer was forced UpToDate & Primary, consider to resync */ + if (os.disk == Inconsistent && os.pdsk == Inconsistent && + os.peer == Secondary && ns.peer == Primary) + set_bit(CONSIDER_RESYNC, &mdev->flags); + + /* Receiver should clean up itself */ + if (os.conn != Disconnecting && ns.conn == Disconnecting) + drbd_thread_stop_nowait(&mdev->receiver); + + /* Now the receiver finished cleaning up itself, it should die */ + if (os.conn != StandAlone && ns.conn == StandAlone) + drbd_thread_stop_nowait(&mdev->receiver); + + /* Upon network failure, we need to restart the receiver. 
*/ + if (os.conn > TearDown && + ns.conn <= TearDown && ns.conn >= Timeout) + drbd_thread_restart_nowait(&mdev->receiver); + + ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC); + if (ascw) { + ascw->os = os; + ascw->ns = ns; + ascw->flags = flags; + ascw->w.cb = w_after_state_ch; + ascw->done = done; + drbd_queue_work(&mdev->data.work, &ascw->w); + } else { + drbd_WARN("Could not kmalloc an ascw\n"); + } + + return rv; +} + +STATIC int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused) +{ + struct after_state_chg_work *ascw; + + ascw = (struct after_state_chg_work *) w; + after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags); + if (ascw->flags & ChgWaitComplete) { + D_ASSERT(ascw->done != NULL); + complete(ascw->done); + } + kfree(ascw); + + return 1; +} + +static void abw_start_sync(struct drbd_conf *mdev, int rv) +{ + if (rv) { + ERR("Writing the bitmap failed not starting resync.\n"); + _drbd_request_state(mdev, NS(conn, Connected), ChgStateVerbose); + return; + } + + switch (mdev->state.conn) { + case StartingSyncT: + _drbd_request_state(mdev, NS(conn, WFSyncUUID), ChgStateVerbose); + break; + case StartingSyncS: + drbd_start_resync(mdev, SyncSource); + break; + } +} + +STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state_t os, + union drbd_state_t ns, enum chg_state_flags flags) +{ + enum fencing_policy fp; + + if (os.conn != Connected && ns.conn == Connected) { + clear_bit(CRASHED_PRIMARY, &mdev->flags); + if (mdev->p_uuid) + mdev->p_uuid[UUID_FLAGS] &= ~((u64)2); + } + + fp = DontCare; + if (inc_local(mdev)) { + fp = mdev->bc->dc.fencing; + dec_local(mdev); + } + + /* Inform userspace about the change... */ + drbd_bcast_state(mdev, ns); + + if (!(os.role == Primary && os.disk < UpToDate && os.pdsk < UpToDate) && + (ns.role == Primary && ns.disk < UpToDate && ns.pdsk < UpToDate)) + drbd_khelper(mdev, "pri-on-incon-degr"); + + /* Here we have the actions that are performed after a + state change. This function might sleep */ + + if (fp == Stonith && ns.susp) { + /* case1: The outdate peer handler is successfull: + * case2: The connection was established again: */ + if ((os.pdsk > Outdated && ns.pdsk <= Outdated) || + (os.conn < Connected && ns.conn >= Connected)) { + tl_clear(mdev); + spin_lock_irq(&mdev->req_lock); + _drbd_set_state(_NS(mdev, susp, 0), ChgStateVerbose, NULL); + spin_unlock_irq(&mdev->req_lock); + } + } + /* Do not change the order of the if above and the two below... */ + if (os.pdsk == Diskless && ns.pdsk > Diskless) { /* attach on the peer */ + drbd_send_uuids(mdev); + drbd_send_state(mdev); + } + if (os.conn != WFBitMapS && ns.conn == WFBitMapS) + drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)"); + + /* Lost contact to peer's copy of the data */ + if ((os.pdsk >= Inconsistent && + os.pdsk != DUnknown && + os.pdsk != Outdated) + && (ns.pdsk < Inconsistent || + ns.pdsk == DUnknown || + ns.pdsk == Outdated)) { + kfree(mdev->p_uuid); + mdev->p_uuid = NULL; + if (inc_local(mdev)) { + if ((ns.role == Primary || ns.peer == Primary) && + mdev->bc->md.uuid[Bitmap] == 0 && ns.disk >= UpToDate) { + drbd_uuid_new_current(mdev); + drbd_send_uuids(mdev); + } + dec_local(mdev); + } + } + + if (ns.pdsk < Inconsistent && inc_local(mdev)) { + if (ns.peer == Primary && mdev->bc->md.uuid[Bitmap] == 0) + drbd_uuid_new_current(mdev); + + /* Diskless Peer becomes secondary */ + if (os.peer == Primary && ns.peer == Secondary) + drbd_al_to_on_disk_bm(mdev); + dec_local(mdev); + } + + /* Last part of the attaching process ... 
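+	 * (with the connection established, Attaching -> Negotiating
+	 * triggers the handshake sequence
+	 *   drbd_send_sizes(mdev);
+	 *   drbd_send_uuids(mdev);
+	 *   drbd_send_state(mdev);
+	 * as done right below)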
*/
+ if (ns.conn >= Connected &&
+ os.disk == Attaching && ns.disk == Negotiating) {
+ kfree(mdev->p_uuid); /* We expect to receive up-to-date UUIDs soon. */
+ mdev->p_uuid = NULL; /* ...to not use the old ones in the meantime */
+ drbd_send_sizes(mdev); /* to start sync... */
+ drbd_send_uuids(mdev);
+ drbd_send_state(mdev);
+ }
+
+ /* We want to pause/continue resync, tell peer. */
+ if (ns.conn >= Connected &&
+ ((os.aftr_isp != ns.aftr_isp) ||
+ (os.user_isp != ns.user_isp)))
+ drbd_send_state(mdev);
+
+ /* In case one of the isp bits got set, suspend other devices. */
+ if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
+ (ns.aftr_isp || ns.peer_isp || ns.user_isp))
+ suspend_other_sg(mdev);
+
+ /* Make sure the peer gets informed about eventual state
+ changes (ISP bits) while we were in WFReportParams. */
+ if (os.conn == WFReportParams && ns.conn >= Connected)
+ drbd_send_state(mdev);
+
+ /* We are in the process of starting a full sync... */
+ if ((os.conn != StartingSyncT && ns.conn == StartingSyncT) ||
+ (os.conn != StartingSyncS && ns.conn == StartingSyncS))
+ drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, &abw_start_sync, "set_n_write from StartingSync");
+
+ /* We are invalidating ourselves... */
+ if (os.conn < Connected && ns.conn < Connected &&
+ os.disk > Inconsistent && ns.disk == Inconsistent)
+ drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate");
+
+ if (os.disk > Diskless && ns.disk == Diskless) {
+ /* since inc_local() only works as long as disk>=Inconsistent,
+ and it is Diskless here, local_cnt can only go down, it cannot
+ increase... It will reach zero */
+ wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
+
+ lc_free(mdev->resync);
+ mdev->resync = NULL;
+ lc_free(mdev->act_log);
+ mdev->act_log = NULL;
+ __no_warn(local, drbd_free_bc(mdev->bc););
+ wmb(); /* see begin of drbd_nl_disk_conf() */
+ __no_warn(local, mdev->bc = NULL;);
+
+ if (mdev->md_io_tmpp)
+ __free_page(mdev->md_io_tmpp);
+ }
+
+ /* Disks got bigger while they were detached */
+ if (ns.disk > Negotiating && ns.pdsk > Negotiating &&
+ test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
+ if (ns.conn == Connected)
+ resync_after_online_grow(mdev);
+ }
+
+ /* A resync finished or aborted, wake paused devices... */
+ if ((os.conn > Connected && ns.conn <= Connected) ||
+ (os.peer_isp && !ns.peer_isp) ||
+ (os.user_isp && !ns.user_isp))
+ resume_next_sg(mdev);
+
+ /* Upon network connection, we need to start the receiver */
+ if (os.conn == StandAlone && ns.conn == Unconnected)
+ drbd_thread_start(&mdev->receiver);
+
+ /* Terminate worker thread if we are unconfigured - it will be
+ restarted as needed... */
+ if (ns.disk == Diskless && ns.conn == StandAlone && ns.role == Secondary)
+ drbd_thread_stop_nowait(&mdev->worker);
+
+ drbd_md_sync(mdev);
+}
+
+
+STATIC int drbd_thread_setup(void *arg)
+{
+ struct Drbd_thread *thi = (struct Drbd_thread *) arg;
+ struct drbd_conf *mdev = thi->mdev;
+ int retval;
+
+restart:
+ retval = thi->function(thi);
+
+ spin_lock(&thi->t_lock);
+
+ /* if the receiver has been "Exiting", the last thing it did
+ * was set the conn state to "StandAlone",
+ * if now a re-connect request comes in, conn state goes Unconnected,
+ * and receiver thread will be "started".
+ * drbd_thread_start needs to set "Restarting" in that case.
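+ *
+ * For illustration, the thread state transitions are:
+ *	None -> Running		drbd_thread_start()
+ *	Running -> Exiting	_drbd_thread_stop()
+ *				(or Restarting, if a restart was requested)
+ *	Exiting -> Restarting	drbd_thread_start() while still exiting
+ *	Restarting -> Running	the restart: path above
+ *	Exiting -> None		normal termination below
+ *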
+ * t_state check and assignment needs to be within the same spinlock,
+ * so either thread_start sees Exiting, and can remap to Restarting,
+ * or thread_start sees None, and can proceed as normal.
+ */
+
+ if (thi->t_state == Restarting) {
+ INFO("Restarting %s\n", current->comm);
+ thi->t_state = Running;
+ spin_unlock(&thi->t_lock);
+ goto restart;
+ }
+
+ thi->task = NULL;
+ thi->t_state = None;
+ smp_mb();
+ complete(&thi->stop);
+ spin_unlock(&thi->t_lock);
+
+ INFO("Terminating %s\n", current->comm);
+
+ /* Release mod reference taken when thread was started */
+ module_put(THIS_MODULE);
+ return retval;
+}
+
+STATIC void drbd_thread_init(struct drbd_conf *mdev, struct Drbd_thread *thi,
+ int (*func) (struct Drbd_thread *))
+{
+ spin_lock_init(&thi->t_lock);
+ thi->task = NULL;
+ thi->t_state = None;
+ thi->function = func;
+ thi->mdev = mdev;
+}
+
+int drbd_thread_start(struct Drbd_thread *thi)
+{
+ struct drbd_conf *mdev = thi->mdev;
+ struct task_struct *nt;
+ const char *me =
+ thi == &mdev->receiver ? "receiver" :
+ thi == &mdev->asender ? "asender" :
+ thi == &mdev->worker ? "worker" : "NONSENSE";
+
+ spin_lock(&thi->t_lock);
+ switch (thi->t_state) {
+ case None:
+ INFO("Starting %s thread (from %s [%d])\n",
+ me, current->comm, current->pid);
+
+ /* Get ref on module for thread - this is released when thread exits */
+ if (!try_module_get(THIS_MODULE)) {
+ ERR("Failed to get module reference in drbd_thread_start\n");
+ spin_unlock(&thi->t_lock);
+ return FALSE;
+ }
+
+ D_ASSERT(thi->task == NULL);
+ thi->reset_cpu_mask = 1;
+ thi->t_state = Running;
+ spin_unlock(&thi->t_lock);
+ flush_signals(current); /* otherwise may get -ERESTARTNOINTR */
+
+ nt = kthread_create(drbd_thread_setup, (void *) thi,
+ "drbd%d_%s", mdev_to_minor(mdev), me);
+
+ if (IS_ERR(nt)) {
+ ERR("Couldn't start thread\n");
+
+ module_put(THIS_MODULE);
+ return FALSE;
+ }
+ spin_lock(&thi->t_lock);
+ thi->task = nt;
+ thi->t_state = Running;
+ spin_unlock(&thi->t_lock);
+ wake_up_process(nt);
+ break;
+ case Exiting:
+ thi->t_state = Restarting;
+ INFO("Restarting %s thread (from %s [%d])\n",
+ me, current->comm, current->pid);
+ /* fall through */
+ case Running:
+ case Restarting:
+ default:
+ spin_unlock(&thi->t_lock);
+ break;
+ }
+
+ return TRUE;
+}
+
+
+void _drbd_thread_stop(struct Drbd_thread *thi, int restart, int wait)
+{
+ enum Drbd_thread_state ns = restart ? Restarting : Exiting;
+
+ spin_lock(&thi->t_lock);
+
+ if (thi->t_state == None) {
+ spin_unlock(&thi->t_lock);
+ if (restart)
+ drbd_thread_start(thi);
+ return;
+ }
+
+ if (thi->t_state != ns) {
+ if (thi->task == NULL) {
+ spin_unlock(&thi->t_lock);
+ return;
+ }
+
+ thi->t_state = ns;
+ smp_mb();
+ init_completion(&thi->stop);
+ if (thi->task != current)
+ force_sig(DRBD_SIGKILL, thi->task);
+
+ }
+
+ spin_unlock(&thi->t_lock);
+
+ if (wait) {
+ wait_for_completion(&thi->stop);
+ }
+}
+
+#ifdef CONFIG_SMP
+/**
+ * drbd_calc_cpu_mask: Generates a CPU mask, spreading devices over all CPUs.
+ * Forces all threads of a device onto the same CPU. This is beneficial for
+ * DRBD's performance. May be overridden by the user's configuration.
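+ *
+ * Example: with CPUs 0-3 online, minor 4 is pinned to cpu 0,
+ * minor 5 to cpu 1 (5 % 4 == 1), and so on.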
+ */ +cpumask_t drbd_calc_cpu_mask(struct drbd_conf *mdev) +{ + int sv, cpu; + cpumask_t av_cpu_m; + + if (cpus_weight(mdev->cpu_mask)) + return mdev->cpu_mask; + + av_cpu_m = cpu_online_map; + sv = mdev_to_minor(mdev) % cpus_weight(av_cpu_m); + + for_each_cpu_mask(cpu, av_cpu_m) { + if (sv-- == 0) + return cpumask_of_cpu(cpu); + } + + /* some kernel versions "forget" to add the (cpumask_t) typecast + * to that macro, which results in "parse error before '{'" ;-> */ + return (cpumask_t) CPU_MASK_ALL; /* Never reached. */ +} + +/* modifies the cpu mask of the _current_ thread, + * call in the "main loop" of _all_ threads. + * no need for any mutex, current won't die prematurely. + */ +void drbd_thread_current_set_cpu(struct drbd_conf *mdev) +{ + struct task_struct *p = current; + struct Drbd_thread *thi = + p == mdev->asender.task ? &mdev->asender : + p == mdev->receiver.task ? &mdev->receiver : + p == mdev->worker.task ? &mdev->worker : + NULL; + ERR_IF(thi == NULL) + return; + if (!thi->reset_cpu_mask) + return; + thi->reset_cpu_mask = 0; + /* preempt_disable(); + Thas was a kernel that warned about a call to smp_processor_id() while preemt + was not disabled. It seems that this was fixed in manline. */ + set_cpus_allowed(p, mdev->cpu_mask); + /* preempt_enable(); */ +} +#endif + +/* the appropriate socket mutex must be held already */ +int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, + enum Drbd_Packet_Cmd cmd, struct Drbd_Header *h, + size_t size, unsigned msg_flags) +{ + int sent, ok; + + ERR_IF(!h) return FALSE; + ERR_IF(!size) return FALSE; + + h->magic = BE_DRBD_MAGIC; + h->command = cpu_to_be16(cmd); + h->length = cpu_to_be16(size-sizeof(struct Drbd_Header)); + + dump_packet(mdev, sock, 0, (void *)h, __FILE__, __LINE__); + sent = drbd_send(mdev, sock, h, size, msg_flags); + + ok = (sent == size); + if (!ok) + ERR("short sent %s size=%d sent=%d\n", + cmdname(cmd), (int)size, sent); + return ok; +} + +/* don't pass the socket. we may only look at it + * when we hold the appropriate socket mutex. + */ +int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, + enum Drbd_Packet_Cmd cmd, struct Drbd_Header *h, size_t size) +{ + int ok = 0; + struct socket *sock; + + if (use_data_socket) { + mutex_lock(&mdev->data.mutex); + sock = mdev->data.socket; + } else { + mutex_lock(&mdev->meta.mutex); + sock = mdev->meta.socket; + } + + /* drbd_disconnect() could have called drbd_free_sock() + * while we were waiting in down()... */ + if (likely(sock != NULL)) + ok = _drbd_send_cmd(mdev, sock, cmd, h, size, 0); + + if (use_data_socket) + mutex_unlock(&mdev->data.mutex); + else + mutex_unlock(&mdev->meta.mutex); + return ok; +} + +int drbd_send_cmd2(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, char *data, + size_t size) +{ + struct Drbd_Header h; + int ok; + + h.magic = BE_DRBD_MAGIC; + h.command = cpu_to_be16(cmd); + h.length = cpu_to_be16(size); + + if (!drbd_get_data_sock(mdev)) + return 0; + + dump_packet(mdev, mdev->data.socket, 0, (void *)&h, __FILE__, __LINE__); + + ok = (sizeof(h) == + drbd_send(mdev, mdev->data.socket, &h, sizeof(h), 0)); + ok = ok && (size == + drbd_send(mdev, mdev->data.socket, data, size, 0)); + + drbd_put_data_sock(mdev); + + return ok; +} + +int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) +{ + struct Drbd_SyncParam89_Packet *p; + struct socket *sock; + int size, rv; + const int apv = mdev->agreed_pro_version; + + size = apv <= 87 ? sizeof(struct Drbd_SyncParam_Packet) + : apv == 88 ? 
sizeof(struct Drbd_SyncParam_Packet) + + strlen(mdev->sync_conf.verify_alg) + 1 + : /* 89 */ sizeof(struct Drbd_SyncParam89_Packet); + + /* used from admin command context and receiver/worker context. + * to avoid kmalloc, grab the socket right here, + * then use the pre-allocated sbuf there */ + mutex_lock(&mdev->data.mutex); + sock = mdev->data.socket; + + if (likely(sock != NULL)) { + enum Drbd_Packet_Cmd cmd = apv >= 89 ? SyncParam89 : SyncParam; + + p = &mdev->data.sbuf.SyncParam89; + + /* initialize verify_alg and csums_alg */ + memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); + + p->rate = cpu_to_be32(sc->rate); + + if (apv >= 88) + strcpy(p->verify_alg, mdev->sync_conf.verify_alg); + if (apv >= 89) + strcpy(p->csums_alg, mdev->sync_conf.csums_alg); + + rv = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0); + } else + rv = 0; /* not ok */ + + mutex_unlock(&mdev->data.mutex); + + return rv; +} + +int drbd_send_protocol(struct drbd_conf *mdev) +{ + struct Drbd_Protocol_Packet *p; + int size, rv; + + size = sizeof(struct Drbd_Protocol_Packet); + + if (mdev->agreed_pro_version >= 87) + size += strlen(mdev->net_conf->integrity_alg) + 1; + + p = kmalloc(size, GFP_KERNEL); + if (p == NULL) + return 0; + + p->protocol = cpu_to_be32(mdev->net_conf->wire_protocol); + p->after_sb_0p = cpu_to_be32(mdev->net_conf->after_sb_0p); + p->after_sb_1p = cpu_to_be32(mdev->net_conf->after_sb_1p); + p->after_sb_2p = cpu_to_be32(mdev->net_conf->after_sb_2p); + p->want_lose = cpu_to_be32(mdev->net_conf->want_lose); + p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries); + + if (mdev->agreed_pro_version >= 87) + strcpy(p->integrity_alg, mdev->net_conf->integrity_alg); + + rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, ReportProtocol, + (struct Drbd_Header *)p, size); + kfree(p); + return rv; +} + +int drbd_send_uuids(struct drbd_conf *mdev) +{ + struct Drbd_GenCnt_Packet p; + int i; + + u64 uuid_flags = 0; + + if (!inc_local_if_state(mdev, Negotiating)) + return 1; + + for (i = Current; i < UUID_SIZE; i++) + p.uuid[i] = mdev->bc ? cpu_to_be64(mdev->bc->md.uuid[i]) : 0; + + mdev->comm_bm_set = drbd_bm_total_weight(mdev); + p.uuid[UUID_SIZE] = cpu_to_be64(mdev->comm_bm_set); + uuid_flags |= mdev->net_conf->want_lose ? 1 : 0; + uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0; + uuid_flags |= mdev->new_state_tmp.disk == Inconsistent ? 
4 : 0; + p.uuid[UUID_FLAGS] = cpu_to_be64(uuid_flags); + + dec_local(mdev); + + return drbd_send_cmd(mdev, USE_DATA_SOCKET, ReportUUIDs, + (struct Drbd_Header *)&p, sizeof(p)); +} + +int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val) +{ + struct Drbd_SyncUUID_Packet p; + + p.uuid = cpu_to_be64(val); + + return drbd_send_cmd(mdev, USE_DATA_SOCKET, ReportSyncUUID, + (struct Drbd_Header *)&p, sizeof(p)); +} + +int drbd_send_sizes(struct drbd_conf *mdev) +{ + struct Drbd_Sizes_Packet p; + sector_t d_size, u_size; + int q_order_type; + int ok; + + if (inc_local_if_state(mdev, Negotiating)) { + D_ASSERT(mdev->bc->backing_bdev); + d_size = drbd_get_max_capacity(mdev->bc); + u_size = mdev->bc->dc.disk_size; + q_order_type = drbd_queue_order_type(mdev); + p.queue_order_type = cpu_to_be32(drbd_queue_order_type(mdev)); + dec_local(mdev); + } else { + d_size = 0; + u_size = 0; + q_order_type = QUEUE_ORDERED_NONE; + } + + p.d_size = cpu_to_be64(d_size); + p.u_size = cpu_to_be64(u_size); + p.c_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); + p.max_segment_size = cpu_to_be32(mdev->rq_queue->max_segment_size); + p.queue_order_type = cpu_to_be32(q_order_type); + + ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, ReportSizes, + (struct Drbd_Header *)&p, sizeof(p)); + return ok; +} + +/** + * drbd_send_state: + * Informs the peer about our state. Only call it when + * mdev->state.conn >= Connected (I.e. you may not call it while in + * WFReportParams. Though there is one valid and necessary exception, + * drbd_connect() calls drbd_send_state() while in it WFReportParams. + */ +int drbd_send_state(struct drbd_conf *mdev) +{ + struct socket *sock; + struct Drbd_State_Packet p; + int ok = 0; + + /* Grab state lock so we wont send state if we're in the middle + * of a cluster wide state change on another thread */ + drbd_state_lock(mdev); + + mutex_lock(&mdev->data.mutex); + + p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */ + sock = mdev->data.socket; + + if (likely(sock != NULL)) { + ok = _drbd_send_cmd(mdev, sock, ReportState, + (struct Drbd_Header *)&p, sizeof(p), 0); + } + + mutex_unlock(&mdev->data.mutex); + + drbd_state_unlock(mdev); + return ok; +} + +int drbd_send_state_req(struct drbd_conf *mdev, + union drbd_state_t mask, union drbd_state_t val) +{ + struct Drbd_Req_State_Packet p; + + p.mask = cpu_to_be32(mask.i); + p.val = cpu_to_be32(val.i); + + return drbd_send_cmd(mdev, USE_DATA_SOCKET, StateChgRequest, + (struct Drbd_Header *)&p, sizeof(p)); +} + +int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode) +{ + struct Drbd_RqS_Reply_Packet p; + + p.retcode = cpu_to_be32(retcode); + + return drbd_send_cmd(mdev, USE_META_SOCKET, StateChgReply, + (struct Drbd_Header *)&p, sizeof(p)); +} + +/* returns + * positive: number of payload bytes needed in this packet. + * zero: incompressible. */ +int fill_bitmap_rle_bytes(struct drbd_conf *mdev, + struct Drbd_Compressed_Bitmap_Packet *p, + struct bm_xfer_ctx *c) +{ + unsigned long plain_bits; + unsigned long tmp; + unsigned long rl; + void *buffer; + unsigned n; + unsigned len; + unsigned toggle; + + /* may we use this feature? */ + if ((mdev->sync_conf.use_rle_encoding == 0) || + (mdev->agreed_pro_version < 90)) + return 0; + + if (c->bit_offset >= c->bm_bits) + return 0; /* nothing to do. */ + + /* use at most thus many bytes */ + len = BM_PACKET_VLI_BYTES_MAX; + buffer = p->code; + /* plain bits covered in this code string */ + plain_bits = 0; + + /* p->encoding & 0x80 stores whether the first + * run length is set. 
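+ * (worked example: a stretch of bitmap reading 0001 1101,
+ * starting at bit_offset, is encoded as start=0 plus the
+ * VLI-coded run lengths 3, 3, 1, 1)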
+ * bit offset is implicit. + * start with toggle == 2 to be able to tell the first iteration */ + toggle = 2; + + /* see how much plain bits we can stuff into one packet + * using RLE and VLI. */ + do { + tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset) + : _drbd_bm_find_next(mdev, c->bit_offset); + if (tmp == -1UL) + tmp = c->bm_bits; + rl = tmp - c->bit_offset; + + if (toggle == 2) { /* first iteration */ + if (rl == 0) { + /* the first checked bit was set, + * store start value, */ + DCBP_set_start(p, 1); + /* but skip encoding of zero run length */ + toggle = !toggle; + continue; + } + DCBP_set_start(p, 0); + } + + /* paranoia: catch zero runlength. + * can only happen if bitmap is modified while we scan it. */ + if (rl == 0) { + ERR("unexpected zero runlength while encoding bitmap " + "t:%u bo:%lu\n", toggle, c->bit_offset); + return -1; + } + + n = vli_encode_bytes(buffer, rl, len); + if (n == 0) /* buffer full */ + break; + + toggle = !toggle; + buffer += n; + len -= n; + plain_bits += rl; + c->bit_offset = tmp; + } while (len && c->bit_offset < c->bm_bits); + + len = BM_PACKET_VLI_BYTES_MAX - len; + + if (plain_bits < (len << 3)) { + /* incompressible with this method. + * we need to rewind both word and bit position. */ + c->bit_offset -= plain_bits; + bm_xfer_ctx_bit_to_word_offset(c); + c->bit_offset = c->word_offset * BITS_PER_LONG; + return 0; + } + + /* RLE + VLI was able to compress it just fine. + * update c->word_offset. */ + bm_xfer_ctx_bit_to_word_offset(c); + + /* store pad_bits */ + DCBP_set_pad_bits(p, 0); + + return len; +} + +int fill_bitmap_rle_bits(struct drbd_conf *mdev, + struct Drbd_Compressed_Bitmap_Packet *p, + struct bm_xfer_ctx *c) +{ + struct bitstream bs; + unsigned long plain_bits; + unsigned long tmp; + unsigned long rl; + unsigned len; + unsigned toggle; + int bits; + + /* may we use this feature? */ + if ((mdev->sync_conf.use_rle_encoding == 0) || + (mdev->agreed_pro_version < 90)) + return 0; + + if (c->bit_offset >= c->bm_bits) + return 0; /* nothing to do. */ + + /* use at most thus many bytes */ + bitstream_init(&bs, p->code, BM_PACKET_VLI_BYTES_MAX, 0); + memset(p->code, 0, BM_PACKET_VLI_BYTES_MAX); + /* plain bits covered in this code string */ + plain_bits = 0; + + /* p->encoding & 0x80 stores whether the first + * run length is set. + * bit offset is implicit. + * start with toggle == 2 to be able to tell the first iteration */ + toggle = 2; + + /* see how much plain bits we can stuff into one packet + * using RLE and VLI. */ + do { + tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset) + : _drbd_bm_find_next(mdev, c->bit_offset); + if (tmp == -1UL) + tmp = c->bm_bits; + rl = tmp - c->bit_offset; + + if (toggle == 2) { /* first iteration */ + if (rl == 0) { + /* the first checked bit was set, + * store start value, */ + DCBP_set_start(p, 1); + /* but skip encoding of zero run length */ + toggle = !toggle; + continue; + } + DCBP_set_start(p, 0); + } + + /* paranoia: catch zero runlength. + * can only happen if bitmap is modified while we scan it. 
*/ + if (rl == 0) { + ERR("unexpected zero runlength while encoding bitmap " + "t:%u bo:%lu\n", toggle, c->bit_offset); + return -1; + } + + bits = vli_encode_bits(&bs, rl); + if (bits == -ENOBUFS) /* buffer full */ + break; + if (bits <= 0) { + ERR("error while encoding bitmap: %d\n", bits); + return 0; + } + + toggle = !toggle; + plain_bits += rl; + c->bit_offset = tmp; + } while (c->bit_offset < c->bm_bits); + + len = bs.cur.b - p->code + !!bs.cur.bit; + + if (plain_bits < (len << 3)) { + /* incompressible with this method. + * we need to rewind both word and bit position. */ + c->bit_offset -= plain_bits; + bm_xfer_ctx_bit_to_word_offset(c); + c->bit_offset = c->word_offset * BITS_PER_LONG; + return 0; + } + + /* RLE + VLI was able to compress it just fine. + * update c->word_offset. */ + bm_xfer_ctx_bit_to_word_offset(c); + + /* store pad_bits */ + DCBP_set_pad_bits(p, (8 - bs.cur.bit) & 0x7); + + return len; +} + +enum { OK, FAILED, DONE } +send_bitmap_rle_or_plain(struct drbd_conf *mdev, + struct Drbd_Header *h, struct bm_xfer_ctx *c) +{ + struct Drbd_Compressed_Bitmap_Packet *p = (void*)h; + unsigned long num_words; + int len; + int ok; + + if (0) + len = fill_bitmap_rle_bytes(mdev, p, c); + else + len = fill_bitmap_rle_bits(mdev, p, c); + + if (len < 0) + return FAILED; + if (len) { + DCBP_set_code(p, 0 ? RLE_VLI_Bytes : RLE_VLI_BitsFibD_3_5); + ok = _drbd_send_cmd(mdev, mdev->data.socket, ReportCBitMap, h, + sizeof(*p) + len, 0); + + c->packets[0]++; + c->bytes[0] += sizeof(*p) + len; + + if (c->bit_offset >= c->bm_bits) + len = 0; /* DONE */ + } else { + /* was not compressible. + * send a buffer full of plain text bits instead. */ + num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); + len = num_words * sizeof(long); + if (len) + drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload); + ok = _drbd_send_cmd(mdev, mdev->data.socket, ReportBitMap, + h, sizeof(struct Drbd_Header) + len, 0); + c->word_offset += num_words; + c->bit_offset = c->word_offset * BITS_PER_LONG; + + c->packets[1]++; + c->bytes[1] += sizeof(struct Drbd_Header) + len; + + if (c->bit_offset > c->bm_bits) + c->bit_offset = c->bm_bits; + } + ok = ok ? ((len == 0) ? DONE : OK) : FAILED; + + if (ok == DONE) + INFO_bm_xfer_stats(mdev, "send", c); + return ok; +} + +/* See the comment at receive_bitmap() */ +int _drbd_send_bitmap(struct drbd_conf *mdev) +{ + struct bm_xfer_ctx c; + struct Drbd_Header *p; + int ret; + + ERR_IF(!mdev->bitmap) return FALSE; + + /* maybe we should use some per thread scratch page, + * and allocate that during initial device creation? */ + p = (struct Drbd_Header *) __get_free_page(GFP_NOIO); + if (!p) { + ERR("failed to allocate one page buffer in %s\n", __func__); + return FALSE; + } + + if (inc_local(mdev)) { + if (drbd_md_test_flag(mdev->bc, MDF_FullSync)) { + INFO("Writing the whole bitmap, MDF_FullSync was set.\n"); + drbd_bm_set_all(mdev); + if (drbd_bm_write(mdev)) { + /* write_bm did fail! Leave full sync flag set in Meta Data + * but otherwise process as per normal - need to tell other + * side that a full resync is required! 
*/ + ERR("Failed to write bitmap to disk!\n"); + } else { + drbd_md_clear_flag(mdev, MDF_FullSync); + drbd_md_sync(mdev); + } + } + dec_local(mdev); + } + + c = (struct bm_xfer_ctx) { + .bm_bits = drbd_bm_bits(mdev), + .bm_words = drbd_bm_words(mdev), + }; + + do { + ret = send_bitmap_rle_or_plain(mdev, p, &c); + } while (ret == OK); + + free_page((unsigned long) p); + return (ret == DONE); +} + +int drbd_send_bitmap(struct drbd_conf *mdev) +{ + int err; + + if (!drbd_get_data_sock(mdev)) + return -1; + err = !_drbd_send_bitmap(mdev); + drbd_put_data_sock(mdev); + return err; +} + +int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) +{ + int ok; + struct Drbd_BarrierAck_Packet p; + + p.barrier = barrier_nr; + p.set_size = cpu_to_be32(set_size); + + if (mdev->state.conn < Connected) + return FALSE; + ok = drbd_send_cmd(mdev, USE_META_SOCKET, BarrierAck, + (struct Drbd_Header *)&p, sizeof(p)); + return ok; +} + +/** + * _drbd_send_ack: + * This helper function expects the sector and block_id parameter already + * in big endian! + */ +STATIC int _drbd_send_ack(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, + u64 sector, + u32 blksize, + u64 block_id) +{ + int ok; + struct Drbd_BlockAck_Packet p; + + p.sector = sector; + p.block_id = block_id; + p.blksize = blksize; + p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq)); + + if (!mdev->meta.socket || mdev->state.conn < Connected) + return FALSE; + ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, + (struct Drbd_Header *)&p, sizeof(p)); + return ok; +} + +int drbd_send_ack_dp(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, + struct Drbd_Data_Packet *dp) +{ + const int header_size = sizeof(struct Drbd_Data_Packet) + - sizeof(struct Drbd_Header); + int data_size = ((struct Drbd_Header *)dp)->length - header_size; + + return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), + dp->block_id); +} + +int drbd_send_ack_rp(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, + struct Drbd_BlockRequest_Packet *rp) +{ + return _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id); +} + +int drbd_send_ack(struct drbd_conf *mdev, + enum Drbd_Packet_Cmd cmd, struct Tl_epoch_entry *e) +{ + return _drbd_send_ack(mdev, cmd, + cpu_to_be64(e->sector), + cpu_to_be32(e->size), + e->block_id); +} + +/* This function misuses the block_id field to signal if the blocks + * are is sync or not. 
*/ +int drbd_send_ack_ex(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, + sector_t sector, int blksize, u64 block_id) +{ + return _drbd_send_ack(mdev, cmd, + cpu_to_be64(sector), + cpu_to_be32(blksize), + cpu_to_be64(block_id)); +} + +int drbd_send_drequest(struct drbd_conf *mdev, int cmd, + sector_t sector, int size, u64 block_id) +{ + int ok; + struct Drbd_BlockRequest_Packet p; + + p.sector = cpu_to_be64(sector); + p.block_id = block_id; + p.blksize = cpu_to_be32(size); + + ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, + (struct Drbd_Header *)&p, sizeof(p)); + return ok; +} + +int drbd_send_drequest_csum(struct drbd_conf *mdev, + sector_t sector, int size, + void *digest, int digest_size, + enum Drbd_Packet_Cmd cmd) +{ + int ok; + struct Drbd_BlockRequest_Packet p; + + p.sector = cpu_to_be64(sector); + p.block_id = BE_DRBD_MAGIC + 0xbeef; + p.blksize = cpu_to_be32(size); + + p.head.magic = BE_DRBD_MAGIC; + p.head.command = cpu_to_be16(cmd); + p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct Drbd_Header) + digest_size); + + mutex_lock(&mdev->data.mutex); + + ok = (sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), 0)); + ok = ok && (digest_size == drbd_send(mdev, mdev->data.socket, digest, digest_size, 0)); + + mutex_unlock(&mdev->data.mutex); + + return ok; +} + +int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) +{ + int ok; + struct Drbd_BlockRequest_Packet p; + + p.sector = cpu_to_be64(sector); + p.block_id = BE_DRBD_MAGIC + 0xbabe; + p.blksize = cpu_to_be32(size); + + ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, OVRequest, + (struct Drbd_Header *)&p, sizeof(p)); + return ok; +} + +/* called on sndtimeo + * returns FALSE if we should retry, + * TRUE if we think connection is dead + */ +STATIC int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock) +{ + int drop_it; + /* long elapsed = (long)(jiffies - mdev->last_received); */ + + drop_it = mdev->meta.socket == sock + || !mdev->asender.task + || get_t_state(&mdev->asender) != Running + || mdev->state.conn < Connected; + + if (drop_it) + return TRUE; + + drop_it = !--mdev->ko_count; + if (!drop_it) { + ERR("[%s/%d] sock_sendmsg time expired, ko = %u\n", + current->comm, current->pid, mdev->ko_count); + request_ping(mdev); + } + + return drop_it; /* && (mdev->state == Primary) */; +} + +/* The idea of sendpage seems to be to put some kind of reference + * to the page into the skb, and to hand it over to the NIC. In + * this process get_page() gets called. + * + * As soon as the page was really sent over the network put_page() + * gets called by some part of the network layer. [ NIC driver? ] + * + * [ get_page() / put_page() increment/decrement the count. If count + * reaches 0 the page will be freed. ] + * + * This works nicely with pages from FSs. + * But this means that in protocol A we might signal IO completion too early! + * + * In order not to corrupt data during a resync we must make sure + * that we do not reuse our own buffer pages (EEs) to early, therefore + * we have the net_ee list. + * + * XFS seems to have problems, still, it submits pages with page_count == 0! + * As a workaround, we disable sendpage on pages + * with page_count == 0 or PageSlab. 
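
The same "zero-copy when safe, copy otherwise" split exists in userspace: sendfile(2), much like sendpage, only works for certain file descriptor types. A rough sketch of the fallback pattern (illustrative only; the bounce-buffer size and the errno set are arbitrary choices, not taken from this patch):

    #include <sys/sendfile.h>
    #include <unistd.h>
    #include <errno.h>

    /* try the zero-copy path first; fall back to a bounce buffer
     * when the source fd does not support it (cf. the PageSlab /
     * page_count check above) */
    static ssize_t copy_out(int out_fd, int in_fd, off_t off, size_t len)
    {
        ssize_t n = sendfile(out_fd, in_fd, &off, len);

        if (n >= 0 || (errno != EINVAL && errno != ENOSYS))
            return n;

        char buf[4096];
        ssize_t r = pread(in_fd, buf,
                          len < sizeof(buf) ? len : sizeof(buf), off);
        if (r <= 0)
            return r;
        return write(out_fd, buf, r);
    }
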
+ */ +STATIC int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, + int offset, size_t size) +{ + int ret; + ret = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0); + kunmap(page); + return ret; +} + +int _drbd_send_page(struct drbd_conf *mdev, struct page *page, + int offset, size_t size) +{ + mm_segment_t oldfs = get_fs(); + int sent, ok; + int len = size; + + /* PARANOIA. if this ever triggers, + * something in the layers above us is really kaputt. + *one roundtrip later: + * doh. it triggered. so XFS _IS_ really kaputt ... + * oh well... + */ + if ((page_count(page) < 1) || PageSlab(page)) { + /* e.g. XFS meta- & log-data is in slab pages, which have a + * page_count of 0 and/or have PageSlab() set... + */ + sent = _drbd_no_send_page(mdev, page, offset, size); + if (likely(sent > 0)) + len -= sent; + goto out; + } + + drbd_update_congested(mdev); + set_fs(KERNEL_DS); + do { + sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page, + offset, len, + MSG_NOSIGNAL); + if (sent == -EAGAIN) { + if (we_should_drop_the_connection(mdev, + mdev->data.socket)) + break; + else + continue; + } + if (sent <= 0) { + drbd_WARN("%s: size=%d len=%d sent=%d\n", + __func__, (int)size, len, sent); + break; + } + len -= sent; + offset += sent; + } while (len > 0 /* THINK && mdev->cstate >= Connected*/); + set_fs(oldfs); + clear_bit(NET_CONGESTED, &mdev->flags); + +out: + ok = (len == 0); + if (likely(ok)) + mdev->send_cnt += size>>9; + return ok; +} + +static inline int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) +{ + struct bio_vec *bvec; + int i; + __bio_for_each_segment(bvec, bio, i, 0) { + if (!_drbd_no_send_page(mdev, bvec->bv_page, + bvec->bv_offset, bvec->bv_len)) + return 0; + } + return 1; +} + +static inline int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) +{ + struct bio_vec *bvec; + int i; + __bio_for_each_segment(bvec, bio, i, 0) { + if (!_drbd_send_page(mdev, bvec->bv_page, + bvec->bv_offset, bvec->bv_len)) + return 0; + } + + return 1; +} + +/* Used to send write requests + * Primary -> Peer (Data) + */ +int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) +{ + int ok = 1; + struct Drbd_Data_Packet p; + unsigned int dp_flags = 0; + void *dgb; + int dgs; + + if (!drbd_get_data_sock(mdev)) + return 0; + + dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? 
+ crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; + + p.head.magic = BE_DRBD_MAGIC; + p.head.command = cpu_to_be16(Data); + p.head.length = + cpu_to_be16(sizeof(p) - sizeof(struct Drbd_Header) + dgs + req->size); + + p.sector = cpu_to_be64(req->sector); + p.block_id = (unsigned long)req; + p.seq_num = cpu_to_be32(req->seq_num = + atomic_add_return(1, &mdev->packet_seq)); + dp_flags = 0; + + /* NOTE: no need to check if barriers supported here as we would + * not pass the test in make_request_common in that case + */ + if (bio_barrier(req->master_bio)) + dp_flags |= DP_HARDBARRIER; + if (bio_sync(req->master_bio)) + dp_flags |= DP_RW_SYNC; + if (mdev->state.conn >= SyncSource && + mdev->state.conn <= PausedSyncT) + dp_flags |= DP_MAY_SET_IN_SYNC; + + p.dp_flags = cpu_to_be32(dp_flags); + dump_packet(mdev, mdev->data.socket, 0, (void *)&p, __FILE__, __LINE__); + set_bit(UNPLUG_REMOTE, &mdev->flags); + ok = (sizeof(p) == + drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE)); + if (ok && dgs) { + dgb = mdev->int_dig_out; + drbd_csum(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); + ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); + } + if (ok) { + if (mdev->net_conf->wire_protocol == DRBD_PROT_A) + ok = _drbd_send_bio(mdev, req->master_bio); + else + ok = _drbd_send_zc_bio(mdev, req->master_bio); + } + + drbd_put_data_sock(mdev); + return ok; +} + +/* answer packet, used to send data back for read requests: + * Peer -> (diskless) Primary (DataReply) + * SyncSource -> SyncTarget (RSDataReply) + */ +int drbd_send_block(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, + struct Tl_epoch_entry *e) +{ + int ok; + struct Drbd_Data_Packet p; + void *dgb; + int dgs; + + dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? + crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; + + p.head.magic = BE_DRBD_MAGIC; + p.head.command = cpu_to_be16(cmd); + p.head.length = + cpu_to_be16(sizeof(p) - sizeof(struct Drbd_Header) + dgs + e->size); + + p.sector = cpu_to_be64(e->sector); + p.block_id = e->block_id; + /* p.seq_num = 0; No sequence numbers here.. */ + + /* Only called by our kernel thread. + * This one may be interupted by DRBD_SIG and/or DRBD_SIGKILL + * in response to admin command or module unload. + */ + if (!drbd_get_data_sock(mdev)) + return 0; + + dump_packet(mdev, mdev->data.socket, 0, (void *)&p, __FILE__, __LINE__); + ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, + sizeof(p), MSG_MORE); + if (ok && dgs) { + dgb = mdev->int_dig_out; + drbd_csum(mdev, mdev->integrity_w_tfm, e->private_bio, dgb); + ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); + } + if (ok) + ok = _drbd_send_zc_bio(mdev, e->private_bio); + + drbd_put_data_sock(mdev); + return ok; +} + +/* + drbd_send distinguishes two cases: + + Packets sent via the data socket "sock" + and packets sent via the meta data socket "msock" + + sock msock + -----------------+-------------------------+------------------------------ + timeout conf.timeout / 2 conf.timeout / 2 + timeout action send a ping via msock Abort communication + and close all sockets +*/ + +/* + * you must have down()ed the appropriate [m]sock_mutex elsewhere! + */ +int drbd_send(struct drbd_conf *mdev, struct socket *sock, + void *buf, size_t size, unsigned msg_flags) +{ + struct kvec iov; + struct msghdr msg; + int rv, sent = 0; + + if (!sock) + return -1000; + + /* THINK if (signal_pending) return ... ? 
*/ + + iov.iov_base = buf; + iov.iov_len = size; + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = msg_flags | MSG_NOSIGNAL; + + if (sock == mdev->data.socket) { + mdev->ko_count = mdev->net_conf->ko_count; + drbd_update_congested(mdev); + } + do { + /* STRANGE + * tcp_sendmsg does _not_ use its size parameter at all ? + * + * -EAGAIN on timeout, -EINTR on signal. + */ +/* THINK + * do we need to block DRBD_SIG if sock == &meta.socket ?? + * otherwise wake_asender() might interrupt some send_*Ack ! + */ + rv = kernel_sendmsg(sock, &msg, &iov, 1, size); + if (rv == -EAGAIN) { + if (we_should_drop_the_connection(mdev, sock)) + break; + else + continue; + } + D_ASSERT(rv != 0); + if (rv == -EINTR) { + flush_signals(current); + rv = 0; + } + if (rv < 0) + break; + sent += rv; + iov.iov_base += rv; + iov.iov_len -= rv; + } while (sent < size); + + if (sock == mdev->data.socket) + clear_bit(NET_CONGESTED, &mdev->flags); + + if (rv <= 0) { + if (rv != -EAGAIN) { + ERR("%s_sendmsg returned %d\n", + sock == mdev->meta.socket ? "msock" : "sock", + rv); + drbd_force_state(mdev, NS(conn, BrokenPipe)); + } else + drbd_force_state(mdev, NS(conn, Timeout)); + } + + return sent; +} + +static int drbd_open(struct block_device *bdev, fmode_t mode) +{ + struct drbd_conf *mdev = bdev->bd_disk->private_data; + unsigned long flags; + int rv = 0; + + spin_lock_irqsave(&mdev->req_lock, flags); + /* to have a stable mdev->state.role + * and no race with updating open_cnt */ + + if (mdev->state.role != Primary) { + if (mode & FMODE_WRITE) + rv = -EROFS; + else if (!allow_oos) + rv = -EMEDIUMTYPE; + } + + if (!rv) + mdev->open_cnt++; + spin_unlock_irqrestore(&mdev->req_lock, flags); + + return rv; +} + +static int drbd_release(struct gendisk *gd, fmode_t mode) +{ + struct drbd_conf *mdev = gd->private_data; + mdev->open_cnt--; + return 0; +} + +STATIC void drbd_unplug_fn(struct request_queue *q) +{ + struct drbd_conf *mdev = q->queuedata; + + MTRACE(TraceTypeUnplug, TraceLvlSummary, + INFO("got unplugged ap_bio_count=%d\n", + atomic_read(&mdev->ap_bio_cnt)); + ); + + /* unplug FIRST */ + spin_lock_irq(q->queue_lock); + blk_remove_plug(q); + spin_unlock_irq(q->queue_lock); + + /* only if connected */ + spin_lock_irq(&mdev->req_lock); + if (mdev->state.pdsk >= Inconsistent && mdev->state.conn >= Connected) { + D_ASSERT(mdev->state.role == Primary); + if (test_and_clear_bit(UNPLUG_REMOTE, &mdev->flags)) { + /* add to the data.work queue, + * unless already queued. + * XXX this might be a good addition to drbd_queue_work + * anyways, to detect "double queuing" ... */ + if (list_empty(&mdev->unplug_work.list)) + drbd_queue_work(&mdev->data.work, + &mdev->unplug_work); + } + } + spin_unlock_irq(&mdev->req_lock); + + if (mdev->state.disk >= Inconsistent) + drbd_kick_lo(mdev); +} + +STATIC void drbd_set_defaults(struct drbd_conf *mdev) +{ + mdev->sync_conf.after = DRBD_AFTER_DEF; + mdev->sync_conf.rate = DRBD_RATE_DEF; + mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_DEF; + mdev->state = (union drbd_state_t) { + { .role = Secondary, + .peer = Unknown, + .conn = StandAlone, + .disk = Diskless, + .pdsk = DUnknown, + .susp = 0 + } }; +} + +void drbd_init_set_defaults(struct drbd_conf *mdev) +{ + /* the memset(,0,) did most of this. 
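
drbd_send() above is the classic full-send loop: a stream socket may accept fewer bytes than requested, so the remainder has to be resubmitted, with -EINTR and -EAGAIN handled separately. The bare loop in userspace C (a sketch; the ko_count and congestion logic is left out):

    #include <sys/socket.h>
    #include <errno.h>
    #include <stddef.h>

    /* returns bytes sent; a short count means the connection broke */
    static size_t send_all(int fd, const void *buf, size_t size)
    {
        const char *p = buf;
        size_t sent = 0;

        while (sent < size) {
            ssize_t rv = send(fd, p + sent, size - sent, MSG_NOSIGNAL);
            if (rv < 0 && errno == EINTR)
                continue;   /* interrupted by a signal: just retry */
            if (rv < 0 && errno == EAGAIN)
                continue;   /* send timeout: drbd asks
                             * we_should_drop_the_connection() here */
            if (rv <= 0)
                break;      /* real error, or peer gone */
            sent += rv;
        }
        return sent;
    }
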
+ * note: only assignments, no allocation in here */ + + drbd_set_defaults(mdev); + + /* for now, we do NOT yet support it, + * even though we start some framework + * to eventually support barriers */ + set_bit(NO_BARRIER_SUPP, &mdev->flags); + + atomic_set(&mdev->ap_bio_cnt, 0); + atomic_set(&mdev->ap_pending_cnt, 0); + atomic_set(&mdev->rs_pending_cnt, 0); + atomic_set(&mdev->unacked_cnt, 0); + atomic_set(&mdev->local_cnt, 0); + atomic_set(&mdev->net_cnt, 0); + atomic_set(&mdev->packet_seq, 0); + atomic_set(&mdev->pp_in_use, 0); + + mutex_init(&mdev->md_io_mutex); + mutex_init(&mdev->data.mutex); + mutex_init(&mdev->meta.mutex); + sema_init(&mdev->data.work.s, 0); + sema_init(&mdev->meta.work.s, 0); + mutex_init(&mdev->state_mutex); + + spin_lock_init(&mdev->data.work.q_lock); + spin_lock_init(&mdev->meta.work.q_lock); + + spin_lock_init(&mdev->al_lock); + spin_lock_init(&mdev->req_lock); + spin_lock_init(&mdev->peer_seq_lock); + spin_lock_init(&mdev->epoch_lock); + + INIT_LIST_HEAD(&mdev->active_ee); + INIT_LIST_HEAD(&mdev->sync_ee); + INIT_LIST_HEAD(&mdev->done_ee); + INIT_LIST_HEAD(&mdev->read_ee); + INIT_LIST_HEAD(&mdev->net_ee); + INIT_LIST_HEAD(&mdev->resync_reads); + INIT_LIST_HEAD(&mdev->data.work.q); + INIT_LIST_HEAD(&mdev->meta.work.q); + INIT_LIST_HEAD(&mdev->resync_work.list); + INIT_LIST_HEAD(&mdev->unplug_work.list); + INIT_LIST_HEAD(&mdev->md_sync_work.list); + INIT_LIST_HEAD(&mdev->bm_io_work.w.list); + mdev->resync_work.cb = w_resync_inactive; + mdev->unplug_work.cb = w_send_write_hint; + mdev->md_sync_work.cb = w_md_sync; + mdev->bm_io_work.w.cb = w_bitmap_io; + init_timer(&mdev->resync_timer); + init_timer(&mdev->md_sync_timer); + mdev->resync_timer.function = resync_timer_fn; + mdev->resync_timer.data = (unsigned long) mdev; + mdev->md_sync_timer.function = md_sync_timer_fn; + mdev->md_sync_timer.data = (unsigned long) mdev; + + init_waitqueue_head(&mdev->misc_wait); + init_waitqueue_head(&mdev->state_wait); + init_waitqueue_head(&mdev->ee_wait); + init_waitqueue_head(&mdev->al_wait); + init_waitqueue_head(&mdev->seq_wait); + + drbd_thread_init(mdev, &mdev->receiver, drbdd_init); + drbd_thread_init(mdev, &mdev->worker, drbd_worker); + drbd_thread_init(mdev, &mdev->asender, drbd_asender); + + mdev->agreed_pro_version = PRO_VERSION_MAX; + mdev->write_ordering = WO_bio_barrier; + mdev->resync_wenr = LC_FREE; +} + +void drbd_mdev_cleanup(struct drbd_conf *mdev) +{ + if (mdev->receiver.t_state != None) + ERR("ASSERT FAILED: receiver t_state == %d expected 0.\n", + mdev->receiver.t_state); + + /* no need to lock it, I'm the only thread alive */ + if (atomic_read(&mdev->current_epoch->epoch_size) != 0) + ERR("epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size)); + mdev->al_writ_cnt = + mdev->bm_writ_cnt = + mdev->read_cnt = + mdev->recv_cnt = + mdev->send_cnt = + mdev->writ_cnt = + mdev->p_size = + mdev->rs_start = + mdev->rs_total = + mdev->rs_failed = + mdev->rs_mark_left = + mdev->rs_mark_time = 0; + D_ASSERT(mdev->net_conf == NULL); + + drbd_set_my_capacity(mdev, 0); + drbd_bm_resize(mdev, 0); + drbd_bm_cleanup(mdev); + + drbd_free_resources(mdev); + + /* + * currently we drbd_init_ee only on module load, so + * we may do drbd_release_ee only on module unload! 
+ */
+	D_ASSERT(list_empty(&mdev->active_ee));
+	D_ASSERT(list_empty(&mdev->sync_ee));
+	D_ASSERT(list_empty(&mdev->done_ee));
+	D_ASSERT(list_empty(&mdev->read_ee));
+	D_ASSERT(list_empty(&mdev->net_ee));
+	D_ASSERT(list_empty(&mdev->resync_reads));
+	D_ASSERT(list_empty(&mdev->data.work.q));
+	D_ASSERT(list_empty(&mdev->meta.work.q));
+	D_ASSERT(list_empty(&mdev->resync_work.list));
+	D_ASSERT(list_empty(&mdev->unplug_work.list));
+
+}
+
+
+STATIC void drbd_destroy_mempools(void)
+{
+	struct page *page;
+
+	while (drbd_pp_pool) {
+		page = drbd_pp_pool;
+		drbd_pp_pool = (struct page *)page_private(page);
+		__free_page(page);
+		drbd_pp_vacant--;
+	}
+
+	/* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */
+
+	if (drbd_ee_mempool)
+		mempool_destroy(drbd_ee_mempool);
+	if (drbd_request_mempool)
+		mempool_destroy(drbd_request_mempool);
+	if (drbd_ee_cache)
+		kmem_cache_destroy(drbd_ee_cache);
+	if (drbd_request_cache)
+		kmem_cache_destroy(drbd_request_cache);
+
+	drbd_ee_mempool = NULL;
+	drbd_request_mempool = NULL;
+	drbd_ee_cache = NULL;
+	drbd_request_cache = NULL;
+
+	return;
+}
+
+STATIC int drbd_create_mempools(void)
+{
+	struct page *page;
+	const int number = (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE) * minor_count;
+	int i;
+
+	/* prepare our caches and mempools */
+	drbd_request_mempool = NULL;
+	drbd_ee_cache = NULL;
+	drbd_request_cache = NULL;
+	drbd_pp_pool = NULL;
+
+	/* caches */
+	drbd_request_cache = kmem_cache_create(
+		"drbd_req_cache", sizeof(struct drbd_request), 0, 0, NULL);
+	if (drbd_request_cache == NULL)
+		goto Enomem;
+
+	drbd_ee_cache = kmem_cache_create(
+		"drbd_ee_cache", sizeof(struct Tl_epoch_entry), 0, 0, NULL);
+	if (drbd_ee_cache == NULL)
+		goto Enomem;
+
+	/* mempools */
+	drbd_request_mempool = mempool_create(number,
+		mempool_alloc_slab, mempool_free_slab, drbd_request_cache);
+	if (drbd_request_mempool == NULL)
+		goto Enomem;
+
+	drbd_ee_mempool = mempool_create(number,
+		mempool_alloc_slab, mempool_free_slab, drbd_ee_cache);
+	if (drbd_ee_mempool == NULL)
+		goto Enomem;
+
+	/* drbd's page pool */
+	spin_lock_init(&drbd_pp_lock);
+
+	for (i = 0; i < number; i++) {
+		page = alloc_page(GFP_HIGHUSER);
+		if (!page)
+			goto Enomem;
+		set_page_private(page, (unsigned long)drbd_pp_pool);
+		drbd_pp_pool = page;
+	}
+	drbd_pp_vacant = number;
+
+	return 0;
+
+Enomem:
+	drbd_destroy_mempools(); /* in case we allocated some */
+	return -ENOMEM;
+}
+
+STATIC int drbd_notify_sys(struct notifier_block *this, unsigned long code,
+	void *unused)
+{
+	/* just so we have it.  you never know what interesting things we
+	 * might want to do here some day...
+	 */
+
+	return NOTIFY_DONE;
+}
+
+STATIC struct notifier_block drbd_notifier = {
+	.notifier_call = drbd_notify_sys,
+};
+
+static void drbd_release_ee_lists(struct drbd_conf *mdev)
+{
+	int rr;
+
+	rr = drbd_release_ee(mdev, &mdev->active_ee);
+	if (rr)
+		ERR("%d EEs in active list found!\n", rr);
+
+	rr = drbd_release_ee(mdev, &mdev->sync_ee);
+	if (rr)
+		ERR("%d EEs in sync list found!\n", rr);
+
+	rr = drbd_release_ee(mdev, &mdev->read_ee);
+	if (rr)
+		ERR("%d EEs in read list found!\n", rr);
+
+	rr = drbd_release_ee(mdev, &mdev->done_ee);
+	if (rr)
+		ERR("%d EEs in done list found!\n", rr);
+
+	rr = drbd_release_ee(mdev, &mdev->net_ee);
+	if (rr)
+		ERR("%d EEs in net list found!\n", rr);
+}
+
+/* caution. no locking.
+ * currently only used from module cleanup code. 
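
The point of the mempools above is guaranteed forward progress: `number` objects are reserved up front so that allocation can fall back to the reserve when the normal allocator fails under memory pressure. A toy userspace equivalent (sketch only; fixed-size reserve, no locking):

    #include <stdlib.h>

    #define RESERVE 16

    static void *reserve[RESERVE];  /* preallocated objects */
    static int vacant;

    static int pool_init(size_t objsize)
    {
        for (vacant = 0; vacant < RESERVE; vacant++) {
            reserve[vacant] = malloc(objsize);
            if (!reserve[vacant])
                return -1;  /* caller unwinds, cf. the Enomem label */
        }
        return 0;
    }

    /* prefer the normal allocator, dip into the reserve under pressure */
    static void *pool_alloc(size_t objsize)
    {
        void *p = malloc(objsize);
        if (!p && vacant > 0)
            p = reserve[--vacant];
        return p;
    }

    static void pool_free(void *p)
    {
        if (vacant < RESERVE)
            reserve[vacant++] = p;  /* refill the reserve first */
        else
            free(p);
    }
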
*/
+static void drbd_delete_device(unsigned int minor)
+{
+	struct drbd_conf *mdev = minor_to_mdev(minor);
+
+	if (!mdev)
+		return;
+
+	/* paranoia asserts */
+	if (mdev->open_cnt != 0)
+		ERR("open_cnt = %d in %s:%u", mdev->open_cnt,
+				__FILE__ , __LINE__);
+
+	ERR_IF (!list_empty(&mdev->data.work.q)) {
+		struct list_head *lp;
+		list_for_each(lp, &mdev->data.work.q) {
+			DUMPP(lp);
+		}
+	};
+	/* end paranoia asserts */
+
+	del_gendisk(mdev->vdisk);
+
+	/* cleanup stuff that may have been allocated during
+	 * device (re-)configuration or state changes */
+
+	if (mdev->this_bdev)
+		bdput(mdev->this_bdev);
+
+	drbd_free_resources(mdev);
+
+	drbd_release_ee_lists(mdev);
+
+	/* should be free'd on disconnect? */
+	kfree(mdev->ee_hash);
+	/*
+	mdev->ee_hash_s = 0;
+	mdev->ee_hash = NULL;
+	*/
+
+	if (mdev->act_log)
+		lc_free(mdev->act_log);
+	if (mdev->resync)
+		lc_free(mdev->resync);
+
+	kfree(mdev->p_uuid);
+	/* mdev->p_uuid = NULL; */
+
+	kfree(mdev->int_dig_out);
+	kfree(mdev->int_dig_in);
+	kfree(mdev->int_dig_vv);
+
+	/* cleanup the rest that has been
+	 * allocated from drbd_new_device
+	 * and actually free the mdev itself */
+	drbd_free_mdev(mdev);
+}
+
+STATIC void drbd_cleanup(void)
+{
+	unsigned int i;
+
+	unregister_reboot_notifier(&drbd_notifier);
+
+	drbd_nl_cleanup();
+
+	if (minor_table) {
+		if (drbd_proc)
+			remove_proc_entry("drbd", NULL);
+		i = minor_count;
+		while (i--)
+			drbd_delete_device(i);
+		drbd_destroy_mempools();
+	}
+
+	kfree(minor_table);
+
+	unregister_blkdev(DRBD_MAJOR, "drbd");
+
+	printk(KERN_INFO "drbd: module cleanup done.\n");
+}
+
+/**
+ * drbd_congested: Returns 1<<bdi_bits if we are congested.
+ */
+STATIC int drbd_congested(void *congested_data, int bdi_bits)
+{
+	struct drbd_conf *mdev = congested_data;
+	struct request_queue *q;
+	char reason = '-';
+	int r = 0;
+
+	if (inc_local(mdev)) {
+		q = bdev_get_queue(mdev->bc->backing_bdev);
+		r = bdi_congested(&q->backing_dev_info, bdi_bits);
+		dec_local(mdev);
+		if (r) {
+			reason = 'b';
+			goto out;
+		}
+	}
+
+	if (bdi_bits & (1 << BDI_write_congested) && test_bit(NET_CONGESTED, &mdev->flags)) {
+		r = (1 << BDI_write_congested);
+		reason = 'n';
+	}
+
+out:
+	mdev->congestion_reason = reason;
+	return r;
+}
+
+struct drbd_conf *drbd_new_device(unsigned int minor)
+{
+	struct drbd_conf *mdev;
+	struct gendisk *disk;
+	struct request_queue *q;
+
+	mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL);
+	if (!mdev)
+		return NULL;
+
+	mdev->minor = minor;
+
+	drbd_init_set_defaults(mdev);
+
+	q = blk_alloc_queue(GFP_KERNEL);
+	if (!q)
+		goto out_no_q;
+	mdev->rq_queue = q;
+	q->queuedata = mdev;
+	q->max_segment_size = DRBD_MAX_SEGMENT_SIZE;
+
+	disk = alloc_disk(1);
+	if (!disk)
+		goto out_no_disk;
+	mdev->vdisk = disk;
+
+	set_disk_ro(disk, TRUE);
+
+	disk->queue = q;
+	disk->major = DRBD_MAJOR;
+	disk->first_minor = minor;
+	disk->fops = &drbd_ops;
+	sprintf(disk->disk_name, "drbd%d", minor);
+	disk->private_data = mdev;
+
+	mdev->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor));
+	/* we have no partitions. we contain only ourselves. */
+	mdev->this_bdev->bd_contains = mdev->this_bdev;
+
+	q->backing_dev_info.congested_fn = drbd_congested;
+	q->backing_dev_info.congested_data = mdev;
+
+	blk_queue_make_request(q, drbd_make_request_26);
+	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
+	blk_queue_merge_bvec(q, drbd_merge_bvec);
+	q->queue_lock = &mdev->req_lock; /* needed since we use
+		* plugging on a queue that actually has no requests! */
+	q->unplug_fn = drbd_unplug_fn;
+
+	mdev->md_io_page = alloc_page(GFP_KERNEL);
+	if (!mdev->md_io_page)
+		goto out_no_io_page;
+
+	if (drbd_bm_init(mdev))
+		goto out_no_bitmap;
+	/* no need to lock access, we are still initializing the module. 
*/ + if (!tl_init(mdev)) + goto out_no_tl; + + mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL); + if (!mdev->app_reads_hash) + goto out_no_app_reads; + + mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL); + if (!mdev->current_epoch) + goto out_no_epoch; + + INIT_LIST_HEAD(&mdev->current_epoch->list); + mdev->epochs = 1; + + return mdev; + +/* out_whatever_else: + kfree(mdev->current_epoch); */ +out_no_epoch: + kfree(mdev->app_reads_hash); +out_no_app_reads: + tl_cleanup(mdev); +out_no_tl: + drbd_bm_cleanup(mdev); +out_no_bitmap: + __free_page(mdev->md_io_page); +out_no_io_page: + put_disk(disk); +out_no_disk: + blk_cleanup_queue(q); +out_no_q: + kfree(mdev); + return NULL; +} + +/* counterpart of drbd_new_device. + * last part of drbd_delete_device. */ +void drbd_free_mdev(struct drbd_conf *mdev) +{ + kfree(mdev->current_epoch); + kfree(mdev->app_reads_hash); + tl_cleanup(mdev); + if (mdev->bitmap) /* should no longer be there. */ + drbd_bm_cleanup(mdev); + __free_page(mdev->md_io_page); + put_disk(mdev->vdisk); + blk_cleanup_queue(mdev->rq_queue); + kfree(mdev); +} + + +int __init drbd_init(void) +{ + int err; + + if (sizeof(struct Drbd_HandShake_Packet) != 80) { + printk(KERN_ERR + "drbd: never change the size or layout " + "of the HandShake packet.\n"); + return -EINVAL; + } + + if (1 > minor_count || minor_count > 255) { + printk(KERN_ERR + "drbd: invalid minor_count (%d)\n", minor_count); +#ifdef MODULE + return -EINVAL; +#else + minor_count = 8; +#endif + } + + err = drbd_nl_init(); + if (err) + return err; + + err = register_blkdev(DRBD_MAJOR, "drbd"); + if (err) { + printk(KERN_ERR + "drbd: unable to register block device major %d\n", + DRBD_MAJOR); + return err; + } + + register_reboot_notifier(&drbd_notifier); + + /* + * allocate all necessary structs + */ + err = -ENOMEM; + + init_waitqueue_head(&drbd_pp_wait); + + drbd_proc = NULL; /* play safe for drbd_cleanup */ + minor_table = kzalloc(sizeof(struct drbd_conf *)*minor_count, + GFP_KERNEL); + if (!minor_table) + goto Enomem; + + err = drbd_create_mempools(); + if (err) + goto Enomem; + + drbd_proc = proc_create("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops); + if (!drbd_proc) { + printk(KERN_ERR "drbd: unable to register proc file\n"); + goto Enomem; + } + + rwlock_init(&global_state_lock); + + printk(KERN_INFO "drbd: initialised. " + "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n", + API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX); + printk(KERN_INFO "drbd: %s\n", drbd_buildtag()); + printk(KERN_INFO "drbd: registered as block device major %d\n", + DRBD_MAJOR); + printk(KERN_INFO "drbd: minor_table @ 0x%p\n", minor_table); + + return 0; /* Success! 
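
drbd_new_device() uses the usual kernel unwind ladder: every allocation gets a label, and a failure jumps to the label that releases everything allocated so far, in reverse order. Reduced to its shape (a userspace sketch with made-up members):

    #include <stdlib.h>

    struct dev { void *a, *b, *c; };

    static struct dev *dev_new(void)
    {
        struct dev *d = calloc(1, sizeof(*d));
        if (!d)
            return NULL;

        d->a = malloc(64);
        if (!d->a)
            goto out_no_a;
        d->b = malloc(64);
        if (!d->b)
            goto out_no_b;
        d->c = malloc(64);
        if (!d->c)
            goto out_no_c;
        return d;

        /* unwind in reverse allocation order */
    out_no_c:
        free(d->b);
    out_no_b:
        free(d->a);
    out_no_a:
        free(d);
        return NULL;
    }
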
*/ + +Enomem: + drbd_cleanup(); + if (err == -ENOMEM) + /* currently always the case */ + printk(KERN_ERR "drbd: ran out of memory\n"); + else + printk(KERN_ERR "drbd: initialization failure\n"); + return err; +} + +void drbd_free_bc(struct drbd_backing_dev *bc) +{ + if (bc == NULL) + return; + + bd_release(bc->backing_bdev); + bd_release(bc->md_bdev); + + fput(bc->lo_file); + fput(bc->md_file); + + kfree(bc); +} + +void drbd_free_sock(struct drbd_conf *mdev) +{ + if (mdev->data.socket) { + sock_release(mdev->data.socket); + mdev->data.socket = NULL; + } + if (mdev->meta.socket) { + sock_release(mdev->meta.socket); + mdev->meta.socket = NULL; + } +} + + +void drbd_free_resources(struct drbd_conf *mdev) +{ + crypto_free_hash(mdev->csums_tfm); + mdev->csums_tfm = NULL; + crypto_free_hash(mdev->verify_tfm); + mdev->verify_tfm = NULL; + crypto_free_hash(mdev->cram_hmac_tfm); + mdev->cram_hmac_tfm = NULL; + crypto_free_hash(mdev->integrity_w_tfm); + mdev->integrity_w_tfm = NULL; + crypto_free_hash(mdev->integrity_r_tfm); + mdev->integrity_r_tfm = NULL; + + drbd_free_sock(mdev); + + __no_warn(local, + drbd_free_bc(mdev->bc); + mdev->bc = NULL;); +} + +/*********************************/ +/* meta data management */ + +struct meta_data_on_disk { + u64 la_size; /* last agreed size. */ + u64 uuid[UUID_SIZE]; /* UUIDs. */ + u64 device_uuid; + u64 reserved_u64_1; + u32 flags; /* MDF */ + u32 magic; + u32 md_size_sect; + u32 al_offset; /* offset to this block */ + u32 al_nr_extents; /* important for restoring the AL */ + /* `-- act_log->nr_elements <-- sync_conf.al_extents */ + u32 bm_offset; /* offset to the bitmap, from here */ + u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */ + u32 reserved_u32[4]; + +} __attribute((packed)); + +/** + * drbd_md_sync: + * Writes the meta data super block if the MD_DIRTY flag bit is set. + */ +void drbd_md_sync(struct drbd_conf *mdev) +{ + struct meta_data_on_disk *buffer; + sector_t sector; + int i; + + if (!test_and_clear_bit(MD_DIRTY, &mdev->flags)) + return; + del_timer(&mdev->md_sync_timer); + + /* We use here Failed and not Attaching because we try to write + * metadata even if we detach due to a disk failure! */ + if (!inc_local_if_state(mdev, Failed)) + return; + + MTRACE(TraceTypeMDIO, TraceLvlSummary, + INFO("Writing meta data super block now.\n"); + ); + + mutex_lock(&mdev->md_io_mutex); + buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); + memset(buffer, 0, 512); + + buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); + for (i = Current; i < UUID_SIZE; i++) + buffer->uuid[i] = cpu_to_be64(mdev->bc->md.uuid[i]); + buffer->flags = cpu_to_be32(mdev->bc->md.flags); + buffer->magic = cpu_to_be32(DRBD_MD_MAGIC); + + buffer->md_size_sect = cpu_to_be32(mdev->bc->md.md_size_sect); + buffer->al_offset = cpu_to_be32(mdev->bc->md.al_offset); + buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements); + buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE); + buffer->device_uuid = cpu_to_be64(mdev->bc->md.device_uuid); + + buffer->bm_offset = cpu_to_be32(mdev->bc->md.bm_offset); + + D_ASSERT(drbd_md_ss__(mdev, mdev->bc) == mdev->bc->md.md_offset); + sector = mdev->bc->md.md_offset; + + if (drbd_md_sync_page_io(mdev, mdev->bc, sector, WRITE)) { + clear_bit(MD_DIRTY, &mdev->flags); + } else { + /* this was a try anyways ... */ + ERR("meta data update failed!\n"); + + drbd_chk_io_error(mdev, 1, TRUE); + drbd_io_error(mdev, TRUE); + } + + /* Update mdev->bc->md.la_size_sect, + * since we updated it on metadata. 
*/
+	mdev->bc->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
+
+	mutex_unlock(&mdev->md_io_mutex);
+	dec_local(mdev);
+}
+
+/**
+ * drbd_md_read:
+ * @bdev: describes the backing storage and the meta-data storage
+ * Reads the meta data from bdev. Return 0 (NoError) on success, and an
+ * enum ret_codes in case something goes wrong.
+ * Currently only: MDIOError, MDInvalid.
+ */
+int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
+{
+	struct meta_data_on_disk *buffer;
+	int i, rv = NoError;
+
+	if (!inc_local_if_state(mdev, Attaching))
+		return MDIOError;
+
+	mutex_lock(&mdev->md_io_mutex);
+	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
+
+	if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
+		/* NOTE: can't do normal error processing here as this is
+		   called BEFORE disk is attached */
+		ERR("Error while reading metadata.\n");
+		rv = MDIOError;
+		goto err;
+	}
+
+	if (be32_to_cpu(buffer->magic) != DRBD_MD_MAGIC) {
+		ERR("Error while reading metadata, magic not found.\n");
+		rv = MDInvalid;
+		goto err;
+	}
+	if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
+		ERR("unexpected al_offset: %d (expected %d)\n",
+		    be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
+		rv = MDInvalid;
+		goto err;
+	}
+	if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
+		ERR("unexpected bm_offset: %d (expected %d)\n",
+		    be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
+		rv = MDInvalid;
+		goto err;
+	}
+	if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) {
+		ERR("unexpected md_size: %u (expected %u)\n",
+		    be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect);
+		rv = MDInvalid;
+		goto err;
+	}
+
+	if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
+		ERR("unexpected bm_bytes_per_bit: %u (expected %u)\n",
+		    be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
+		rv = MDInvalid;
+		goto err;
+	}
+
+	bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
+	for (i = Current; i < UUID_SIZE; i++)
+		bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
+	bdev->md.flags = be32_to_cpu(buffer->flags);
+	mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
+	bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
+
+	if (mdev->sync_conf.al_extents < 7)
+		mdev->sync_conf.al_extents = 127;
+
+ err:
+	mutex_unlock(&mdev->md_io_mutex);
+	dec_local(mdev);
+
+	return rv;
+}
+
+/**
+ * drbd_md_mark_dirty:
+ * Call this function if you change anything that should be written to
+ * the meta-data super block. This function sets MD_DIRTY, and starts a
+ * timer that ensures that within five seconds you have to call drbd_md_sync(). 
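
Every field of struct meta_data_on_disk is stored big endian and re-checked field by field when read back, so a disk written on one architecture attaches cleanly on another. A reduced round-trip sketch (userspace; htobe*/be*toh from <endian.h> play the role of cpu_to_be*/be*_to_cpu, and the magic value is a placeholder, not DRBD's real one):

    #include <endian.h>
    #include <stdint.h>
    #include <string.h>

    #define MAGIC 0x83740267u   /* placeholder magic */

    struct sb_on_disk {
        uint64_t la_size;
        uint32_t magic;
        uint32_t al_offset;
    };

    static void sb_write(struct sb_on_disk *buf,
                         uint64_t la_size, uint32_t al_offset)
    {
        memset(buf, 0, sizeof(*buf));
        buf->la_size = htobe64(la_size);    /* cpu_to_be64 */
        buf->magic = htobe32(MAGIC);
        buf->al_offset = htobe32(al_offset);
    }

    /* returns 0 on success, -1 if the block cannot be ours */
    static int sb_read(const struct sb_on_disk *buf,
                       uint32_t expected_al_offset)
    {
        if (be32toh(buf->magic) != MAGIC)
            return -1;      /* cf. "magic not found" above */
        if (be32toh(buf->al_offset) != expected_al_offset)
            return -1;      /* cf. "unexpected al_offset" */
        return 0;
    }
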
+ */ +void drbd_md_mark_dirty(struct drbd_conf *mdev) +{ + set_bit(MD_DIRTY, &mdev->flags); + mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ); +} + + +STATIC void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) +{ + int i; + + for (i = History_start; i < History_end; i++) { + mdev->bc->md.uuid[i+1] = mdev->bc->md.uuid[i]; + + MTRACE(TraceTypeUuid, TraceLvlAll, + drbd_print_uuid(mdev, i+1); + ); + } +} + +void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) +{ + if (idx == Current) { + if (mdev->state.role == Primary) + val |= 1; + else + val &= ~((u64)1); + + drbd_set_ed_uuid(mdev, val); + } + + mdev->bc->md.uuid[idx] = val; + + MTRACE(TraceTypeUuid, TraceLvlSummary, + drbd_print_uuid(mdev, idx); + ); + + drbd_md_mark_dirty(mdev); +} + + +void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) +{ + if (mdev->bc->md.uuid[idx]) { + drbd_uuid_move_history(mdev); + mdev->bc->md.uuid[History_start] = mdev->bc->md.uuid[idx]; + MTRACE(TraceTypeUuid, TraceLvlMetrics, + drbd_print_uuid(mdev, History_start); + ); + } + _drbd_uuid_set(mdev, idx, val); +} + +/** + * drbd_uuid_new_current: + * Creates a new current UUID, and rotates the old current UUID into + * the bitmap slot. Causes an incremental resync upon next connect. + */ +void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) +{ + u64 val; + + INFO("Creating new current UUID\n"); + D_ASSERT(mdev->bc->md.uuid[Bitmap] == 0); + mdev->bc->md.uuid[Bitmap] = mdev->bc->md.uuid[Current]; + MTRACE(TraceTypeUuid, TraceLvlMetrics, + drbd_print_uuid(mdev, Bitmap); + ); + + get_random_bytes(&val, sizeof(u64)); + _drbd_uuid_set(mdev, Current, val); +} + +void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) +{ + if (mdev->bc->md.uuid[Bitmap] == 0 && val == 0) + return; + + if (val == 0) { + drbd_uuid_move_history(mdev); + mdev->bc->md.uuid[History_start] = mdev->bc->md.uuid[Bitmap]; + mdev->bc->md.uuid[Bitmap] = 0; + + MTRACE(TraceTypeUuid, TraceLvlMetrics, + drbd_print_uuid(mdev, History_start); + drbd_print_uuid(mdev, Bitmap); + ); + } else { + if (mdev->bc->md.uuid[Bitmap]) + drbd_WARN("bm UUID already set"); + + mdev->bc->md.uuid[Bitmap] = val; + mdev->bc->md.uuid[Bitmap] &= ~((u64)1); + + MTRACE(TraceTypeUuid, TraceLvlMetrics, + drbd_print_uuid(mdev, Bitmap); + ); + } + drbd_md_mark_dirty(mdev); +} + +/** + * drbd_bmio_set_n_write: + * Is an io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io() that sets + * all bits in the bitmap and writes the whole bitmap to stable storage. + */ +int drbd_bmio_set_n_write(struct drbd_conf *mdev) +{ + int rv = -EIO; + + if (inc_local_if_state(mdev, Attaching)) { + drbd_md_set_flag(mdev, MDF_FullSync); + drbd_md_sync(mdev); + drbd_bm_set_all(mdev); + + rv = drbd_bm_write(mdev); + + if (!rv) { + drbd_md_clear_flag(mdev, MDF_FullSync); + drbd_md_sync(mdev); + } + + dec_local(mdev); + } + + return rv; +} + +/** + * drbd_bmio_clear_n_write: + * Is an io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io() that clears + * all bits in the bitmap and writes the whole bitmap to stable storage. 
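
The UUID helpers above maintain a small generation history: creating a new current UUID first parks the old one in the bitmap slot, so an incremental resync stays possible after the next reconnect; older values rotate down the history array, and the lowest bit of the current UUID doubles as the Primary tag. The rotation alone, as a sketch (the slot layout is mimicked, not copied line by line):

    #include <stdint.h>
    #include <stdlib.h>

    enum { CURRENT, BITMAP, HIST_START, HIST_END, UUID_N };

    static uint64_t uuid[UUID_N];

    static void uuid_new_current(int primary)
    {
        int i;

        /* rotate older entries down (cf. drbd_uuid_move_history) */
        for (i = HIST_END; i > HIST_START; i--)
            uuid[i] = uuid[i - 1];
        uuid[HIST_START] = uuid[BITMAP];

        /* the old current becomes the bitmap UUID; a later resync
         * can be based on it */
        uuid[BITMAP] = uuid[CURRENT];

        uuid[CURRENT] = ((uint64_t)rand() << 32) ^ (uint64_t)rand();
        if (primary)
            uuid[CURRENT] |= 1;            /* low bit tags Primary */
        else
            uuid[CURRENT] &= ~(uint64_t)1;
    }
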
+ */
+int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
+{
+	int rv = -EIO;
+
+	if (inc_local_if_state(mdev, Attaching)) {
+		drbd_bm_clear_all(mdev);
+		rv = drbd_bm_write(mdev);
+		dec_local(mdev);
+	}
+
+	return rv;
+}
+
+STATIC int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+	struct bm_io_work *work = (struct bm_io_work *)w;
+	int rv;
+
+	D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
+
+	drbd_bm_lock(mdev, work->why);
+	rv = work->io_fn(mdev);
+	drbd_bm_unlock(mdev);
+
+	clear_bit(BITMAP_IO, &mdev->flags);
+	wake_up(&mdev->misc_wait);
+
+	if (work->done)
+		work->done(mdev, rv);
+
+	clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
+	work->why = NULL;
+
+	return 1;
+}
+
+/**
+ * drbd_queue_bitmap_io:
+ * Queues an IO operation on the whole bitmap.
+ * While IO on the bitmap happens we freeze application IO, thus we ensure
+ * that drbd_set_out_of_sync() cannot be called.
+ * This function MUST ONLY be called from worker context.
+ * BAD API ALERT!
+ * It MUST NOT be used while a previous such work is still pending!
+ */
+void drbd_queue_bitmap_io(struct drbd_conf *mdev,
+			  int (*io_fn)(struct drbd_conf *),
+			  void (*done)(struct drbd_conf *, int),
+			  char *why)
+{
+	D_ASSERT(current == mdev->worker.task);
+
+	D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags));
+	D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags));
+	D_ASSERT(list_empty(&mdev->bm_io_work.w.list));
+	if (mdev->bm_io_work.why)
+		ERR("FIXME going to queue '%s' but '%s' still pending?\n",
+			why, mdev->bm_io_work.why);
+
+	mdev->bm_io_work.io_fn = io_fn;
+	mdev->bm_io_work.done = done;
+	mdev->bm_io_work.why = why;
+
+	set_bit(BITMAP_IO, &mdev->flags);
+	if (atomic_read(&mdev->ap_bio_cnt) == 0) {
+		if (list_empty(&mdev->bm_io_work.w.list)) {
+			set_bit(BITMAP_IO_QUEUED, &mdev->flags);
+			drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
+		} else
+			ERR("FIXME avoided double queuing bm_io_work\n");
+	}
+}
+
+/**
+ * drbd_bitmap_io:
+ * Does an IO operation on the bitmap, freezing application IO while that
+ * IO operation runs. This function MUST NOT be called from worker context.
+ */
+int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why)
+{
+	int rv;
+
+	D_ASSERT(current != mdev->worker.task);
+
+	drbd_suspend_io(mdev);
+
+	drbd_bm_lock(mdev, why);
+	rv = io_fn(mdev);
+	drbd_bm_unlock(mdev);
+
+	drbd_resume_io(mdev);
+
+	return rv;
+}
+
+void drbd_md_set_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
+{
+	if ((mdev->bc->md.flags & flag) != flag) {
+		drbd_md_mark_dirty(mdev);
+		mdev->bc->md.flags |= flag;
+	}
+}
+
+void drbd_md_clear_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
+{
+	if ((mdev->bc->md.flags & flag) != 0) {
+		drbd_md_mark_dirty(mdev);
+		mdev->bc->md.flags &= ~flag;
+	}
+}
+
+int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
+{
+	return (bdev->md.flags & flag) != 0;
+}
+
+STATIC void md_sync_timer_fn(unsigned long data)
+{
+	struct drbd_conf *mdev = (struct drbd_conf *) data;
+
+	drbd_queue_work_front(&mdev->data.work, &mdev->md_sync_work);
+}
+
+STATIC int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+	drbd_WARN("md_sync_timer expired! 
Worker calls drbd_md_sync().\n"); + drbd_md_sync(mdev); + + return 1; +} + +#ifdef DRBD_ENABLE_FAULTS +/* Fault insertion support including random number generator shamelessly + * stolen from kernel/rcutorture.c */ +struct fault_random_state { + unsigned long state; + unsigned long count; +}; + +#define FAULT_RANDOM_MULT 39916801 /* prime */ +#define FAULT_RANDOM_ADD 479001701 /* prime */ +#define FAULT_RANDOM_REFRESH 10000 + +/* + * Crude but fast random-number generator. Uses a linear congruential + * generator, with occasional help from get_random_bytes(). + */ +STATIC unsigned long +_drbd_fault_random(struct fault_random_state *rsp) +{ + long refresh; + + if (--rsp->count < 0) { + get_random_bytes(&refresh, sizeof(refresh)); + rsp->state += refresh; + rsp->count = FAULT_RANDOM_REFRESH; + } + rsp->state = rsp->state * FAULT_RANDOM_MULT + FAULT_RANDOM_ADD; + return swahw32(rsp->state); +} + +STATIC char * +_drbd_fault_str(unsigned int type) { + static char *_faults[] = { + "Meta-data write", + "Meta-data read", + "Resync write", + "Resync read", + "Data write", + "Data read", + "Data read ahead", + }; + + return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**"; +} + +unsigned int +_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) +{ + static struct fault_random_state rrs = {0, 0}; + + unsigned int ret = ( + (fault_devs == 0 || + ((1 << mdev_to_minor(mdev)) & fault_devs) != 0) && + (((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate)); + + if (ret) { + fault_count++; + + if (printk_ratelimit()) + drbd_WARN("***Simulating %s failure\n", + _drbd_fault_str(type)); + } + + return ret; +} +#endif + +#ifdef ENABLE_DYNAMIC_TRACE + +STATIC char *_drbd_uuid_str(unsigned int idx) +{ + static char *uuid_str[] = { + "Current", + "Bitmap", + "History_start", + "History_end", + "UUID_SIZE", + "UUID_FLAGS", + }; + + return (idx < EXT_UUID_SIZE) ? uuid_str[idx] : "*Unknown UUID index*"; +} + +/* Pretty print a UUID value */ +void drbd_print_uuid(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) +{ + INFO(" uuid[%s] now %016llX\n", + _drbd_uuid_str(idx), (unsigned long long)mdev->bc->md.uuid[idx]); +} + + +/* + * + * drbd_print_buffer + * + * This routine dumps binary data to the debugging output. Can be + * called at interrupt level. + * + * Arguments: + * + * prefix - String is output at the beginning of each line output + * flags - Control operation of the routine. Currently defined + * Flags are: + * DBGPRINT_BUFFADDR; if set, each line starts with the + * virtual address of the line being outupt. If clear, + * each line starts with the offset from the beginning + * of the buffer. + * size - Indicates the size of each entry in the buffer. 
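
The fault injector needs nothing fancier than a fast pseudo-random percentage. The decision in isolation (userspace sketch; the multiplier and addend are the constants from the code above, while the periodic get_random_bytes() reseed and the swahw32() word swap are omitted):

    #include <stdint.h>

    #define FAULT_RANDOM_MULT 39916801   /* prime */
    #define FAULT_RANDOM_ADD 479001701   /* prime */

    static unsigned long lcg_state = 1;

    /* linear congruential generator, cf. _drbd_fault_random() */
    static unsigned long fault_random(void)
    {
        lcg_state = lcg_state * FAULT_RANDOM_MULT + FAULT_RANDOM_ADD;
        return lcg_state;
    }

    /* return nonzero in roughly 'rate' percent of all calls */
    static int insert_fault(unsigned int rate)
    {
        return (fault_random() % 100) + 1 <= rate;
    }
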
Supported + * values are sizeof(char), sizeof(short) and sizeof(int) + * buffer - Start address of buffer + * buffer_va - Virtual address of start of buffer (normally the same + * as Buffer, but having it separate allows it to hold + * file address for example) + * length - length of buffer + * + */ +void +drbd_print_buffer(const char *prefix, unsigned int flags, int size, + const void *buffer, const void *buffer_va, + unsigned int length) + +#define LINE_SIZE 16 +#define LINE_ENTRIES (int)(LINE_SIZE/size) +{ + const unsigned char *pstart; + const unsigned char *pstart_va; + const unsigned char *pend; + char bytes_str[LINE_SIZE*3+8], ascii_str[LINE_SIZE+8]; + char *pbytes = bytes_str, *pascii = ascii_str; + int offset = 0; + long sizemask; + int field_width; + int index; + const unsigned char *pend_str; + const unsigned char *p; + int count; + + /* verify size parameter */ + if (size != sizeof(char) && + size != sizeof(short) && + size != sizeof(int)) { + printk(KERN_DEBUG "drbd_print_buffer: " + "ERROR invalid size %d\n", size); + return; + } + + sizemask = size-1; + field_width = size*2; + + /* Adjust start/end to be on appropriate boundary for size */ + buffer = (const char *)((long)buffer & ~sizemask); + pend = (const unsigned char *) + (((long)buffer + length + sizemask) & ~sizemask); + + if (flags & DBGPRINT_BUFFADDR) { + /* Move start back to nearest multiple of line size, + * if printing address. This results in nicely formatted output + * with addresses being on line size (16) byte boundaries */ + pstart = (const unsigned char *)((long)buffer & ~(LINE_SIZE-1)); + } else { + pstart = (const unsigned char *)buffer; + } + + /* Set value of start VA to print if addresses asked for */ + pstart_va = (const unsigned char *)buffer_va + - ((const unsigned char *)buffer-pstart); + + /* Calculate end position to nicely align right hand side */ + pend_str = pstart + (((pend-pstart) + LINE_SIZE-1) & ~(LINE_SIZE-1)); + + /* Init strings */ + *pbytes = *pascii = '\0'; + + /* Start at beginning of first line */ + p = pstart; + count = 0; + + while (p < pend_str) { + if (p < (const unsigned char *)buffer || p >= pend) { + /* Before start of buffer or after end- print spaces */ + pbytes += sprintf(pbytes, "%*c ", field_width, ' '); + pascii += sprintf(pascii, "%*c", size, ' '); + p += size; + } else { + /* Add hex and ascii to strings */ + int val; + switch (size) { + default: + case 1: + val = *(unsigned char *)p; + break; + case 2: + val = *(unsigned short *)p; + break; + case 4: + val = *(unsigned int *)p; + break; + } + + pbytes += sprintf(pbytes, "%0*x ", field_width, val); + + for (index = size; index; index--) { + *pascii++ = isprint(*p) ? *p : '.'; + p++; + } + } + + count++; + + if (count == LINE_ENTRIES || p >= pend_str) { + /* Null terminate and print record */ + *pascii = '\0'; + printk(KERN_DEBUG "%s%8.8lx: %*s|%*s|\n", + prefix, + (flags & DBGPRINT_BUFFADDR) + ? 
(long)pstart_va:(long)offset, + LINE_ENTRIES*(field_width+1), bytes_str, + LINE_SIZE, ascii_str); + + /* Move onto next line */ + pstart_va += (p-pstart); + pstart = p; + count = 0; + offset += LINE_SIZE; + + /* Re-init strings */ + pbytes = bytes_str; + pascii = ascii_str; + *pbytes = *pascii = '\0'; + } + } +} + +#define PSM(A) \ +do { \ + if (mask.A) { \ + int i = snprintf(p, len, " " #A "( %s )", \ + A##s_to_name(val.A)); \ + if (i >= len) \ + return op; \ + p += i; \ + len -= i; \ + } \ +} while (0) + +STATIC char *dump_st(char *p, int len, union drbd_state_t mask, union drbd_state_t val) +{ + char *op = p; + *p = '\0'; + PSM(role); + PSM(peer); + PSM(conn); + PSM(disk); + PSM(pdsk); + + return op; +} + +#define INFOP(fmt, args...) \ +do { \ + if (trace_level >= TraceLvlAll) { \ + INFO("%s:%d: %s [%d] %s %s " fmt , \ + file, line, current->comm, current->pid, \ + sockname, recv ? "<<<" : ">>>" , \ + ## args); \ + } else { \ + INFO("%s %s " fmt, sockname, \ + recv ? "<<<" : ">>>" , \ + ## args); \ + } \ +} while (0) + +STATIC char *_dump_block_id(u64 block_id, char *buff) +{ + if (is_syncer_block_id(block_id)) + strcpy(buff, "SyncerId"); + else + sprintf(buff, "%llx", (unsigned long long)block_id); + + return buff; +} + +void +_dump_packet(struct drbd_conf *mdev, struct socket *sock, + int recv, union Drbd_Polymorph_Packet *p, char *file, int line) +{ + char *sockname = sock == mdev->meta.socket ? "meta" : "data"; + int cmd = (recv == 2) ? p->head.command : be16_to_cpu(p->head.command); + char tmp[300]; + union drbd_state_t m, v; + + switch (cmd) { + case HandShake: + INFOP("%s (protocol %u-%u)\n", cmdname(cmd), + be32_to_cpu(p->HandShake.protocol_min), + be32_to_cpu(p->HandShake.protocol_max)); + break; + + case ReportBitMap: /* don't report this */ + case ReportCBitMap: /* don't report this */ + break; + + case Data: + INFOP("%s (sector %llus, id %s, seq %u, f %x)\n", cmdname(cmd), + (unsigned long long)be64_to_cpu(p->Data.sector), + _dump_block_id(p->Data.block_id, tmp), + be32_to_cpu(p->Data.seq_num), + be32_to_cpu(p->Data.dp_flags) + ); + break; + + case DataReply: + case RSDataReply: + INFOP("%s (sector %llus, id %s)\n", cmdname(cmd), + (unsigned long long)be64_to_cpu(p->Data.sector), + _dump_block_id(p->Data.block_id, tmp) + ); + break; + + case RecvAck: + case WriteAck: + case RSWriteAck: + case DiscardAck: + case NegAck: + case NegRSDReply: + INFOP("%s (sector %llus, size %u, id %s, seq %u)\n", + cmdname(cmd), + (long long)be64_to_cpu(p->BlockAck.sector), + be32_to_cpu(p->BlockAck.blksize), + _dump_block_id(p->BlockAck.block_id, tmp), + be32_to_cpu(p->BlockAck.seq_num) + ); + break; + + case DataRequest: + case RSDataRequest: + INFOP("%s (sector %llus, size %u, id %s)\n", cmdname(cmd), + (long long)be64_to_cpu(p->BlockRequest.sector), + be32_to_cpu(p->BlockRequest.blksize), + _dump_block_id(p->BlockRequest.block_id, tmp) + ); + break; + + case Barrier: + case BarrierAck: + INFOP("%s (barrier %u)\n", cmdname(cmd), p->Barrier.barrier); + break; + + case SyncParam: + case SyncParam89: + INFOP("%s (rate %u, verify-alg \"%.64s\", csums-alg \"%.64s\")\n", + cmdname(cmd), be32_to_cpu(p->SyncParam89.rate), + p->SyncParam89.verify_alg, p->SyncParam89.csums_alg); + break; + + case ReportUUIDs: + INFOP("%s Curr:%016llX, Bitmap:%016llX, " + "HisSt:%016llX, HisEnd:%016llX\n", + cmdname(cmd), + (unsigned long long)be64_to_cpu(p->GenCnt.uuid[Current]), + (unsigned long long)be64_to_cpu(p->GenCnt.uuid[Bitmap]), + (unsigned long long)be64_to_cpu(p->GenCnt.uuid[History_start]), + (unsigned long 
long)be64_to_cpu(p->GenCnt.uuid[History_end]));
+		break;
+
+	case ReportSizes:
+		INFOP("%s (d %lluMiB, u %lluMiB, c %lldMiB, "
+		      "max bio %x, q order %x)\n",
+		      cmdname(cmd),
+		      (long long)(be64_to_cpu(p->Sizes.d_size)>>(20-9)),
+		      (long long)(be64_to_cpu(p->Sizes.u_size)>>(20-9)),
+		      (long long)(be64_to_cpu(p->Sizes.c_size)>>(20-9)),
+		      be32_to_cpu(p->Sizes.max_segment_size),
+		      be32_to_cpu(p->Sizes.queue_order_type));
+		break;
+
+	case ReportState:
+		v.i = be32_to_cpu(p->State.state);
+		m.i = 0xffffffff;
+		dump_st(tmp, sizeof(tmp), m, v);
+		INFOP("%s (s %x {%s})\n", cmdname(cmd), v.i, tmp);
+		break;
+
+	case StateChgRequest:
+		m.i = be32_to_cpu(p->ReqState.mask);
+		v.i = be32_to_cpu(p->ReqState.val);
+		dump_st(tmp, sizeof(tmp), m, v);
+		INFOP("%s (m %x v %x {%s})\n", cmdname(cmd), m.i, v.i, tmp);
+		break;
+
+	case StateChgReply:
+		INFOP("%s (ret %x)\n", cmdname(cmd),
+		      be32_to_cpu(p->RqSReply.retcode));
+		break;
+
+	case Ping:
+	case PingAck:
+		/*
+		 * Don't trace pings at summary level
+		 */
+		if (trace_level < TraceLvlAll)
+			break;
+		/* fall through... */
+	default:
+		INFOP("%s (%u)\n", cmdname(cmd), cmd);
+		break;
+	}
+}
+
+/* Debug routine to dump info about bio */
+
+void _dump_bio(const char *pfx, struct drbd_conf *mdev, struct bio *bio, int complete, struct drbd_request *r)
+{
+#ifdef CONFIG_LBD
+#define SECTOR_FORMAT "%Lx"
+#else
+#define SECTOR_FORMAT "%lx"
+#endif
+#define SECTOR_SHIFT 9
+
+	unsigned long lowaddr = (unsigned long)(bio->bi_sector << SECTOR_SHIFT);
+	char *faddr = (char *)(lowaddr);
+	char rb[sizeof(void *)*2+6] = { 0, };
+	struct bio_vec *bvec;
+	int segno;
+
+	const int rw = bio->bi_rw;
+	const int biorw = (rw & (RW_MASK|RWA_MASK));
+	const int biobarrier = (rw & (1<<BIO_RW_BARRIER));
+	const int biosync = (rw & (1<<BIO_RW_SYNCIO));
+
+	if (r)
+		sprintf(rb, "Req:%p ", r);
+
+	INFO("%s %s:%s%s Bio:%p %s%ssector " SECTOR_FORMAT " size %x >>>",
+	       pfx,
+	       biorw == WRITE ? "Write" : "Read",
+	       biobarrier ? " : B" : "",
+	       biosync ? " : S" : "",
+	       bio,
+	       rb,
+	       complete ? (drbd_bio_uptodate(bio) ? "Success, " : "Failed, ") : "",
+	       bio->bi_sector << SECTOR_SHIFT,
+	       bio->bi_size);
+
+	if (trace_level >= TraceLvlMetrics &&
+	    ((biorw == WRITE) ^ complete)) {
+		printk(KERN_DEBUG "  ind     page   offset   length\n");
+		__bio_for_each_segment(bvec, bio, segno, 0) {
+			printk(KERN_DEBUG "  [%d] %p %8.8x %8.8x\n", segno,
+			       bvec->bv_page, bvec->bv_offset, bvec->bv_len);
+
+			if (trace_level >= TraceLvlAll) {
+				char *bvec_buf;
+				unsigned long flags;
+
+				bvec_buf = bvec_kmap_irq(bvec, &flags);
+
+				drbd_print_buffer("    ", DBGPRINT_BUFFADDR, 1,
+					bvec_buf,
+					faddr,
+					(bvec->bv_len <= 0x80)
+					? bvec->bv_len : 0x80);
+
+				bvec_kunmap_irq(bvec_buf, &flags);
+
+				if (bvec->bv_len > 0x40)
+					printk(KERN_DEBUG "    ....\n");
+
+				faddr += bvec->bv_len;
+			}
+		}
+	}
+}
+#endif
+
+module_init(drbd_init)
+module_exit(drbd_cleanup)
--
cgit v1.2.3


From 6174eb18c084817ecc6ef58a2bdfdbf7c500d86b Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Mon, 30 Mar 2009 18:47:16 +0200
Subject: DRBD: receiver

Nearly all of the "receiver" and the "asender" is in this file.

The receiver is the thread that processes all data packets. The receiver
might get blocked while waiting for memory, or be slowed down while
submitting IO.

The asender on the other hand is used to send out acknowledgements and
to receive them. It only blocks while waiting on its socket. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 4375 ++++++++++++++++++++++++++++++++++++ 1 file changed, 4375 insertions(+) create mode 100644 drivers/block/drbd/drbd_receiver.c diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c new file mode 100644 index 000000000000..2a2600d113ee --- /dev/null +++ b/drivers/block/drbd/drbd_receiver.c @@ -0,0 +1,4375 @@ +/* + drbd_receiver.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 1999-2008, Philipp Reisner . + Copyright (C) 2002-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define __KERNEL_SYSCALLS__ +#include +#include +#include +#include +#include +#include +#include +#include "drbd_int.h" +#include "drbd_req.h" + +#include "drbd_vli.h" + +struct flush_work { + struct drbd_work w; + struct drbd_epoch *epoch; +}; + +enum epoch_event { + EV_put, + EV_got_barrier_nr, + EV_barrier_done, + EV_became_last, + EV_cleanup = 32, /* used as flag */ +}; + +enum finish_epoch { + FE_still_live, + FE_destroyed, + FE_recycled, +}; + +STATIC int drbd_do_handshake(struct drbd_conf *mdev); +STATIC int drbd_do_auth(struct drbd_conf *mdev); + +STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); +STATIC int e_end_block(struct drbd_conf *, struct drbd_work *, int); +static inline struct drbd_epoch *previous_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch) +{ + struct drbd_epoch *prev; + spin_lock(&mdev->epoch_lock); + prev = list_entry(epoch->list.prev, struct drbd_epoch, list); + if (prev == epoch || prev == mdev->current_epoch) + prev = NULL; + spin_unlock(&mdev->epoch_lock); + return prev; +} + +#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) + +/** + * drbd_bp_alloc: Returns a page. Fails only if a signal comes in. 
+ */ +STATIC struct page *drbd_pp_alloc(struct drbd_conf *mdev, gfp_t gfp_mask) +{ + unsigned long flags = 0; + struct page *page; + DEFINE_WAIT(wait); + + spin_lock_irqsave(&drbd_pp_lock, flags); + page = drbd_pp_pool; + if (page) { + drbd_pp_pool = (struct page *)page_private(page); + set_page_private(page, 0); /* just to be polite */ + drbd_pp_vacant--; + } + spin_unlock_irqrestore(&drbd_pp_lock, flags); + if (page) + goto got_page; + + drbd_kick_lo(mdev); + + for (;;) { + prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE); + + /* try the pool again, maybe the drbd_kick_lo set some free */ + spin_lock_irqsave(&drbd_pp_lock, flags); + page = drbd_pp_pool; + if (page) { + drbd_pp_pool = (struct page *)page_private(page); + drbd_pp_vacant--; + } + spin_unlock_irqrestore(&drbd_pp_lock, flags); + + if (page) + break; + + /* hm. pool was empty. try to allocate from kernel. + * don't wait, if none is available, though. + */ + if (atomic_read(&mdev->pp_in_use) + < mdev->net_conf->max_buffers) { + page = alloc_page(GFP_TRY); + if (page) + break; + } + + /* doh. still no page. + * either used up the configured maximum number, + * or we are low on memory. + * wait for someone to return a page into the pool. + * unless, of course, someone signalled us. + */ + if (signal_pending(current)) { + drbd_WARN("drbd_pp_alloc interrupted!\n"); + finish_wait(&drbd_pp_wait, &wait); + return NULL; + } + drbd_kick_lo(mdev); + if (!(gfp_mask & __GFP_WAIT)) { + finish_wait(&drbd_pp_wait, &wait); + return NULL; + } + schedule(); + } + finish_wait(&drbd_pp_wait, &wait); + + got_page: + atomic_inc(&mdev->pp_in_use); + return page; +} + +STATIC void drbd_pp_free(struct drbd_conf *mdev, struct page *page) +{ + unsigned long flags = 0; + int free_it; + + spin_lock_irqsave(&drbd_pp_lock, flags); + if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count) { + free_it = 1; + } else { + set_page_private(page, (unsigned long)drbd_pp_pool); + drbd_pp_pool = page; + drbd_pp_vacant++; + free_it = 0; + } + spin_unlock_irqrestore(&drbd_pp_lock, flags); + + atomic_dec(&mdev->pp_in_use); + + if (free_it) + __free_page(page); + + wake_up(&drbd_pp_wait); +} + +/* +You need to hold the req_lock: + drbd_free_ee() + _drbd_wait_ee_list_empty() + +You must not have the req_lock: + drbd_alloc_ee() + drbd_init_ee() + drbd_release_ee() + drbd_ee_fix_bhs() + drbd_process_done_ee() + drbd_clear_done_ee() + drbd_wait_ee_list_empty() +*/ + +struct Tl_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, + u64 id, + sector_t sector, + unsigned int data_size, + gfp_t gfp_mask) __must_hold(local) +{ + struct request_queue *q; + struct Tl_epoch_entry *e; + struct bio_vec *bvec; + struct page *page; + struct bio *bio; + unsigned int ds; + int i; + + e = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); + if (!e) { + if (!(gfp_mask & __GFP_NOWARN)) + ERR("alloc_ee: Allocation of an EE failed\n"); + return NULL; + } + + bio = bio_alloc(gfp_mask & ~__GFP_HIGHMEM, div_ceil(data_size, PAGE_SIZE)); + if (!bio) { + if (!(gfp_mask & __GFP_NOWARN)) + ERR("alloc_ee: Allocation of a bio failed\n"); + goto fail1; + } + + bio->bi_bdev = mdev->bc->backing_bdev; + bio->bi_sector = sector; + + ds = data_size; + while (ds) { + page = drbd_pp_alloc(mdev, gfp_mask); + if (!page) { + if (!(gfp_mask & __GFP_NOWARN)) + ERR("alloc_ee: Allocation of a page failed\n"); + goto fail2; + } + if (!bio_add_page(bio, page, min_t(int, ds, PAGE_SIZE), 0)) { + drbd_pp_free(mdev, page); + ERR("alloc_ee: bio_add_page(s=%llu," + "data_size=%u,ds=%u) failed\n", + 
(unsigned long long)sector, data_size, ds); + + q = bdev_get_queue(bio->bi_bdev); + if (q->merge_bvec_fn) { + struct bvec_merge_data bvm = { + .bi_bdev = bio->bi_bdev, + .bi_sector = bio->bi_sector, + .bi_size = bio->bi_size, + .bi_rw = bio->bi_rw, + }; + int l = q->merge_bvec_fn(q, &bvm, + &bio->bi_io_vec[bio->bi_vcnt]); + ERR("merge_bvec_fn() = %d\n", l); + } + + /* dump more of the bio. */ + DUMPI(bio->bi_max_vecs); + DUMPI(bio->bi_vcnt); + DUMPI(bio->bi_size); + DUMPI(bio->bi_phys_segments); + + goto fail2; + break; + } + ds -= min_t(int, ds, PAGE_SIZE); + } + + D_ASSERT(data_size == bio->bi_size); + + bio->bi_private = e; + e->mdev = mdev; + e->sector = sector; + e->size = bio->bi_size; + + e->private_bio = bio; + e->block_id = id; + INIT_HLIST_NODE(&e->colision); + e->epoch = NULL; + e->flags = 0; + + MTRACE(TraceTypeEE, TraceLvlAll, + INFO("allocated EE sec=%llus size=%u ee=%p\n", + (unsigned long long)sector, data_size, e); + ); + + return e; + + fail2: + __bio_for_each_segment(bvec, bio, i, 0) { + drbd_pp_free(mdev, bvec->bv_page); + } + bio_put(bio); + fail1: + mempool_free(e, drbd_ee_mempool); + + return NULL; +} + +void drbd_free_ee(struct drbd_conf *mdev, struct Tl_epoch_entry *e) +{ + struct bio *bio = e->private_bio; + struct bio_vec *bvec; + int i; + + MTRACE(TraceTypeEE, TraceLvlAll, + INFO("Free EE sec=%llus size=%u ee=%p\n", + (unsigned long long)e->sector, e->size, e); + ); + + __bio_for_each_segment(bvec, bio, i, 0) { + drbd_pp_free(mdev, bvec->bv_page); + } + + bio_put(bio); + + D_ASSERT(hlist_unhashed(&e->colision)); + + mempool_free(e, drbd_ee_mempool); +} + +/* currently on module unload only */ +int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) +{ + int count = 0; + struct Tl_epoch_entry *e; + struct list_head *le; + + spin_lock_irq(&mdev->req_lock); + while (!list_empty(list)) { + le = list->next; + list_del(le); + e = list_entry(le, struct Tl_epoch_entry, w.list); + drbd_free_ee(mdev, e); + count++; + } + spin_unlock_irq(&mdev->req_lock); + + return count; +} + + +STATIC void reclaim_net_ee(struct drbd_conf *mdev) +{ + struct Tl_epoch_entry *e; + struct list_head *le, *tle; + + /* The EEs are always appended to the end of the list. Since + they are sent in order over the wire, they have to finish + in order. As soon as we see the first not finished we can + stop to examine the list... */ + + list_for_each_safe(le, tle, &mdev->net_ee) { + e = list_entry(le, struct Tl_epoch_entry, w.list); + if (drbd_bio_has_active_page(e->private_bio)) + break; + list_del(le); + drbd_free_ee(mdev, e); + } +} + + +/* + * This function is called from _asender only_ + * but see also comments in _req_mod(,barrier_acked) + * and receive_Barrier. + * + * Move entries from net_ee to done_ee, if ready. + * Grab done_ee, call all callbacks, free the entries. + * The callbacks typically send out ACKs. + */ +STATIC int drbd_process_done_ee(struct drbd_conf *mdev) +{ + LIST_HEAD(work_list); + struct Tl_epoch_entry *e, *t; + int ok = 1; + + spin_lock_irq(&mdev->req_lock); + reclaim_net_ee(mdev); + list_splice_init(&mdev->done_ee, &work_list); + spin_unlock_irq(&mdev->req_lock); + + /* possible callbacks here: + * e_end_block, and e_end_resync_block, e_send_discard_ack. + * all ignore the last argument. 
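The splice just above is the classic detach-then-process idiom: the whole done_ee list moves onto a private list in O(1) while req_lock is held, and the callbacks, which may block while sending ACKs over the network, then run with the lock dropped. The same idiom in a self-contained userspace form (illustrative names):

    #include <pthread.h>
    #include <stddef.h>

    struct work {
        struct work *next;
        int (*cb)(struct work *w);   /* may free w, may block */
    };

    struct work_queue {
        pthread_mutex_t lock;
        struct work *head;           /* plays the role of done_ee */
    };

    static int process_done(struct work_queue *q)
    {
        struct work *w, *next;
        int ok = 1;

        /* detach everything while holding the lock */
        pthread_mutex_lock(&q->lock);
        w = q->head;
        q->head = NULL;
        pthread_mutex_unlock(&q->lock);

        /* run the callbacks unlocked; fetch ->next first because the
         * callback is allowed to free its work item */
        for (; w; w = next) {
            next = w->next;
            if (!w->cb(w))
                ok = 0;
        }
        return ok;
    }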
+ */ + list_for_each_entry_safe(e, t, &work_list, w.list) { + MTRACE(TraceTypeEE, TraceLvlAll, + INFO("Process EE on done_ee sec=%llus size=%u ee=%p\n", + (unsigned long long)e->sector, e->size, e); + ); + /* list_del not necessary, next/prev members not touched */ + if (e->w.cb(mdev, &e->w, 0) == 0) + ok = 0; + drbd_free_ee(mdev, e); + } + wake_up(&mdev->ee_wait); + + return ok; +} + + + +/* clean-up helper for drbd_disconnect */ +void _drbd_clear_done_ee(struct drbd_conf *mdev) +{ + struct list_head *le; + struct Tl_epoch_entry *e; + struct drbd_epoch *epoch; + int n = 0; + + + reclaim_net_ee(mdev); + + while (!list_empty(&mdev->done_ee)) { + le = mdev->done_ee.next; + list_del(le); + e = list_entry(le, struct Tl_epoch_entry, w.list); + if (mdev->net_conf->wire_protocol == DRBD_PROT_C + || is_syncer_block_id(e->block_id)) + ++n; + + if (!hlist_unhashed(&e->colision)) + hlist_del_init(&e->colision); + + if (e->epoch) { + if (e->flags & EE_IS_BARRIER) { + epoch = previous_epoch(mdev, e->epoch); + if (epoch) + drbd_may_finish_epoch(mdev, epoch, EV_barrier_done + EV_cleanup); + } + drbd_may_finish_epoch(mdev, e->epoch, EV_put + EV_cleanup); + } + drbd_free_ee(mdev, e); + } + + sub_unacked(mdev, n); +} + +void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) +{ + DEFINE_WAIT(wait); + + /* avoids spin_lock/unlock + * and calling prepare_to_wait in the fast path */ + while (!list_empty(head)) { + prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE); + spin_unlock_irq(&mdev->req_lock); + drbd_kick_lo(mdev); + schedule(); + finish_wait(&mdev->ee_wait, &wait); + spin_lock_irq(&mdev->req_lock); + } +} + +void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) +{ + spin_lock_irq(&mdev->req_lock); + _drbd_wait_ee_list_empty(mdev, head); + spin_unlock_irq(&mdev->req_lock); +} + +/* see also kernel_accept; which is only present since 2.6.18. + * also we want to log which part of it failed, exactly */ +STATIC int drbd_accept(struct drbd_conf *mdev, const char **what, + struct socket *sock, struct socket **newsock) +{ + struct sock *sk = sock->sk; + int err = 0; + + *what = "listen"; + err = sock->ops->listen(sock, 5); + if (err < 0) + goto out; + + *what = "sock_create_lite"; + err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, + newsock); + if (err < 0) + goto out; + + *what = "accept"; + err = sock->ops->accept(sock, *newsock, 0); + if (err < 0) { + sock_release(*newsock); + *newsock = NULL; + goto out; + } + (*newsock)->ops = sock->ops; + +out: + return err; +} + +STATIC int drbd_recv_short(struct drbd_conf *mdev, struct socket *sock, + void *buf, size_t size, int flags) +{ + mm_segment_t oldfs; + struct kvec iov = { + .iov_base = buf, + .iov_len = size, + }; + struct msghdr msg = { + .msg_iovlen = 1, + .msg_iov = (struct iovec *)&iov, + .msg_flags = (flags ? 
flags : MSG_WAITALL | MSG_NOSIGNAL) + }; + int rv; + + oldfs = get_fs(); + set_fs(KERNEL_DS); + rv = sock_recvmsg(sock, &msg, size, msg.msg_flags); + set_fs(oldfs); + + return rv; +} + +STATIC int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) +{ + mm_segment_t oldfs; + struct kvec iov = { + .iov_base = buf, + .iov_len = size, + }; + struct msghdr msg = { + .msg_iovlen = 1, + .msg_iov = (struct iovec *)&iov, + .msg_flags = MSG_WAITALL | MSG_NOSIGNAL + }; + int rv; + + oldfs = get_fs(); + set_fs(KERNEL_DS); + + for (;;) { + rv = sock_recvmsg(mdev->data.socket, &msg, size, msg.msg_flags); + if (rv == size) + break; + + /* Note: + * ECONNRESET other side closed the connection + * ERESTARTSYS (on sock) we got a signal + */ + + if (rv < 0) { + if (rv == -ECONNRESET) + INFO("sock was reset by peer\n"); + else if (rv != -ERESTARTSYS) + ERR("sock_recvmsg returned %d\n", rv); + break; + } else if (rv == 0) { + INFO("sock was shut down by peer\n"); + break; + } else { + /* signal came in, or peer/link went down, + * after we read a partial message + */ + /* D_ASSERT(signal_pending(current)); */ + break; + } + }; + + set_fs(oldfs); + + if (rv != size) + drbd_force_state(mdev, NS(conn, BrokenPipe)); + + return rv; +} + +STATIC struct socket *drbd_try_connect(struct drbd_conf *mdev) +{ + const char *what; + struct socket *sock; + struct sockaddr_in6 src_in6; + int err; + int disconnect_on_error = 1; + + if (!inc_net(mdev)) + return NULL; + + what = "sock_create_kern"; + err = sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family, + SOCK_STREAM, IPPROTO_TCP, &sock); + if (err < 0) { + sock = NULL; + goto out; + } + + sock->sk->sk_rcvtimeo = + sock->sk->sk_sndtimeo = mdev->net_conf->try_connect_int*HZ; + + /* explicitly bind to the configured IP as source IP + * for the outgoing connections. + * This is needed for multihomed hosts and to be + * able to use lo: interfaces for drbd. + * Make sure to use 0 as portnumber, so linux selects + * a free one dynamically. + */ + memcpy(&src_in6, mdev->net_conf->my_addr, + min_t(int, mdev->net_conf->my_addr_len, sizeof(src_in6))); + if (((struct sockaddr *)mdev->net_conf->my_addr)->sa_family == AF_INET6) + src_in6.sin6_port = 0; + else + ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */ + + what = "bind before connect"; + err = sock->ops->bind(sock, + (struct sockaddr *) &src_in6, + mdev->net_conf->my_addr_len); + if (err < 0) + goto out; + + /* connect may fail, peer not yet available. + * stay WFConnection, don't go Disconnecting! 
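For reference, drbd_recv() above gets its all-or-nothing behaviour from MSG_WAITALL and treats any short read as a broken connection. Without that flag the same guarantee is usually built by hand; a minimal userspace equivalent:

    #include <errno.h>
    #include <sys/types.h>
    #include <sys/socket.h>

    /* Receive exactly `size` bytes: returns size on success, 0 if the
     * peer performed an orderly shutdown, -1 on error. */
    static ssize_t recv_exact(int fd, void *buf, size_t size)
    {
        size_t done = 0;

        while (done < size) {
            ssize_t rv = recv(fd, (char *)buf + done, size - done, 0);
            if (rv == 0)
                return 0;           /* connection closed by peer */
            if (rv < 0) {
                if (errno == EINTR)
                    continue;       /* interrupted, just retry */
                return -1;          /* ECONNRESET and friends */
            }
            done += rv;
        }
        return (ssize_t)done;
    }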
*/ + disconnect_on_error = 0; + what = "connect"; + err = sock->ops->connect(sock, + (struct sockaddr *)mdev->net_conf->peer_addr, + mdev->net_conf->peer_addr_len, 0); + +out: + if (err < 0) { + if (sock) { + sock_release(sock); + sock = NULL; + } + switch (-err) { + /* timeout, busy, signal pending */ + case ETIMEDOUT: case EAGAIN: case EINPROGRESS: + case EINTR: case ERESTARTSYS: + /* peer not (yet) available, network problem */ + case ECONNREFUSED: case ENETUNREACH: + case EHOSTDOWN: case EHOSTUNREACH: + disconnect_on_error = 0; + break; + default: + ERR("%s failed, err = %d\n", what, err); + } + if (disconnect_on_error) + drbd_force_state(mdev, NS(conn, Disconnecting)); + } + dec_net(mdev); + return sock; +} + +STATIC struct socket *drbd_wait_for_connect(struct drbd_conf *mdev) +{ + int timeo, err; + struct socket *s_estab = NULL, *s_listen; + const char *what; + + if (!inc_net(mdev)) + return NULL; + + what = "sock_create_kern"; + err = sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family, + SOCK_STREAM, IPPROTO_TCP, &s_listen); + if (err) { + s_listen = NULL; + goto out; + } + + timeo = mdev->net_conf->try_connect_int * HZ; + timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */ + + s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ + s_listen->sk->sk_rcvtimeo = timeo; + s_listen->sk->sk_sndtimeo = timeo; + + what = "bind before listen"; + err = s_listen->ops->bind(s_listen, + (struct sockaddr *) mdev->net_conf->my_addr, + mdev->net_conf->my_addr_len); + if (err < 0) + goto out; + + err = drbd_accept(mdev, &what, s_listen, &s_estab); + +out: + if (s_listen) + sock_release(s_listen); + if (err < 0) { + if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { + ERR("%s failed, err = %d\n", what, err); + drbd_force_state(mdev, NS(conn, Disconnecting)); + } + } + dec_net(mdev); + + return s_estab; +} + +STATIC int drbd_send_fp(struct drbd_conf *mdev, + struct socket *sock, enum Drbd_Packet_Cmd cmd) +{ + struct Drbd_Header *h = (struct Drbd_Header *) &mdev->data.sbuf.head; + + return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0); +} + +STATIC enum Drbd_Packet_Cmd drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) +{ + struct Drbd_Header *h = (struct Drbd_Header *) &mdev->data.sbuf.head; + int rr; + + rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0); + + if (rr == sizeof(*h) && h->magic == BE_DRBD_MAGIC) + return be16_to_cpu(h->command); + + return 0xffff; +} + +/** + * drbd_socket_okay: + * Tests if the connection behind the socket still exists. If not it frees + * the socket. + */ +static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock) +{ + int rr; + char tb[4]; + + if (!*sock) + return FALSE; + + rr = drbd_recv_short(mdev, *sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); + + if (rr > 0 || rr == -EAGAIN) { + return TRUE; + } else { + sock_release(*sock); + *sock = NULL; + return FALSE; + } +} + +/* + * return values: + * 1 yess, we have a valid connection + * 0 oops, did not work out, please try again + * -1 peer talks different language, + * no point in trying again, please go standalone. + * -2 We do not have a network config... 
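The liveness test in drbd_socket_okay() above consumes nothing: a non-blocking MSG_PEEK read distinguishes "data pending or simply idle" from "closed or broken". The same probe in plain userspace C:

    #include <errno.h>
    #include <sys/types.h>
    #include <sys/socket.h>

    static int socket_alive(int fd)
    {
        char tb[4];
        ssize_t rv = recv(fd, tb, sizeof(tb), MSG_DONTWAIT | MSG_PEEK);

        if (rv > 0)
            return 1;   /* data is waiting; certainly alive */
        if (rv < 0 && errno == EAGAIN)
            return 1;   /* nothing to read yet, connection still up */
        return 0;       /* 0: orderly shutdown; <0: hard error */
    }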
+ */ +STATIC int drbd_connect(struct drbd_conf *mdev) +{ + struct socket *s, *sock, *msock; + int try, h, ok; + + D_ASSERT(!mdev->data.socket); + + if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) + ERR("CREATE_BARRIER flag was set in drbd_connect - now cleared!\n"); + + if (drbd_request_state(mdev, NS(conn, WFConnection)) < SS_Success) + return -2; + + clear_bit(DISCARD_CONCURRENT, &mdev->flags); + + sock = NULL; + msock = NULL; + + do { + for (try = 0;;) { + /* 3 tries, this should take less than a second! */ + s = drbd_try_connect(mdev); + if (s || ++try >= 3) + break; + /* give the other side time to call bind() & listen() */ + __set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ / 10); + } + + if (s) { + if (!sock) { + drbd_send_fp(mdev, s, HandShakeS); + sock = s; + s = NULL; + } else if (!msock) { + drbd_send_fp(mdev, s, HandShakeM); + msock = s; + s = NULL; + } else { + ERR("Logic error in drbd_connect()\n"); + return -1; + } + } + + if (sock && msock) { + __set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ / 10); + ok = drbd_socket_okay(mdev, &sock); + ok = drbd_socket_okay(mdev, &msock) && ok; + if (ok) + break; + } + +retry: + s = drbd_wait_for_connect(mdev); + if (s) { + try = drbd_recv_fp(mdev, s); + drbd_socket_okay(mdev, &sock); + drbd_socket_okay(mdev, &msock); + switch (try) { + case HandShakeS: + if (sock) { + drbd_WARN("initial packet S crossed\n"); + sock_release(sock); + } + sock = s; + break; + case HandShakeM: + if (msock) { + drbd_WARN("initial packet M crossed\n"); + sock_release(msock); + } + msock = s; + set_bit(DISCARD_CONCURRENT, &mdev->flags); + break; + default: + drbd_WARN("Error receiving initial packet\n"); + sock_release(s); + if (random32() & 1) + goto retry; + } + } + + if (mdev->state.conn <= Disconnecting) + return -1; + if (signal_pending(current)) { + flush_signals(current); + smp_rmb(); + if (get_t_state(&mdev->receiver) == Exiting) { + if (sock) + sock_release(sock); + if (msock) + sock_release(msock); + return -1; + } + } + + if (sock && msock) { + ok = drbd_socket_okay(mdev, &sock); + ok = drbd_socket_okay(mdev, &msock) && ok; + if (ok) + break; + } + } while (1); + + msock->sk->sk_reuse = 1; /* SO_REUSEADDR */ + sock->sk->sk_reuse = 1; /* SO_REUSEADDR */ + + sock->sk->sk_allocation = GFP_NOIO; + msock->sk->sk_allocation = GFP_NOIO; + + sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; + msock->sk->sk_priority = TC_PRIO_INTERACTIVE; + + if (mdev->net_conf->sndbuf_size) { + sock->sk->sk_sndbuf = mdev->net_conf->sndbuf_size; + sock->sk->sk_rcvbuf = mdev->net_conf->sndbuf_size; + sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK; + } + + /* NOT YET ... + * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; + * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; + * first set it to the HandShake timeout, wich is hardcoded for now: */ + sock->sk->sk_sndtimeo = + sock->sk->sk_rcvtimeo = 2*HZ; + + msock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; + msock->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; + + /* we don't want delays. 
+ * we use TCP_CORK where apropriate, though */ + drbd_tcp_nodelay(sock); + drbd_tcp_nodelay(msock); + + mdev->data.socket = sock; + mdev->meta.socket = msock; + mdev->last_received = jiffies; + + D_ASSERT(mdev->asender.task == NULL); + + h = drbd_do_handshake(mdev); + if (h <= 0) + return h; + + if (mdev->cram_hmac_tfm) { + /* drbd_request_state(mdev, NS(conn, WFAuth)); */ + if (!drbd_do_auth(mdev)) { + ERR("Authentication of peer failed\n"); + return -1; + } + } + + if (drbd_request_state(mdev, NS(conn, WFReportParams)) < SS_Success) + return 0; + + sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; + sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; + + atomic_set(&mdev->packet_seq, 0); + mdev->peer_seq = 0; + + drbd_thread_start(&mdev->asender); + + drbd_send_protocol(mdev); + drbd_send_sync_param(mdev, &mdev->sync_conf); + drbd_send_sizes(mdev); + drbd_send_uuids(mdev); + drbd_send_state(mdev); + clear_bit(USE_DEGR_WFC_T, &mdev->flags); + + return 1; +} + +STATIC int drbd_recv_header(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + int r; + + r = drbd_recv(mdev, h, sizeof(*h)); + + if (unlikely(r != sizeof(*h))) { + ERR("short read expecting header on sock: r=%d\n", r); + return FALSE; + }; + h->command = be16_to_cpu(h->command); + h->length = be16_to_cpu(h->length); + if (unlikely(h->magic != BE_DRBD_MAGIC)) { + ERR("magic?? on data m: 0x%lx c: %d l: %d\n", + (long)be32_to_cpu(h->magic), + h->command, h->length); + return FALSE; + } + mdev->last_received = jiffies; + + return TRUE; +} + +STATIC enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch) +{ + int rv; + + if (mdev->write_ordering >= WO_bdev_flush && inc_local(mdev)) { + rv = blkdev_issue_flush(mdev->bc->backing_bdev, NULL); + if (rv) { + ERR("local disk flush failed with status %d\n", rv); + /* would rather check on EOPNOTSUPP, but that is not reliable. + * don't try again for ANY return value != 0 + * if (rv == -EOPNOTSUPP) */ + drbd_bump_write_ordering(mdev, WO_drain_io); + } + dec_local(mdev); + } + + return drbd_may_finish_epoch(mdev, epoch, EV_barrier_done); +} + +/** + * w_flush: Checks if an epoch can be closed and therefore might + * close and/or free the epoch object. + */ +STATIC int w_flush(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct flush_work *fw = (struct flush_work *)w; + struct drbd_epoch *epoch = fw->epoch; + + kfree(w); + + if (!test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags)) + drbd_flush_after_epoch(mdev, epoch); + + drbd_may_finish_epoch(mdev, epoch, EV_put | + (mdev->state.conn < Connected ? EV_cleanup : 0)); + + return 1; +} + +/** + * drbd_may_finish_epoch: Checks if an epoch can be closed and therefore might + * close and/or free the epoch object. 
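drbd_recv_header() earlier in this hunk checks the magic and byte-swaps command and length before anything else trusts them. A self-contained sketch of that decode; the field layout is inferred from the be16/be32 conversions above, and the magic value is a placeholder (the real constant lives in the DRBD headers as DRBD_MAGIC):

    #include <stdint.h>
    #include <arpa/inet.h>

    struct wire_header {          /* assumed layout, big endian on the wire */
        uint32_t magic;
        uint16_t command;
        uint16_t length;
    } __attribute__((packed));

    #define WIRE_MAGIC 0x83740267u   /* placeholder value */

    /* Returns 1 and converts to host byte order, or 0 on a bad magic. */
    static int decode_header(struct wire_header *h)
    {
        if (ntohl(h->magic) != WIRE_MAGIC)
            return 0;
        h->magic   = ntohl(h->magic);
        h->command = ntohs(h->command);
        h->length  = ntohs(h->length);
        return 1;
    }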
+ */ +STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, + struct drbd_epoch *epoch, + enum epoch_event ev) +{ + int finish, epoch_size; + struct drbd_epoch *next_epoch; + int schedule_flush = 0; + enum finish_epoch rv = FE_still_live; + + static char *epoch_event_str[] = { + [EV_put] = "put", + [EV_got_barrier_nr] = "got_barrier_nr", + [EV_barrier_done] = "barrier_done", + [EV_became_last] = "became_last", + }; + + spin_lock(&mdev->epoch_lock); + do { + next_epoch = NULL; + finish = 0; + + epoch_size = atomic_read(&epoch->epoch_size); + + switch (ev & ~EV_cleanup) { + case EV_put: + atomic_dec(&epoch->active); + break; + case EV_got_barrier_nr: + set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags); + + /* Special case: If we just switched from WO_bio_barrier to + WO_bdev_flush we should not finish the current epoch */ + if (test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) && epoch_size == 1 && + mdev->write_ordering != WO_bio_barrier && + epoch == mdev->current_epoch) + clear_bit(DE_CONTAINS_A_BARRIER, &epoch->flags); + break; + case EV_barrier_done: + set_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags); + break; + case EV_became_last: + /* nothing to do*/ + break; + } + + MTRACE(TraceTypeEpochs, TraceLvlAll, + INFO("Update epoch %p/%d { size=%d active=%d %c%c n%c%c } ev=%s\n", + epoch, epoch->barrier_nr, epoch_size, atomic_read(&epoch->active), + test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) ? 'n' : '-', + test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) ? 'b' : '-', + test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) ? 'i' : '-', + test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) ? 'd' : '-', + epoch_event_str[ev]); + ); + + if (epoch_size != 0 && + atomic_read(&epoch->active) == 0 && + test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) && + epoch->list.prev == &mdev->current_epoch->list && + !test_bit(DE_IS_FINISHING, &epoch->flags)) { + /* Nearly all conditions are met to finish that epoch... 
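The multi-clause test being assembled at this point is easier to see in isolation. Distilled, with illustrative types rather than the driver's: an epoch may be finished once it has carried writes, none are still in flight, its barrier number has arrived, it is the oldest epoch in the list, and nobody else is already finishing it.

    #include <stdatomic.h>

    enum {
        DE_HAVE_BARRIER_NUMBER = 1 << 0,
        DE_IS_FINISHING        = 1 << 1,
    };

    struct epoch_sketch {
        atomic_int epoch_size;   /* writes accounted to this epoch */
        atomic_int active;       /* writes still in flight */
        unsigned   flags;
        int        is_oldest;    /* stands in for the list-position test */
    };

    static int epoch_may_finish(struct epoch_sketch *e)
    {
        return atomic_load(&e->epoch_size) != 0 &&
               atomic_load(&e->active) == 0 &&
               (e->flags & DE_HAVE_BARRIER_NUMBER) &&
               e->is_oldest &&
               !(e->flags & DE_IS_FINISHING);
    }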
*/ + if (test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) || + mdev->write_ordering == WO_none || + (epoch_size == 1 && test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) || + ev & EV_cleanup) { + finish = 1; + set_bit(DE_IS_FINISHING, &epoch->flags); + } else if (!test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) && + mdev->write_ordering == WO_bio_barrier) { + atomic_inc(&epoch->active); + schedule_flush = 1; + } + } + if (finish) { + if (!(ev & EV_cleanup)) { + spin_unlock(&mdev->epoch_lock); + drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size); + spin_lock(&mdev->epoch_lock); + } + dec_unacked(mdev); + + if (mdev->current_epoch != epoch) { + next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list); + list_del(&epoch->list); + ev = EV_became_last | (ev & EV_cleanup); + mdev->epochs--; + MTRACE(TraceTypeEpochs, TraceLvlSummary, + INFO("Freeing epoch %p/%d { size=%d } nr_epochs=%d\n", + epoch, epoch->barrier_nr, epoch_size, mdev->epochs); + ); + kfree(epoch); + + if (rv == FE_still_live) + rv = FE_destroyed; + } else { + epoch->flags = 0; + atomic_set(&epoch->epoch_size, 0); + /* atomic_set(&epoch->active, 0); is alrady zero */ + if (rv == FE_still_live) + rv = FE_recycled; + } + } + + if (!next_epoch) + break; + + epoch = next_epoch; + } while (1); + + spin_unlock(&mdev->epoch_lock); + + if (schedule_flush) { + struct flush_work *fw; + fw = kmalloc(sizeof(*fw), GFP_ATOMIC); + if (fw) { + MTRACE(TraceTypeEpochs, TraceLvlMetrics, + INFO("Schedul flush %p/%d { size=%d } nr_epochs=%d\n", + epoch, epoch->barrier_nr, epoch_size, mdev->epochs); + ); + fw->w.cb = w_flush; + fw->epoch = epoch; + drbd_queue_work(&mdev->data.work, &fw->w); + } else { + drbd_WARN("Could not kmalloc a flush_work obj\n"); + set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags); + /* That is not a recursion, only one level */ + drbd_may_finish_epoch(mdev, epoch, EV_barrier_done); + drbd_may_finish_epoch(mdev, epoch, EV_put); + } + } + + return rv; +} + +/** + * drbd_bump_write_ordering: It turned out that the current mdev->write_ordering + * method does not work on the backing block device. Try the next allowed method. + */ +void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local) +{ + enum write_ordering_e pwo; + static char *write_ordering_str[] = { + [WO_none] = "none", + [WO_drain_io] = "drain", + [WO_bdev_flush] = "flush", + [WO_bio_barrier] = "barrier", + }; + + pwo = mdev->write_ordering; + wo = min(pwo, wo); + if (wo == WO_bio_barrier && mdev->bc->dc.no_disk_barrier) + wo = WO_bdev_flush; + if (wo == WO_bdev_flush && mdev->bc->dc.no_disk_flush) + wo = WO_drain_io; + if (wo == WO_drain_io && mdev->bc->dc.no_disk_drain) + wo = WO_none; + mdev->write_ordering = wo; + if (pwo != mdev->write_ordering || wo == WO_bio_barrier) + INFO("Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]); +} + +/** + * w_e_reissue: In case the IO subsystem delivered an error for an BIO with the + * BIO_RW_BARRIER flag set, retry that bio without the barrier flag set. + */ +int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __releases(local) +{ + struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + struct bio *bio = e->private_bio; + + /* We leave DE_CONTAINS_A_BARRIER and EE_IS_BARRIER in place, + (and DE_BARRIER_IN_NEXT_EPOCH_ISSUED in the previous Epoch) + so that we can finish that epoch in drbd_may_finish_epoch(). 
+	   That is necessary if we already have a long chain of Epochs, before
+	   we realize that BIO_RW_BARRIER is actually not supported */
+
+	/* As long as the -ENOTSUPP on the barrier is reported immediately
+	   that will never trigger. If it is reported late, we will just
+	   print that warning and continue correctly for all future requests
+	   with WO_bdev_flush */
+	if (previous_epoch(mdev, e->epoch))
+		drbd_WARN("Write ordering was not enforced (one time event)\n");
+
+	/* prepare bio for re-submit,
+	 * re-init volatile members */
+	/* we still have a local reference,
+	 * inc_local was done in receive_Data. */
+	bio->bi_bdev = mdev->bc->backing_bdev;
+	bio->bi_sector = e->sector;
+	bio->bi_size = e->size;
+	bio->bi_idx = 0;
+
+	bio->bi_flags &= ~(BIO_POOL_MASK - 1);
+	bio->bi_flags |= 1 << BIO_UPTODATE;
+
+	/* don't know whether this is necessary: */
+	bio->bi_phys_segments = 0;
+	bio->bi_next = NULL;
+
+	/* these should be unchanged: */
+	/* bio->bi_end_io = drbd_endio_write_sec; */
+	/* bio->bi_vcnt = whatever; */
+
+	e->w.cb = e_end_block;
+
+	/* This is no longer a barrier request. */
+	bio->bi_rw &= ~(1UL << BIO_RW_BARRIER);
+
+	drbd_generic_make_request(mdev, DRBD_FAULT_DT_WR, bio);
+
+	return 1;
+}
+
+STATIC int receive_Barrier(struct drbd_conf *mdev, struct Drbd_Header *h)
+{
+	int rv, issue_flush;
+	struct Drbd_Barrier_Packet *p = (struct Drbd_Barrier_Packet *)h;
+	struct drbd_epoch *epoch;
+
+	ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
+
+	rv = drbd_recv(mdev, h->payload, h->length);
+	ERR_IF(rv != h->length) return FALSE;
+
+	inc_unacked(mdev);
+
+	if (mdev->net_conf->wire_protocol != DRBD_PROT_C)
+		drbd_kick_lo(mdev);
+
+	mdev->current_epoch->barrier_nr = p->barrier;
+	rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_got_barrier_nr);
+
+	/* BarrierAck may imply that the corresponding extent is dropped from
+	 * the activity log, which means it would not be resynced in case the
+	 * Primary crashes now.
+	 * Therefore we must send the barrier_ack after the barrier request was
+	 * completed. */
+	switch (mdev->write_ordering) {
+	case WO_bio_barrier:
+	case WO_none:
+		if (rv == FE_recycled)
+			return TRUE;
+		break;
+
+	case WO_bdev_flush:
+	case WO_drain_io:
+		D_ASSERT(rv == FE_still_live);
+		set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &mdev->current_epoch->flags);
+		drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
+		rv = drbd_flush_after_epoch(mdev, mdev->current_epoch);
+		if (rv == FE_recycled)
+			return TRUE;
+
+		/* The asender will send all the ACKs and barrier ACKs out, since
+		   all EEs moved from the active_ee to the done_ee.
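As the switch above shows, how a Barrier packet is honoured depends on the write-ordering method: WO_bdev_flush and WO_drain_io drain in-flight writes (and possibly flush the backing device) before the barrier ack can go out, while WO_bio_barrier and WO_none need no extra work at this point. The same decision distilled, with hypothetical stand-ins for drbd_wait_ee_list_empty() and drbd_flush_after_epoch():

    enum write_ordering { WO_none, WO_drain_io, WO_bdev_flush, WO_bio_barrier };

    static void drain_active_writes(void) { /* wait_ee_list_empty(active_ee) */ }
    static void flush_backing_dev(void)   { /* blkdev_issue_flush() */ }

    static void honour_barrier(enum write_ordering wo)
    {
        switch (wo) {
        case WO_bio_barrier:   /* ordering rides on a barrier bio */
        case WO_none:          /* nothing to enforce here */
            break;
        case WO_bdev_flush:
        case WO_drain_io:
            drain_active_writes();
            if (wo == WO_bdev_flush)
                flush_backing_dev();
            break;
        }
    }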
We need to + provide a new epoch object for the EEs that come in soon */ + break; + } + + epoch = kmalloc(sizeof(struct drbd_epoch), GFP_KERNEL); + if (!epoch) { + drbd_WARN("Allocation of an epoch failed, slowing down\n"); + issue_flush = !test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags); + drbd_wait_ee_list_empty(mdev, &mdev->active_ee); + if (issue_flush) { + rv = drbd_flush_after_epoch(mdev, mdev->current_epoch); + if (rv == FE_recycled) + return TRUE; + } + + drbd_wait_ee_list_empty(mdev, &mdev->done_ee); + + return TRUE; + } + + epoch->flags = 0; + atomic_set(&epoch->epoch_size, 0); + atomic_set(&epoch->active, 0); + + spin_lock(&mdev->epoch_lock); + if (atomic_read(&mdev->current_epoch->epoch_size)) { + list_add(&epoch->list, &mdev->current_epoch->list); + mdev->current_epoch = epoch; + mdev->epochs++; + MTRACE(TraceTypeEpochs, TraceLvlMetrics, + INFO("Allocat epoch %p/xxxx { } nr_epochs=%d\n", epoch, mdev->epochs); + ); + } else { + /* The current_epoch got recycled while we allocated this one... */ + kfree(epoch); + } + spin_unlock(&mdev->epoch_lock); + + return TRUE; +} + +/* used from receive_RSDataReply (recv_resync_read) + * and from receive_Data */ +STATIC struct Tl_epoch_entry * +read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __must_hold(local) +{ + struct Tl_epoch_entry *e; + struct bio_vec *bvec; + struct page *page; + struct bio *bio; + int dgs, ds, i, rr; + void *dig_in = mdev->int_dig_in; + void *dig_vv = mdev->int_dig_vv; + + dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? + crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; + + if (dgs) { + rr = drbd_recv(mdev, dig_in, dgs); + if (rr != dgs) { + drbd_WARN("short read receiving data digest: read %d expected %d\n", + rr, dgs); + return NULL; + } + } + + data_size -= dgs; + + ERR_IF(data_size & 0x1ff) return NULL; + ERR_IF(data_size > DRBD_MAX_SEGMENT_SIZE) return NULL; + + e = drbd_alloc_ee(mdev, id, sector, data_size, GFP_KERNEL); + if (!e) + return NULL; + bio = e->private_bio; + ds = data_size; + bio_for_each_segment(bvec, bio, i) { + page = bvec->bv_page; + rr = drbd_recv(mdev, kmap(page), min_t(int, ds, PAGE_SIZE)); + kunmap(page); + if (rr != min_t(int, ds, PAGE_SIZE)) { + drbd_free_ee(mdev, e); + drbd_WARN("short read receiving data: read %d expected %d\n", + rr, min_t(int, ds, PAGE_SIZE)); + return NULL; + } + ds -= rr; + } + + if (dgs) { + drbd_csum(mdev, mdev->integrity_r_tfm, bio, dig_vv); + if (memcmp(dig_in, dig_vv, dgs)) { + ERR("Digest integrity check FAILED.\n"); + drbd_bcast_ee(mdev, "digest failed", + dgs, dig_in, dig_vv, e); + drbd_free_ee(mdev, e); + return NULL; + } + } + mdev->recv_cnt += data_size>>9; + return e; +} + +/* drbd_drain_block() just takes a data block + * out of the socket input buffer, and discards it. + */ +STATIC int drbd_drain_block(struct drbd_conf *mdev, int data_size) +{ + struct page *page; + int rr, rv = 1; + void *data; + + page = drbd_pp_alloc(mdev, GFP_KERNEL); + + data = kmap(page); + while (data_size) { + rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE)); + if (rr != min_t(int, data_size, PAGE_SIZE)) { + rv = 0; + drbd_WARN("short read receiving data: read %d expected %d\n", + rr, min_t(int, data_size, PAGE_SIZE)); + break; + } + data_size -= rr; + } + kunmap(page); + drbd_pp_free(mdev, page); + return rv; +} + +/* kick lower level device, if we have more than (arbitrary number) + * reference counts on it, which typically are locally submitted io + * requests. 
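read_in_block() above receives an optional digest ahead of the payload, recomputes one over the data it just read, and drops the block on a mismatch. A sketch of that verify step; CRC32 via zlib stands in for the configurable crypto_hash purely to keep the example self-contained:

    #include <string.h>
    #include <stdint.h>
    #include <zlib.h>

    /* wire_digest holds the big-endian digest that preceded the data */
    static int payload_checks_out(const void *data, size_t len,
                                  const unsigned char *wire_digest,
                                  size_t dgs)
    {
        uint32_t crc = (uint32_t)crc32(0L, data, (uInt)len);
        unsigned char local[4] = {
            (unsigned char)(crc >> 24), (unsigned char)(crc >> 16),
            (unsigned char)(crc >> 8),  (unsigned char)crc
        };

        if (dgs != sizeof(local))
            return 0;
        return memcmp(local, wire_digest, dgs) == 0;  /* as in read_in_block() */
    }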
don't use unacked_cnt, so we speed up proto A and B, too. */ +static void maybe_kick_lo(struct drbd_conf *mdev) +{ + if (atomic_read(&mdev->local_cnt) >= mdev->net_conf->unplug_watermark) + drbd_kick_lo(mdev); +} + +STATIC int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, + sector_t sector, int data_size) +{ + struct bio_vec *bvec; + struct bio *bio; + int dgs, rr, i, expect; + void *dig_in = mdev->int_dig_in; + void *dig_vv = mdev->int_dig_vv; + + dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? + crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; + + if (dgs) { + rr = drbd_recv(mdev, dig_in, dgs); + if (rr != dgs) { + drbd_WARN("short read receiving data reply digest: read %d expected %d\n", + rr, dgs); + return 0; + } + } + + data_size -= dgs; + + bio = req->master_bio; + D_ASSERT(sector == bio->bi_sector); + + bio_for_each_segment(bvec, bio, i) { + expect = min_t(int, data_size, bvec->bv_len); + rr = drbd_recv(mdev, + kmap(bvec->bv_page)+bvec->bv_offset, + expect); + kunmap(bvec->bv_page); + if (rr != expect) { + drbd_WARN("short read receiving data reply: " + "read %d expected %d\n", + rr, expect); + return 0; + } + data_size -= rr; + } + + if (dgs) { + drbd_csum(mdev, mdev->integrity_r_tfm, bio, dig_vv); + if (memcmp(dig_in, dig_vv, dgs)) { + ERR("Digest integrity check FAILED. Broken NICs?\n"); + return 0; + } + } + + D_ASSERT(data_size == 0); + return 1; +} + +/* e_end_resync_block() is called via + * drbd_process_done_ee() by asender only */ +STATIC int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) +{ + struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + sector_t sector = e->sector; + int ok; + + D_ASSERT(hlist_unhashed(&e->colision)); + + if (likely(drbd_bio_uptodate(e->private_bio))) { + drbd_set_in_sync(mdev, sector, e->size); + ok = drbd_send_ack(mdev, RSWriteAck, e); + } else { + /* Record failure to sync */ + drbd_rs_failed_io(mdev, sector, e->size); + + ok = drbd_send_ack(mdev, NegAck, e); + ok &= drbd_io_error(mdev, FALSE); + } + dec_unacked(mdev); + + return ok; +} + +STATIC int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local) +{ + struct Tl_epoch_entry *e; + + e = read_in_block(mdev, ID_SYNCER, sector, data_size); + if (!e) { + dec_local(mdev); + return FALSE; + } + + dec_rs_pending(mdev); + + e->private_bio->bi_end_io = drbd_endio_write_sec; + e->private_bio->bi_rw = WRITE; + e->w.cb = e_end_resync_block; + + inc_unacked(mdev); + /* corresponding dec_unacked() in e_end_resync_block() + * respective _drbd_clear_done_ee */ + + spin_lock_irq(&mdev->req_lock); + list_add(&e->w.list, &mdev->sync_ee); + spin_unlock_irq(&mdev->req_lock); + + MTRACE(TraceTypeEE, TraceLvlAll, + INFO("submit EE (RS)WRITE sec=%llus size=%u ee=%p\n", + (unsigned long long)e->sector, e->size, e); + ); + dump_internal_bio("Sec", mdev, e->private_bio, 0); + drbd_generic_make_request(mdev, DRBD_FAULT_RS_WR, e->private_bio); + /* accounting done in endio */ + + maybe_kick_lo(mdev); + return TRUE; +} + +STATIC int receive_DataReply(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + struct drbd_request *req; + sector_t sector; + unsigned int header_size, data_size; + int ok; + struct Drbd_Data_Packet *p = (struct Drbd_Data_Packet *)h; + + header_size = sizeof(*p) - sizeof(*h); + data_size = h->length - header_size; + + ERR_IF(data_size == 0) return FALSE; + + if (drbd_recv(mdev, h->payload, header_size) != header_size) + return FALSE; + + sector = be64_to_cpu(p->sector); + + 
spin_lock_irq(&mdev->req_lock); + req = _ar_id_to_req(mdev, p->block_id, sector); + spin_unlock_irq(&mdev->req_lock); + if (unlikely(!req)) { + ERR("Got a corrupt block_id/sector pair(1).\n"); + return FALSE; + } + + /* hlist_del(&req->colision) is done in _req_may_be_done, to avoid + * special casing it there for the various failure cases. + * still no race with drbd_fail_pending_reads */ + ok = recv_dless_read(mdev, req, sector, data_size); + + if (ok) + req_mod(req, data_received, 0); + /* else: nothing. handled from drbd_disconnect... + * I don't think we may complete this just yet + * in case we are "on-disconnect: freeze" */ + + return ok; +} + +STATIC int receive_RSDataReply(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + sector_t sector; + unsigned int header_size, data_size; + int ok; + struct Drbd_Data_Packet *p = (struct Drbd_Data_Packet *)h; + + header_size = sizeof(*p) - sizeof(*h); + data_size = h->length - header_size; + + ERR_IF(data_size == 0) return FALSE; + + if (drbd_recv(mdev, h->payload, header_size) != header_size) + return FALSE; + + sector = be64_to_cpu(p->sector); + D_ASSERT(p->block_id == ID_SYNCER); + + if (inc_local(mdev)) { + /* data is submitted to disk within recv_resync_read. + * corresponding dec_local done below on error, + * or in drbd_endio_write_sec. */ + ok = recv_resync_read(mdev, sector, data_size); + } else { + if (__ratelimit(&drbd_ratelimit_state)) + ERR("Can not write resync data to local disk.\n"); + + ok = drbd_drain_block(mdev, data_size); + + drbd_send_ack_dp(mdev, NegAck, p); + } + + return ok; +} + +/* e_end_block() is called via drbd_process_done_ee(). + * this means this function only runs in the asender thread + */ +STATIC int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) +{ + struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + sector_t sector = e->sector; + struct drbd_epoch *epoch; + int ok = 1, pcmd; + + if (e->flags & EE_IS_BARRIER) { + epoch = previous_epoch(mdev, e->epoch); + if (epoch) + drbd_may_finish_epoch(mdev, epoch, EV_barrier_done); + } + + if (mdev->net_conf->wire_protocol == DRBD_PROT_C) { + if (likely(drbd_bio_uptodate(e->private_bio))) { + pcmd = (mdev->state.conn >= SyncSource && + mdev->state.conn <= PausedSyncT && + e->flags & EE_MAY_SET_IN_SYNC) ? + RSWriteAck : WriteAck; + ok &= drbd_send_ack(mdev, pcmd, e); + if (pcmd == RSWriteAck) + drbd_set_in_sync(mdev, sector, e->size); + } else { + ok = drbd_send_ack(mdev, NegAck, e); + ok &= drbd_io_error(mdev, FALSE); + /* we expect it to be marked out of sync anyways... + * maybe assert this? */ + } + dec_unacked(mdev); + } else if (unlikely(!drbd_bio_uptodate(e->private_bio))) { + ok = drbd_io_error(mdev, FALSE); + } + + /* we delete from the conflict detection hash _after_ we sent out the + * WriteAck / NegAck, to get the sequence number right. 
*/ + if (mdev->net_conf->two_primaries) { + spin_lock_irq(&mdev->req_lock); + D_ASSERT(!hlist_unhashed(&e->colision)); + hlist_del_init(&e->colision); + spin_unlock_irq(&mdev->req_lock); + } else { + D_ASSERT(hlist_unhashed(&e->colision)); + } + + drbd_may_finish_epoch(mdev, e->epoch, EV_put); + + return ok; +} + +STATIC int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused) +{ + struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + int ok = 1; + + D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + ok = drbd_send_ack(mdev, DiscardAck, e); + + spin_lock_irq(&mdev->req_lock); + D_ASSERT(!hlist_unhashed(&e->colision)); + hlist_del_init(&e->colision); + spin_unlock_irq(&mdev->req_lock); + + dec_unacked(mdev); + + return ok; +} + +/* Called from receive_Data. + * Synchronize packets on sock with packets on msock. + * + * This is here so even when a Data packet traveling via sock overtook an Ack + * packet traveling on msock, they are still processed in the order they have + * been sent. + * + * Note: we don't care for Ack packets overtaking Data packets. + * + * In case packet_seq is larger than mdev->peer_seq number, there are + * outstanding packets on the msock. We wait for them to arrive. + * In case we are the logically next packet, we update mdev->peer_seq + * ourselves. Correctly handles 32bit wrap around. + * + * Assume we have a 10 GBit connection, that is about 1<<30 byte per second, + * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds + * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have + * 1<<9 == 512 seconds aka ages for the 32bit wrap around... + * + * returns 0 if we may process the packet, + * -ERESTARTSYS if we were interrupted (by disconnect signal). */ +static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) +{ + DEFINE_WAIT(wait); + unsigned int p_seq; + long timeout; + int ret = 0; + spin_lock(&mdev->peer_seq_lock); + for (;;) { + prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE); + if (seq_le(packet_seq, mdev->peer_seq+1)) + break; + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + p_seq = mdev->peer_seq; + spin_unlock(&mdev->peer_seq_lock); + timeout = schedule_timeout(30*HZ); + spin_lock(&mdev->peer_seq_lock); + if (timeout == 0 && p_seq == mdev->peer_seq) { + ret = -ETIMEDOUT; + ERR("ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n"); + break; + } + } + finish_wait(&mdev->seq_wait, &wait); + if (mdev->peer_seq+1 == packet_seq) + mdev->peer_seq++; + spin_unlock(&mdev->peer_seq_lock); + return ret; +} + +/* mirrored write */ +STATIC int receive_Data(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + sector_t sector; + struct Tl_epoch_entry *e; + struct Drbd_Data_Packet *p = (struct Drbd_Data_Packet *)h; + int header_size, data_size; + int rw = WRITE; + u32 dp_flags; + + header_size = sizeof(*p) - sizeof(*h); + data_size = h->length - header_size; + + ERR_IF(data_size == 0) return FALSE; + + if (drbd_recv(mdev, h->payload, header_size) != header_size) + return FALSE; + + if (!inc_local(mdev)) { + /* data is submitted to disk at the end of this function. + * corresponding dec_local done either below (on error), + * or in drbd_endio_write_sec. 
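seq_le() itself is not part of this hunk; given the wrap-around discussion in the comment above drbd_wait_peer_seq(), it is presumably the usual serial-number comparison done on the signed difference, which stays correct across the 32bit wrap:

    #include <stdint.h>

    static int seq_le(uint32_t a, uint32_t b)
    {
        return (int32_t)(a - b) <= 0;
    }

    /* e.g. seq_le(0xfffffffeu, 2u) == 1: shortly after the counter
     * wraps, 0xfffffffe still compares as "not newer than" 2. */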
*/ + if (__ratelimit(&drbd_ratelimit_state)) + ERR("Can not write mirrored data block " + "to local disk.\n"); + spin_lock(&mdev->peer_seq_lock); + if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num)) + mdev->peer_seq++; + spin_unlock(&mdev->peer_seq_lock); + + drbd_send_ack_dp(mdev, NegAck, p); + atomic_inc(&mdev->current_epoch->epoch_size); + return drbd_drain_block(mdev, data_size); + } + + sector = be64_to_cpu(p->sector); + e = read_in_block(mdev, p->block_id, sector, data_size); + if (!e) { + dec_local(mdev); + return FALSE; + } + + e->private_bio->bi_end_io = drbd_endio_write_sec; + e->w.cb = e_end_block; + + spin_lock(&mdev->epoch_lock); + e->epoch = mdev->current_epoch; + atomic_inc(&e->epoch->epoch_size); + atomic_inc(&e->epoch->active); + + if (mdev->write_ordering == WO_bio_barrier && atomic_read(&e->epoch->epoch_size) == 1) { + struct drbd_epoch *epoch; + /* Issue a barrier if we start a new epoch, and the previous epoch + was not a epoch containing a single request which already was + a Barrier. */ + epoch = list_entry(e->epoch->list.prev, struct drbd_epoch, list); + if (epoch == e->epoch) { + MTRACE(TraceTypeEpochs, TraceLvlMetrics, + INFO("Add barrier %p/%d\n", + epoch, epoch->barrier_nr); + ); + set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags); + rw |= (1<flags |= EE_IS_BARRIER; + } else { + if (atomic_read(&epoch->epoch_size) > 1 || + !test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) { + MTRACE(TraceTypeEpochs, TraceLvlMetrics, + INFO("Add barrier %p/%d, setting bi in %p/%d\n", + e->epoch, e->epoch->barrier_nr, + epoch, epoch->barrier_nr); + ); + set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags); + set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags); + rw |= (1<flags |= EE_IS_BARRIER; + } + } + } + spin_unlock(&mdev->epoch_lock); + + dp_flags = be32_to_cpu(p->dp_flags); + if (dp_flags & DP_HARDBARRIER) + rw |= (1<flags |= EE_MAY_SET_IN_SYNC; + + /* I'm the receiver, I do hold a net_cnt reference. */ + if (!mdev->net_conf->two_primaries) { + spin_lock_irq(&mdev->req_lock); + } else { + /* don't get the req_lock yet, + * we may sleep in drbd_wait_peer_seq */ + const int size = e->size; + const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags); + DEFINE_WAIT(wait); + struct drbd_request *i; + struct hlist_node *n; + struct hlist_head *slot; + int first; + + D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + BUG_ON(mdev->ee_hash == NULL); + BUG_ON(mdev->tl_hash == NULL); + + /* conflict detection and handling: + * 1. wait on the sequence number, + * in case this data packet overtook ACK packets. + * 2. check our hash tables for conflicting requests. + * we only need to walk the tl_hash, since an ee can not + * have a conflict with an other ee: on the submitting + * node, the corresponding req had already been conflicting, + * and a conflicting req is never sent. + * + * Note: for two_primaries, we are protocol C, + * so there cannot be any request that is DONE + * but still on the transfer log. + * + * unconditionally add to the ee_hash. + * + * if no conflicting request is found: + * submit. + * + * if any conflicting request is found + * that has not yet been acked, + * AND I have the "discard concurrent writes" flag: + * queue (via done_ee) the DiscardAck; OUT. + * + * if any conflicting request is found: + * block the receiver, waiting on misc_wait + * until no more conflicting requests are there, + * or we get interrupted (disconnect). + * + * we do not just write after local io completion of those + * requests, but only after req is done completely, i.e. 
+ * we wait for the DiscardAck to arrive! + * + * then proceed normally, i.e. submit. + */ + if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num))) + goto out_interrupted; + + spin_lock_irq(&mdev->req_lock); + + hlist_add_head(&e->colision, ee_hash_slot(mdev, sector)); + +#define OVERLAPS overlaps(i->sector, i->size, sector, size) + slot = tl_hash_slot(mdev, sector); + first = 1; + for (;;) { + int have_unacked = 0; + int have_conflict = 0; + prepare_to_wait(&mdev->misc_wait, &wait, + TASK_INTERRUPTIBLE); + hlist_for_each_entry(i, n, slot, colision) { + if (OVERLAPS) { + /* only ALERT on first iteration, + * we may be woken up early... */ + if (first) + ALERT("%s[%u] Concurrent local write detected!" + " new: %llus +%u; pending: %llus +%u\n", + current->comm, current->pid, + (unsigned long long)sector, size, + (unsigned long long)i->sector, i->size); + if (i->rq_state & RQ_NET_PENDING) + ++have_unacked; + ++have_conflict; + } + } +#undef OVERLAPS + if (!have_conflict) + break; + + /* Discard Ack only for the _first_ iteration */ + if (first && discard && have_unacked) { + ALERT("Concurrent write! [DISCARD BY FLAG] sec=%llus\n", + (unsigned long long)sector); + inc_unacked(mdev); + e->w.cb = e_send_discard_ack; + list_add_tail(&e->w.list, &mdev->done_ee); + + spin_unlock_irq(&mdev->req_lock); + + /* we could probably send that DiscardAck ourselves, + * but I don't like the receiver using the msock */ + + dec_local(mdev); + wake_asender(mdev); + finish_wait(&mdev->misc_wait, &wait); + return TRUE; + } + + if (signal_pending(current)) { + hlist_del_init(&e->colision); + + spin_unlock_irq(&mdev->req_lock); + + finish_wait(&mdev->misc_wait, &wait); + goto out_interrupted; + } + + spin_unlock_irq(&mdev->req_lock); + if (first) { + first = 0; + ALERT("Concurrent write! [W AFTERWARDS] " + "sec=%llus\n", (unsigned long long)sector); + } else if (discard) { + /* we had none on the first iteration. + * there must be none now. */ + D_ASSERT(have_unacked == 0); + } + schedule(); + spin_lock_irq(&mdev->req_lock); + } + finish_wait(&mdev->misc_wait, &wait); + } + + list_add(&e->w.list, &mdev->active_ee); + spin_unlock_irq(&mdev->req_lock); + + switch (mdev->net_conf->wire_protocol) { + case DRBD_PROT_C: + inc_unacked(mdev); + /* corresponding dec_unacked() in e_end_block() + * respective _drbd_clear_done_ee */ + break; + case DRBD_PROT_B: + /* I really don't like it that the receiver thread + * sends on the msock, but anyways */ + drbd_send_ack(mdev, RecvAck, e); + break; + case DRBD_PROT_A: + /* nothing to do */ + break; + } + + if (mdev->state.pdsk == Diskless) { + /* In case we have the only disk of the cluster, */ + drbd_set_out_of_sync(mdev, e->sector, e->size); + e->flags |= EE_CALL_AL_COMPLETE_IO; + drbd_al_begin_io(mdev, e->sector); + } + + MTRACE(TraceTypeEE, TraceLvlAll, + INFO("submit EE (DATA)WRITE sec=%llus size=%u ee=%p\n", + (unsigned long long)e->sector, e->size, e); + ); + + e->private_bio->bi_rw = rw; + dump_internal_bio("Sec", mdev, e->private_bio, 0); + drbd_generic_make_request(mdev, DRBD_FAULT_DT_WR, e->private_bio); + /* accounting done in endio */ + + maybe_kick_lo(mdev); + return TRUE; + +out_interrupted: + /* yes, the epoch_size now is imbalanced. + * but we drop the connection anyways, so we don't have a chance to + * receive a barrier... 
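The OVERLAPS shorthand in the loop above expands to an overlaps() helper defined elsewhere in DRBD. For extents whose sizes are in bytes and whose starting points are 512-byte sector numbers, the standard half-open interval test would look like the following reconstruction (my sketch, not the driver's own definition):

    #include <stdint.h>

    typedef uint64_t sector_t;

    static int overlaps(sector_t s1, unsigned int size1,
                        sector_t s2, unsigned int size2)
    {
        /* sizes are bytes, so >> 9 converts them to sectors */
        return s1 + (size1 >> 9) > s2 &&
               s2 + (size2 >> 9) > s1;
    }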
atomic_inc(&mdev->epoch_size); */ + dec_local(mdev); + drbd_free_ee(mdev, e); + return FALSE; +} + +STATIC int receive_DataRequest(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + sector_t sector; + const sector_t capacity = drbd_get_capacity(mdev->this_bdev); + struct Tl_epoch_entry *e; + struct digest_info *di; + int size, digest_size; + unsigned int fault_type; + struct Drbd_BlockRequest_Packet *p = + (struct Drbd_BlockRequest_Packet *)h; + const int brps = sizeof(*p)-sizeof(*h); + + if (drbd_recv(mdev, h->payload, brps) != brps) + return FALSE; + + sector = be64_to_cpu(p->sector); + size = be32_to_cpu(p->blksize); + + if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { + ERR("%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, + (unsigned long long)sector, size); + return FALSE; + } + if (sector + (size>>9) > capacity) { + ERR("%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, + (unsigned long long)sector, size); + return FALSE; + } + + if (!inc_local_if_state(mdev, UpToDate)) { + if (__ratelimit(&drbd_ratelimit_state)) + ERR("Can not satisfy peer's read request, " + "no local data.\n"); + drbd_send_ack_rp(mdev, h->command == DataRequest ? NegDReply : + NegRSDReply , p); + return TRUE; + } + + e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_KERNEL); + if (!e) { + dec_local(mdev); + return FALSE; + } + + e->private_bio->bi_rw = READ; + e->private_bio->bi_end_io = drbd_endio_read_sec; + + switch (h->command) { + case DataRequest: + e->w.cb = w_e_end_data_req; + fault_type = DRBD_FAULT_DT_RD; + break; + case RSDataRequest: + e->w.cb = w_e_end_rsdata_req; + fault_type = DRBD_FAULT_RS_RD; + /* Eventually this should become asynchrously. Currently it + * blocks the whole receiver just to delay the reading of a + * resync data block. + * the drbd_work_queue mechanism is made for this... + */ + if (!drbd_rs_begin_io(mdev, sector)) { + /* we have been interrupted, + * probably connection lost! */ + D_ASSERT(signal_pending(current)); + dec_local(mdev); + drbd_free_ee(mdev, e); + return 0; + } + break; + + case OVReply: + case CsumRSRequest: + fault_type = DRBD_FAULT_RS_RD; + digest_size = h->length - brps ; + di = kmalloc(sizeof(*di) + digest_size, GFP_KERNEL); + if (!di) { + dec_local(mdev); + drbd_free_ee(mdev, e); + return 0; + } + + di->digest_size = digest_size; + di->digest = (((char *)di)+sizeof(struct digest_info)); + + if (drbd_recv(mdev, di->digest, digest_size) != digest_size) { + dec_local(mdev); + drbd_free_ee(mdev, e); + kfree(di); + return FALSE; + } + + e->block_id = (u64)(unsigned long)di; + if (h->command == CsumRSRequest) { + D_ASSERT(mdev->agreed_pro_version >= 89); + e->w.cb = w_e_end_csum_rs_req; + } else if (h->command == OVReply) { + e->w.cb = w_e_end_ov_reply; + dec_rs_pending(mdev); + break; + } + + if (!drbd_rs_begin_io(mdev, sector)) { + /* we have been interrupted, probably connection lost! */ + D_ASSERT(signal_pending(current)); + drbd_free_ee(mdev, e); + kfree(di); + dec_local(mdev); + return FALSE; + } + break; + + case OVRequest: + e->w.cb = w_e_end_ov_req; + fault_type = DRBD_FAULT_RS_RD; + /* Eventually this should become asynchrously. Currently it + * blocks the whole receiver just to delay the reading of a + * resync data block. + * the drbd_work_queue mechanism is made for this... + */ + if (!drbd_rs_begin_io(mdev, sector)) { + /* we have been interrupted, + * probably connection lost! 
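receive_DataRequest() above refuses to touch the disk unless the requested size is positive, 512-byte aligned, within the segment limit, and entirely inside the device. The same gate as a stand-alone predicate (MAX_SEGMENT_BYTES is a stand-in for DRBD_MAX_SEGMENT_SIZE):

    #include <stdint.h>

    typedef uint64_t sector_t;

    #define MAX_SEGMENT_BYTES (32 * 1024)   /* stand-in value */

    static int request_sane(sector_t sector, int size, sector_t capacity)
    {
        if (size <= 0 || (size & 0x1ff) != 0 || size > MAX_SEGMENT_BYTES)
            return 0;                        /* bad length */
        if (sector + (size >> 9) > capacity)
            return 0;                        /* past end of device */
        return 1;
    }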
*/ + D_ASSERT(signal_pending(current)); + dec_local(mdev); + drbd_free_ee(mdev, e); + return 0; + } + break; + + + default: + ERR("unexpected command (%s) in receive_DataRequest\n", + cmdname(h->command)); + fault_type = DRBD_FAULT_MAX; + } + + spin_lock_irq(&mdev->req_lock); + list_add(&e->w.list, &mdev->read_ee); + spin_unlock_irq(&mdev->req_lock); + + inc_unacked(mdev); + + MTRACE(TraceTypeEE, TraceLvlAll, + INFO("submit EE READ sec=%llus size=%u ee=%p\n", + (unsigned long long)e->sector, e->size, e); + ); + + dump_internal_bio("Sec", mdev, e->private_bio, 0); + drbd_generic_make_request(mdev, fault_type, e->private_bio); + maybe_kick_lo(mdev); + + return TRUE; +} + +STATIC int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) +{ + int self, peer, rv = -100; + unsigned long ch_self, ch_peer; + + self = mdev->bc->md.uuid[Bitmap] & 1; + peer = mdev->p_uuid[Bitmap] & 1; + + ch_peer = mdev->p_uuid[UUID_SIZE]; + ch_self = mdev->comm_bm_set; + + switch (mdev->net_conf->after_sb_0p) { + case Consensus: + case DiscardSecondary: + case CallHelper: + ERR("Configuration error.\n"); + break; + case Disconnect: + break; + case DiscardYoungerPri: + if (self == 0 && peer == 1) { + rv = -1; + break; + } + if (self == 1 && peer == 0) { + rv = 1; + break; + } + /* Else fall through to one of the other strategies... */ + case DiscardOlderPri: + if (self == 0 && peer == 1) { + rv = 1; + break; + } + if (self == 1 && peer == 0) { + rv = -1; + break; + } + /* Else fall through to one of the other strategies... */ + drbd_WARN("Discard younger/older primary did not found a decision\n" + "Using discard-least-changes instead\n"); + case DiscardZeroChg: + if (ch_peer == 0 && ch_self == 0) { + rv = test_bit(DISCARD_CONCURRENT, &mdev->flags) + ? -1 : 1; + break; + } else { + if (ch_peer == 0) { rv = 1; break; } + if (ch_self == 0) { rv = -1; break; } + } + if (mdev->net_conf->after_sb_0p == DiscardZeroChg) + break; + case DiscardLeastChg: + if (ch_self < ch_peer) + rv = -1; + else if (ch_self > ch_peer) + rv = 1; + else /* ( ch_self == ch_peer ) */ + /* Well, then use something else. */ + rv = test_bit(DISCARD_CONCURRENT, &mdev->flags) + ? -1 : 1; + break; + case DiscardLocal: + rv = -1; + break; + case DiscardRemote: + rv = 1; + } + + return rv; +} + +STATIC int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) +{ + int self, peer, hg, rv = -100; + + self = mdev->bc->md.uuid[Bitmap] & 1; + peer = mdev->p_uuid[Bitmap] & 1; + + switch (mdev->net_conf->after_sb_1p) { + case DiscardYoungerPri: + case DiscardOlderPri: + case DiscardLeastChg: + case DiscardLocal: + case DiscardRemote: + ERR("Configuration error.\n"); + break; + case Disconnect: + break; + case Consensus: + hg = drbd_asb_recover_0p(mdev); + if (hg == -1 && mdev->state.role == Secondary) + rv = hg; + if (hg == 1 && mdev->state.role == Primary) + rv = hg; + break; + case Violently: + rv = drbd_asb_recover_0p(mdev); + break; + case DiscardSecondary: + return mdev->state.role == Primary ? 
1 : -1; + case CallHelper: + hg = drbd_asb_recover_0p(mdev); + if (hg == -1 && mdev->state.role == Primary) { + self = drbd_set_role(mdev, Secondary, 0); + if (self != SS_Success) { + drbd_khelper(mdev, "pri-lost-after-sb"); + } else { + drbd_WARN("Sucessfully gave up primary role.\n"); + rv = hg; + } + } else + rv = hg; + } + + return rv; +} + +STATIC int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) +{ + int self, peer, hg, rv = -100; + + self = mdev->bc->md.uuid[Bitmap] & 1; + peer = mdev->p_uuid[Bitmap] & 1; + + switch (mdev->net_conf->after_sb_2p) { + case DiscardYoungerPri: + case DiscardOlderPri: + case DiscardLeastChg: + case DiscardLocal: + case DiscardRemote: + case Consensus: + case DiscardSecondary: + ERR("Configuration error.\n"); + break; + case Violently: + rv = drbd_asb_recover_0p(mdev); + break; + case Disconnect: + break; + case CallHelper: + hg = drbd_asb_recover_0p(mdev); + if (hg == -1) { + self = drbd_set_role(mdev, Secondary, 0); + if (self != SS_Success) { + drbd_khelper(mdev, "pri-lost-after-sb"); + } else { + drbd_WARN("Sucessfully gave up primary role.\n"); + rv = hg; + } + } else + rv = hg; + } + + return rv; +} + +STATIC void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid, + u64 bits, u64 flags) +{ + if (!uuid) { + INFO("%s uuid info vanished while I was looking!\n", text); + return; + } + INFO("%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n", + text, + (unsigned long long)uuid[Current], + (unsigned long long)uuid[Bitmap], + (unsigned long long)uuid[History_start], + (unsigned long long)uuid[History_end], + (unsigned long long)bits, + (unsigned long long)flags); +} + +/* + 100 after split brain try auto recover + 2 SyncSource set BitMap + 1 SyncSource use BitMap + 0 no Sync + -1 SyncTarget use BitMap + -2 SyncTarget set BitMap + -100 after split brain, disconnect +-1000 unrelated data + */ +STATIC int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local) +{ + u64 self, peer; + int i, j; + + self = mdev->bc->md.uuid[Current] & ~((u64)1); + peer = mdev->p_uuid[Current] & ~((u64)1); + + *rule_nr = 1; + if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED) + return 0; + + *rule_nr = 2; + if ((self == UUID_JUST_CREATED || self == (u64)0) && + peer != UUID_JUST_CREATED) + return -2; + + *rule_nr = 3; + if (self != UUID_JUST_CREATED && + (peer == UUID_JUST_CREATED || peer == (u64)0)) + return 2; + + *rule_nr = 4; + if (self == peer) { /* Common power [off|failure] */ + int rct, dc; /* roles at crash time */ + + rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) + + (mdev->p_uuid[UUID_FLAGS] & 2); + /* lowest bit is set when we were primary, + * next bit (weight 2) is set when peer was primary */ + + MTRACE(TraceTypeUuid, TraceLvlMetrics, DUMPI(rct);); + + switch (rct) { + case 0: /* !self_pri && !peer_pri */ return 0; + case 1: /* self_pri && !peer_pri */ return 1; + case 2: /* !self_pri && peer_pri */ return -1; + case 3: /* self_pri && peer_pri */ + dc = test_bit(DISCARD_CONCURRENT, &mdev->flags); + MTRACE(TraceTypeUuid, TraceLvlMetrics, DUMPI(dc);); + return dc ? 
-1 : 1; + } + } + + *rule_nr = 5; + peer = mdev->p_uuid[Bitmap] & ~((u64)1); + if (self == peer) + return -1; + + *rule_nr = 6; + for (i = History_start; i <= History_end; i++) { + peer = mdev->p_uuid[i] & ~((u64)1); + if (self == peer) + return -2; + } + + *rule_nr = 7; + self = mdev->bc->md.uuid[Bitmap] & ~((u64)1); + peer = mdev->p_uuid[Current] & ~((u64)1); + if (self == peer) + return 1; + + *rule_nr = 8; + for (i = History_start; i <= History_end; i++) { + self = mdev->bc->md.uuid[i] & ~((u64)1); + if (self == peer) + return 2; + } + + *rule_nr = 9; + self = mdev->bc->md.uuid[Bitmap] & ~((u64)1); + peer = mdev->p_uuid[Bitmap] & ~((u64)1); + if (self == peer && self != ((u64)0)) + return 100; + + *rule_nr = 10; + for (i = History_start; i <= History_end; i++) { + self = mdev->p_uuid[i] & ~((u64)1); + for (j = History_start; j <= History_end; j++) { + peer = mdev->p_uuid[j] & ~((u64)1); + if (self == peer) + return -100; + } + } + + return -1000; +} + +/* drbd_sync_handshake() returns the new conn state on success, or + conn_mask (-1) on failure. + */ +STATIC enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role, + enum drbd_disk_state peer_disk) __must_hold(local) +{ + int hg, rule_nr; + enum drbd_conns rv = conn_mask; + enum drbd_disk_state mydisk; + + mydisk = mdev->state.disk; + if (mydisk == Negotiating) + mydisk = mdev->new_state_tmp.disk; + + hg = drbd_uuid_compare(mdev, &rule_nr); + + INFO("drbd_sync_handshake:\n"); + drbd_uuid_dump(mdev, "self", mdev->bc->md.uuid, + mdev->state.disk >= Negotiating ? drbd_bm_total_weight(mdev) : 0, 0); + drbd_uuid_dump(mdev, "peer", mdev->p_uuid, + mdev->p_uuid[UUID_SIZE], mdev->p_uuid[UUID_FLAGS]); + INFO("uuid_compare()=%d by rule %d\n", hg, rule_nr); + + if (hg == -1000) { + ALERT("Unrelated data, aborting!\n"); + return conn_mask; + } + + if ((mydisk == Inconsistent && peer_disk > Inconsistent) || + (peer_disk == Inconsistent && mydisk > Inconsistent)) { + int f = (hg == -100) || abs(hg) == 2; + hg = mydisk > Inconsistent ? 1 : -1; + if (f) + hg = hg*2; + INFO("Becoming sync %s due to disk states.\n", + hg > 0 ? "source" : "target"); + } + + if (hg == 100 || (hg == -100 && mdev->net_conf->always_asbp)) { + int pcount = (mdev->state.role == Primary) + + (peer_role == Primary); + int forced = (hg == -100); + + switch (pcount) { + case 0: + hg = drbd_asb_recover_0p(mdev); + break; + case 1: + hg = drbd_asb_recover_1p(mdev); + break; + case 2: + hg = drbd_asb_recover_2p(mdev); + break; + } + if (abs(hg) < 100) { + drbd_WARN("Split-Brain detected, %d primaries, " + "automatically solved. Sync from %s node\n", + pcount, (hg < 0) ? "peer" : "this"); + if (forced) { + drbd_WARN("Doing a full sync, since" + " UUIDs where ambiguous.\n"); + hg = hg*2; + } + } + } + + if (hg == -100) { + if (mdev->net_conf->want_lose && !(mdev->p_uuid[UUID_FLAGS]&1)) + hg = -1; + if (!mdev->net_conf->want_lose && (mdev->p_uuid[UUID_FLAGS]&1)) + hg = 1; + + if (abs(hg) < 100) + drbd_WARN("Split-Brain detected, manually solved. " + "Sync from %s node\n", + (hg < 0) ? "peer" : "this"); + } + + if (hg == -100) { + ALERT("Split-Brain detected, dropping connection!\n"); + drbd_khelper(mdev, "split-brain"); + return conn_mask; + } + + if (hg > 0 && mydisk <= Inconsistent) { + ERR("I shall become SyncSource, but I am inconsistent!\n"); + return conn_mask; + } + + if (hg < 0 && /* by intention we do not use mydisk here. 
*/
+ mdev->state.role == Primary && mdev->state.disk >= Consistent) {
+ switch (mdev->net_conf->rr_conflict) {
+ case CallHelper:
+ drbd_khelper(mdev, "pri-lost");
+ /* fall through */
+ case Disconnect:
+ ERR("I shall become SyncTarget, but I am primary!\n");
+ return conn_mask;
+ case Violently:
+ drbd_WARN("Becoming SyncTarget, violating the stable-data "
+ "assumption\n");
+ }
+ }
+
+ if (abs(hg) >= 2) {
+ INFO("Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
+ if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake"))
+ return conn_mask;
+ }
+
+ if (hg > 0) { /* become sync source. */
+ rv = WFBitMapS;
+ } else if (hg < 0) { /* become sync target */
+ rv = WFBitMapT;
+ } else {
+ rv = Connected;
+ if (drbd_bm_total_weight(mdev)) {
+ INFO("No resync, but %lu bits in bitmap!\n",
+ drbd_bm_total_weight(mdev));
+ }
+ }
+
+ drbd_bm_recount_bits(mdev);
+
+ return rv;
+}
+
+/* returns 1 if invalid */
+STATIC int cmp_after_sb(enum after_sb_handler peer, enum after_sb_handler self)
+{
+ /* DiscardRemote - DiscardLocal is valid */
+ if ((peer == DiscardRemote && self == DiscardLocal) ||
+ (self == DiscardRemote && peer == DiscardLocal))
+ return 0;
+
+ /* any other things with DiscardRemote or DiscardLocal are invalid */
+ if (peer == DiscardRemote || peer == DiscardLocal ||
+ self == DiscardRemote || self == DiscardLocal)
+ return 1;
+
+ /* everything else is valid if they are equal on both sides. */
+ if (peer == self)
+ return 0;
+
+ /* everything else is invalid. */
+ return 1;
+}
+
+STATIC int receive_protocol(struct drbd_conf *mdev, struct Drbd_Header *h)
+{
+ struct Drbd_Protocol_Packet *p = (struct Drbd_Protocol_Packet *)h;
+ int header_size, data_size;
+ int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
+ int p_want_lose, p_two_primaries;
+ char p_integrity_alg[SHARED_SECRET_MAX] = "";
+
+ header_size = sizeof(*p) - sizeof(*h);
+ data_size = h->length - header_size;
+
+ if (drbd_recv(mdev, h->payload, header_size) != header_size)
+ return FALSE;
+
+ p_proto = be32_to_cpu(p->protocol);
+ p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
+ p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
+ p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
+ p_want_lose = be32_to_cpu(p->want_lose);
+ p_two_primaries = be32_to_cpu(p->two_primaries);
+
+ if (p_proto != mdev->net_conf->wire_protocol) {
+ ERR("incompatible communication protocols\n");
+ goto disconnect;
+ }
+
+ if (cmp_after_sb(p_after_sb_0p, mdev->net_conf->after_sb_0p)) {
+ ERR("incompatible after-sb-0pri settings\n");
+ goto disconnect;
+ }
+
+ if (cmp_after_sb(p_after_sb_1p, mdev->net_conf->after_sb_1p)) {
+ ERR("incompatible after-sb-1pri settings\n");
+ goto disconnect;
+ }
+
+ if (cmp_after_sb(p_after_sb_2p, mdev->net_conf->after_sb_2p)) {
+ ERR("incompatible after-sb-2pri settings\n");
+ goto disconnect;
+ }
+
+ if (p_want_lose && mdev->net_conf->want_lose) {
+ ERR("both sides have the 'want_lose' flag set\n");
+ goto disconnect;
+ }
+
+ if (p_two_primaries != mdev->net_conf->two_primaries) {
+ ERR("incompatible setting of the two-primaries option\n");
+ goto disconnect;
+ }
+
+ if (mdev->agreed_pro_version >= 87) {
+ unsigned char *my_alg = mdev->net_conf->integrity_alg;
+
+ if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size)
+ return FALSE;
+
+ p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
+ if (strcmp(p_integrity_alg, my_alg)) {
+ ERR("incompatible setting of the data-integrity-alg\n");
+ goto disconnect;
+ }
+ INFO("data-integrity-alg: %s\n",
+ my_alg[0] ? 
my_alg : (unsigned char *)""); + } + + return TRUE; + +disconnect: + drbd_force_state(mdev, NS(conn, Disconnecting)); + return FALSE; +} + +/* helper function + * input: alg name, feature name + * return: NULL (alg name was "") + * ERR_PTR(error) if something goes wrong + * or the crypto hash ptr, if it worked out ok. */ +struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, + const char *alg, const char *name) +{ + struct crypto_hash *tfm; + + if (!alg[0]) + return NULL; + + tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) { + ERR("Can not allocate \"%s\" as %s (reason: %ld)\n", + alg, name, PTR_ERR(tfm)); + return tfm; + } + if (crypto_tfm_alg_type(crypto_hash_tfm(tfm)) != CRYPTO_ALG_TYPE_DIGEST) { + crypto_free_hash(tfm); + ERR("\"%s\" is not a digest (%s)\n", alg, name); + return ERR_PTR(-EINVAL); + } + return tfm; +} + +STATIC int receive_SyncParam(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + int ok = TRUE; + struct Drbd_SyncParam89_Packet *p = (struct Drbd_SyncParam89_Packet *)h; + unsigned int header_size, data_size, exp_max_sz; + struct crypto_hash *verify_tfm = NULL; + struct crypto_hash *csums_tfm = NULL; + const int apv = mdev->agreed_pro_version; + + exp_max_sz = apv <= 87 ? sizeof(struct Drbd_SyncParam_Packet) + : apv == 88 ? sizeof(struct Drbd_SyncParam_Packet) + + SHARED_SECRET_MAX + : /* 89 */ sizeof(struct Drbd_SyncParam89_Packet); + + if (h->length > exp_max_sz) { + ERR("SyncParam packet too long: received %u, expected <= %u bytes\n", + h->length, exp_max_sz); + return FALSE; + } + + if (apv <= 88) { + header_size = sizeof(struct Drbd_SyncParam_Packet) - sizeof(*h); + data_size = h->length - header_size; + } else /* apv >= 89 */ { + header_size = sizeof(struct Drbd_SyncParam89_Packet) - sizeof(*h); + data_size = h->length - header_size; + D_ASSERT(data_size == 0); + } + + /* initialize verify_alg and csums_alg */ + memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); + + if (drbd_recv(mdev, h->payload, header_size) != header_size) + return FALSE; + + mdev->sync_conf.rate = be32_to_cpu(p->rate); + + if (apv >= 88) { + if (apv == 88) { + if (data_size > SHARED_SECRET_MAX) { + ERR("verify-alg too long, " + "peer wants %u, accepting only %u byte\n", + data_size, SHARED_SECRET_MAX); + return FALSE; + } + + if (drbd_recv(mdev, p->verify_alg, data_size) != data_size) + return FALSE; + + /* we expect NUL terminated string */ + /* but just in case someone tries to be evil */ + D_ASSERT(p->verify_alg[data_size-1] == 0); + p->verify_alg[data_size-1] = 0; + + } else /* apv >= 89 */ { + /* we still expect NUL terminated strings */ + /* but just in case someone tries to be evil */ + D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0); + D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0); + p->verify_alg[SHARED_SECRET_MAX-1] = 0; + p->csums_alg[SHARED_SECRET_MAX-1] = 0; + } + + if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) { + if (mdev->state.conn == WFReportParams) { + ERR("Different verify-alg settings. me=\"%s\" peer=\"%s\"\n", + mdev->sync_conf.verify_alg, p->verify_alg); + goto disconnect; + } + verify_tfm = drbd_crypto_alloc_digest_safe(mdev, + p->verify_alg, "verify-alg"); + if (IS_ERR(verify_tfm)) + goto disconnect; + } + + if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) { + if (mdev->state.conn == WFReportParams) { + ERR("Different csums-alg settings. 
me=\"%s\" peer=\"%s\"\n", + mdev->sync_conf.csums_alg, p->csums_alg); + goto disconnect; + } + csums_tfm = drbd_crypto_alloc_digest_safe(mdev, + p->csums_alg, "csums-alg"); + if (IS_ERR(csums_tfm)) + goto disconnect; + } + + + spin_lock(&mdev->peer_seq_lock); + /* lock against drbd_nl_syncer_conf() */ + if (verify_tfm) { + strcpy(mdev->sync_conf.verify_alg, p->verify_alg); + mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1; + crypto_free_hash(mdev->verify_tfm); + mdev->verify_tfm = verify_tfm; + INFO("using verify-alg: \"%s\"\n", p->verify_alg); + } + if (csums_tfm) { + strcpy(mdev->sync_conf.csums_alg, p->csums_alg); + mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1; + crypto_free_hash(mdev->csums_tfm); + mdev->csums_tfm = csums_tfm; + INFO("using csums-alg: \"%s\"\n", p->csums_alg); + } + spin_unlock(&mdev->peer_seq_lock); + } + + return ok; +disconnect: + crypto_free_hash(verify_tfm); + drbd_force_state(mdev, NS(conn, Disconnecting)); + return FALSE; +} + +STATIC void drbd_setup_order_type(struct drbd_conf *mdev, int peer) +{ + /* sorry, we currently have no working implementation + * of distributed TCQ */ +} + +/* warn if the arguments differ by more than 12.5% */ +static void warn_if_differ_considerably(struct drbd_conf *mdev, + const char *s, sector_t a, sector_t b) +{ + sector_t d; + if (a == 0 || b == 0) + return; + d = (a > b) ? (a - b) : (b - a); + if (d > (a>>3) || d > (b>>3)) + drbd_WARN("Considerable difference in %s: %llus vs. %llus\n", s, + (unsigned long long)a, (unsigned long long)b); +} + +STATIC int receive_sizes(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + struct Drbd_Sizes_Packet *p = (struct Drbd_Sizes_Packet *)h; + enum determin_dev_size_enum dd = unchanged; + unsigned int max_seg_s; + sector_t p_size, p_usize, my_usize; + int ldsc = 0; /* local disk size changed */ + enum drbd_conns nconn; + + ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; + if (drbd_recv(mdev, h->payload, h->length) != h->length) + return FALSE; + + p_size = be64_to_cpu(p->d_size); + p_usize = be64_to_cpu(p->u_size); + + if (p_size == 0 && mdev->state.disk == Diskless) { + ERR("some backing storage is needed\n"); + drbd_force_state(mdev, NS(conn, Disconnecting)); + return FALSE; + } + + /* just store the peer's disk size for now. + * we still need to figure out wether we accept that. */ + mdev->p_size = p_size; + +#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) + if (inc_local(mdev)) { + warn_if_differ_considerably(mdev, "lower level device sizes", + p_size, drbd_get_max_capacity(mdev->bc)); + warn_if_differ_considerably(mdev, "user requested size", + p_usize, mdev->bc->dc.disk_size); + + /* if this is the first connect, or an otherwise expected + * param exchange, choose the minimum */ + if (mdev->state.conn == WFReportParams) + p_usize = min_not_zero((sector_t)mdev->bc->dc.disk_size, + p_usize); + + my_usize = mdev->bc->dc.disk_size; + + if (mdev->bc->dc.disk_size != p_usize) { + mdev->bc->dc.disk_size = p_usize; + INFO("Peer sets u_size to %lu sectors\n", + (unsigned long)mdev->bc->dc.disk_size); + } + + /* Never shrink a device with usable data during connect. + But allow online shrinking if we are connected. 
*/
+ if (drbd_new_dev_size(mdev, mdev->bc) <
+ drbd_get_capacity(mdev->this_bdev) &&
+ mdev->state.disk >= Outdated &&
+ mdev->state.conn < Connected) {
+ ERR("The peer's disk size is too small!\n");
+ drbd_force_state(mdev, NS(conn, Disconnecting));
+ mdev->bc->dc.disk_size = my_usize;
+ dec_local(mdev);
+ return FALSE;
+ }
+ dec_local(mdev);
+ }
+#undef min_not_zero
+
+ if (inc_local(mdev)) {
+ dd = drbd_determin_dev_size(mdev);
+ dec_local(mdev);
+ if (dd == dev_size_error)
+ return FALSE;
+ drbd_md_sync(mdev);
+ } else {
+ /* I am diskless, need to accept the peer's size. */
+ drbd_set_my_capacity(mdev, p_size);
+ }
+
+ if (mdev->p_uuid && mdev->state.conn <= Connected && inc_local(mdev)) {
+ nconn = drbd_sync_handshake(mdev,
+ mdev->state.peer, mdev->state.pdsk);
+ dec_local(mdev);
+
+ if (nconn == conn_mask) {
+ drbd_force_state(mdev, NS(conn, Disconnecting));
+ return FALSE;
+ }
+
+ if (drbd_request_state(mdev, NS(conn, nconn)) < SS_Success) {
+ drbd_force_state(mdev, NS(conn, Disconnecting));
+ return FALSE;
+ }
+ }
+
+ if (inc_local(mdev)) {
+ if (mdev->bc->known_size != drbd_get_capacity(mdev->bc->backing_bdev)) {
+ mdev->bc->known_size = drbd_get_capacity(mdev->bc->backing_bdev);
+ ldsc = 1;
+ }
+
+ max_seg_s = be32_to_cpu(p->max_segment_size);
+ if (max_seg_s != mdev->rq_queue->max_segment_size)
+ drbd_setup_queue_param(mdev, max_seg_s);
+
+ drbd_setup_order_type(mdev, be32_to_cpu(p->queue_order_type));
+ dec_local(mdev);
+ }
+
+ if (mdev->state.conn > WFReportParams) {
+ if (be64_to_cpu(p->c_size) !=
+ drbd_get_capacity(mdev->this_bdev) || ldsc) {
+ /* we have different sizes, probably the peer
+ * needs to know my new size... */
+ drbd_send_sizes(mdev);
+ }
+ if (dd == grew && mdev->state.conn == Connected) {
+ if (mdev->state.pdsk >= Inconsistent &&
+ mdev->state.disk >= Inconsistent)
+ resync_after_online_grow(mdev);
+ else
+ set_bit(RESYNC_AFTER_NEG, &mdev->flags);
+ }
+ }
+
+ return TRUE;
+}
+
+STATIC int receive_uuids(struct drbd_conf *mdev, struct Drbd_Header *h)
+{
+ struct Drbd_GenCnt_Packet *p = (struct Drbd_GenCnt_Packet *)h;
+ u64 *p_uuid;
+ int i;
+
+ ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
+ if (drbd_recv(mdev, h->payload, h->length) != h->length)
+ return FALSE;
+
+ p_uuid = kmalloc(sizeof(u64)*EXT_UUID_SIZE, GFP_KERNEL);
+
+ for (i = Current; i < EXT_UUID_SIZE; i++)
+ p_uuid[i] = be64_to_cpu(p->uuid[i]);
+
+ kfree(mdev->p_uuid);
+ mdev->p_uuid = p_uuid;
+
+ if (mdev->state.conn < Connected &&
+ mdev->state.disk < Inconsistent &&
+ mdev->state.role == Primary &&
+ (mdev->ed_uuid & ~((u64)1)) != (p_uuid[Current] & ~((u64)1))) {
+ ERR("Can only connect to data with current UUID=%016llX\n",
+ (unsigned long long)mdev->ed_uuid);
+ drbd_force_state(mdev, NS(conn, Disconnecting));
+ return FALSE;
+ }
+
+ /* Before we test for the disk state, we should wait until a possibly
+ ongoing cluster-wide state change has finished. That is important if
+ we are primary and are detaching from our disk. We need to see the
+ new disk state... */
+ wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags));
+ if (mdev->state.conn >= Connected && mdev->state.disk < Inconsistent)
+ drbd_set_ed_uuid(mdev, p_uuid[Current]);
+
+ return TRUE;
+}
+
+/**
+ * convert_state:
+ * Switches the view of the state. 
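+ *
+ * Worked example (hypothetical values, added for illustration): a peer
+ * state ps = { conn:StartingSyncS, role:Primary, peer:Secondary,
+ * disk:UpToDate, pdsk:Inconsistent } converts to the local view
+ * ms = { conn:StartingSyncT, role:Secondary, peer:Primary,
+ * disk:Inconsistent, pdsk:UpToDate }: roles and disk states are
+ * swapped, and the connection state is mirrored via c_tab[].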
+ */ +STATIC union drbd_state_t convert_state(union drbd_state_t ps) +{ + union drbd_state_t ms; + + static enum drbd_conns c_tab[] = { + [Connected] = Connected, + + [StartingSyncS] = StartingSyncT, + [StartingSyncT] = StartingSyncS, + [Disconnecting] = TearDown, /* NetworkFailure, */ + [VerifyS] = VerifyT, + [conn_mask] = conn_mask, + }; + + ms.i = ps.i; + + ms.conn = c_tab[ps.conn]; + ms.peer = ps.role; + ms.role = ps.peer; + ms.pdsk = ps.disk; + ms.disk = ps.pdsk; + ms.peer_isp = (ps.aftr_isp | ps.user_isp); + + return ms; +} + +STATIC int receive_req_state(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + struct Drbd_Req_State_Packet *p = (struct Drbd_Req_State_Packet *)h; + union drbd_state_t mask, val; + int rv; + + ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; + if (drbd_recv(mdev, h->payload, h->length) != h->length) + return FALSE; + + mask.i = be32_to_cpu(p->mask); + val.i = be32_to_cpu(p->val); + + if (test_bit(DISCARD_CONCURRENT, &mdev->flags) && + test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { + drbd_send_sr_reply(mdev, SS_ConcurrentStChg); + return TRUE; + } + + mask = convert_state(mask); + val = convert_state(val); + + rv = drbd_change_state(mdev, ChgStateVerbose, mask, val); + + drbd_send_sr_reply(mdev, rv); + drbd_md_sync(mdev); + + return TRUE; +} + +STATIC int receive_state(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + struct Drbd_State_Packet *p = (struct Drbd_State_Packet *)h; + enum drbd_conns nconn, oconn; + union drbd_state_t ns, peer_state; + enum drbd_disk_state real_peer_disk; + int rv; + + ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) + return FALSE; + + if (drbd_recv(mdev, h->payload, h->length) != h->length) + return FALSE; + + peer_state.i = be32_to_cpu(p->state); + + real_peer_disk = peer_state.disk; + if (peer_state.disk == Negotiating) { + real_peer_disk = mdev->p_uuid[UUID_FLAGS] & 4 ? 
Inconsistent : Consistent; + INFO("real peer disk state = %s\n", disks_to_name(real_peer_disk)); + } + + spin_lock_irq(&mdev->req_lock); + retry: + oconn = nconn = mdev->state.conn; + spin_unlock_irq(&mdev->req_lock); + + if (nconn == WFReportParams) + nconn = Connected; + + if (mdev->p_uuid && peer_state.disk >= Negotiating && + inc_local_if_state(mdev, Negotiating)) { + int cr; /* consider resync */ + + cr = (oconn < Connected); + cr |= (oconn == Connected && + (peer_state.disk == Negotiating || + mdev->state.disk == Negotiating)); + cr |= test_bit(CONSIDER_RESYNC, &mdev->flags); /* peer forced */ + cr |= (oconn == Connected && peer_state.conn > Connected); + + if (cr) + nconn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk); + + dec_local(mdev); + if (nconn == conn_mask) { + if (mdev->state.disk == Negotiating) { + drbd_force_state(mdev, NS(disk, Diskless)); + nconn = Connected; + } else if (peer_state.disk == Negotiating) { + ERR("Disk attach process on the peer node was aborted.\n"); + peer_state.disk = Diskless; + } else { + D_ASSERT(oconn == WFReportParams); + drbd_force_state(mdev, NS(conn, Disconnecting)); + return FALSE; + } + } + } + + spin_lock_irq(&mdev->req_lock); + if (mdev->state.conn != oconn) + goto retry; + clear_bit(CONSIDER_RESYNC, &mdev->flags); + ns.i = mdev->state.i; + ns.conn = nconn; + ns.peer = peer_state.role; + ns.pdsk = real_peer_disk; + ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp); + if ((nconn == Connected || nconn == WFBitMapS) && ns.disk == Negotiating) + ns.disk = mdev->new_state_tmp.disk; + + rv = _drbd_set_state(mdev, ns, ChgStateVerbose | ChgStateHard, NULL); + ns = mdev->state; + spin_unlock_irq(&mdev->req_lock); + + if (rv < SS_Success) { + drbd_force_state(mdev, NS(conn, Disconnecting)); + return FALSE; + } + + if (oconn > WFReportParams) { + if (nconn > Connected && peer_state.conn <= Connected && + peer_state.disk != Negotiating ) { + /* we want resync, peer has not yet decided to sync... */ + /* Nowadays only used when forcing a node into primary role and + setting its disk to UpTpDate with that */ + drbd_send_uuids(mdev); + drbd_send_state(mdev); + } + } + + mdev->net_conf->want_lose = 0; + + drbd_md_sync(mdev); /* update connected indicator, la_size, ... 
*/ + + return TRUE; +} + +STATIC int receive_sync_uuid(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + struct Drbd_SyncUUID_Packet *p = (struct Drbd_SyncUUID_Packet *)h; + + wait_event(mdev->misc_wait, + mdev->state.conn < Connected || + mdev->state.conn == WFSyncUUID); + + /* D_ASSERT( mdev->state.conn == WFSyncUUID ); */ + + ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; + if (drbd_recv(mdev, h->payload, h->length) != h->length) + return FALSE; + + /* Here the _drbd_uuid_ functions are right, current should + _not_ be rotated into the history */ + if (inc_local_if_state(mdev, Negotiating)) { + _drbd_uuid_set(mdev, Current, be64_to_cpu(p->uuid)); + _drbd_uuid_set(mdev, Bitmap, 0UL); + + drbd_start_resync(mdev, SyncTarget); + + dec_local(mdev); + } else + ERR("Ignoring SyncUUID packet!\n"); + + return TRUE; +} + +enum receive_bitmap_ret { OK, DONE, FAILED }; + +static enum receive_bitmap_ret +receive_bitmap_plain(struct drbd_conf *mdev, struct Drbd_Header *h, + unsigned long *buffer, struct bm_xfer_ctx *c) +{ + unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); + unsigned want = num_words * sizeof(long); + + if (want != h->length) { + ERR("%s:want (%u) != h->length (%u)\n", __func__, want, h->length); + return FAILED; + } + if (want == 0) + return DONE; + if (drbd_recv(mdev, buffer, want) != want) + return FAILED; + + drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer); + + c->word_offset += num_words; + c->bit_offset = c->word_offset * BITS_PER_LONG; + if (c->bit_offset > c->bm_bits) + c->bit_offset = c->bm_bits; + + return OK; +} + +static enum receive_bitmap_ret +recv_bm_rle_bits(struct drbd_conf *mdev, + struct Drbd_Compressed_Bitmap_Packet *p, + struct bm_xfer_ctx *c) +{ + struct bitstream bs; + u64 look_ahead; + u64 rl; + u64 tmp; + unsigned long s = c->bit_offset; + unsigned long e; + int len = p->head.length - (sizeof(*p) - sizeof(p->head)); + int toggle = DCBP_get_start(p); + int have; + int bits; + + bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p)); + + bits = bitstream_get_bits(&bs, &look_ahead, 64); + if (bits < 0) + return FAILED; + + for (have = bits; have > 0; s += rl, toggle = !toggle) { + bits = vli_decode_bits(&rl, look_ahead); + if (bits <= 0) + return FAILED; + + if (toggle) { + e = s + rl -1; + if (e >= c->bm_bits) { + ERR("bitmap overflow (e:%lu) while decoding bm RLE packet\n", e); + return FAILED; + } + _drbd_bm_set_bits(mdev, s, e); + } + + if (have < bits) { + ERR("bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n", have, bits, look_ahead, + bs.cur.b - p->code, bs.buf_len); + return FAILED; + } + look_ahead >>= bits; + have -= bits; + + bits = bitstream_get_bits(&bs, &tmp, 64 - have); + if (bits < 0) + return FAILED; + look_ahead |= tmp << have; + have += bits; + } + + c->bit_offset = s; + bm_xfer_ctx_bit_to_word_offset(c); + + return (s == c->bm_bits) ? DONE : OK; +} + + +static enum receive_bitmap_ret +recv_bm_rle_bytes(struct drbd_conf *mdev, + struct Drbd_Compressed_Bitmap_Packet *p, + struct bm_xfer_ctx *c) +{ + u64 rl; + unsigned char *buf = p->code; + unsigned long s; + unsigned long e; + int len = p->head.length - (p->code - p->head.payload); + int toggle; + int n; + + s = c->bit_offset; + + /* decoding. the payload of bitmap rle packets is VLI encoded + * runlength of set and unset bits, starting with set/unset as defined + * in p->encoding & 0x80. 
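+ *
+ * Worked example (made-up runlengths, added for illustration): with a
+ * start value of "set" and decoded runlengths 2, 5, 4, the loop below
+ * sets bits s..s+1, skips s+2..s+6 (x | 0 == x), and sets s+7..s+10;
+ * s advances by each runlength while toggle flips every iteration.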
*/
+ for (toggle = DCBP_get_start(p); len; s += rl, toggle = !toggle) {
+ if (s >= c->bm_bits) {
+ ERR("bitmap overflow (s:%lu) while decoding bitmap RLE packet\n", s);
+ return FAILED;
+ }
+
+ n = vli_decode_bytes(&rl, buf, len);
+ if (n == 0) /* incomplete buffer! */
+ return FAILED;
+ buf += n;
+ len -= n;
+
+ if (rl == 0) {
+ ERR("unexpected zero runlength while decoding bitmap RLE packet\n");
+ return FAILED;
+ }
+
+ /* unset bits: ignore, because of x | 0 == x. */
+ if (!toggle)
+ continue;
+
+ /* set bits: merge into bitmap. */
+ e = s + rl - 1;
+ if (e >= c->bm_bits) {
+ ERR("bitmap overflow (e:%lu) while decoding bitmap RLE packet\n", e);
+ return FAILED;
+ }
+ _drbd_bm_set_bits(mdev, s, e);
+ }
+
+ c->bit_offset = s;
+ bm_xfer_ctx_bit_to_word_offset(c);
+
+ return (s == c->bm_bits) ? DONE : OK;
+}
+
+static enum receive_bitmap_ret
+decode_bitmap_c(struct drbd_conf *mdev,
+ struct Drbd_Compressed_Bitmap_Packet *p,
+ struct bm_xfer_ctx *c)
+{
+ switch (DCBP_get_code(p)) {
+ /* no default! I want the compiler to warn me! */
+ case RLE_VLI_BitsFibD_0_1:
+ case RLE_VLI_BitsFibD_1_1:
+ case RLE_VLI_BitsFibD_1_2:
+ case RLE_VLI_BitsFibD_2_3:
+ break; /* TODO */
+ case RLE_VLI_BitsFibD_3_5:
+ return recv_bm_rle_bits(mdev, p, c);
+ case RLE_VLI_Bytes:
+ return recv_bm_rle_bytes(mdev, p, c);
+ }
+ ERR("receive_bitmap_c: unknown encoding %u\n", p->encoding);
+ return FAILED;
+}
+
+void INFO_bm_xfer_stats(struct drbd_conf *mdev,
+ const char *direction, struct bm_xfer_ctx *c)
+{
+ unsigned plain_would_take = sizeof(struct Drbd_Header) *
+ ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1) +
+ c->bm_words * sizeof(long);
+ unsigned total = c->bytes[0] + c->bytes[1];
+ unsigned q, r;
+
+ /* total cannot be zero. but just in case: */
+ if (total == 0)
+ return;
+
+ q = plain_would_take / total;
+ r = plain_would_take % total;
+ r = (r > UINT_MAX/100) ? (r / ((total+99)/100)) : (100 * r / total);
+
+ INFO("%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
+ "total %u; compression factor: %u.%02u\n",
+ direction,
+ c->bytes[1], c->packets[1],
+ c->bytes[0], c->packets[0],
+ total, q, r);
+}
+
+/* Since we are processing the bitfield from lower addresses to higher,
+ it does not matter if we process it in 32 bit chunks or 64 bit
+ chunks as long as it is little endian. (Understand it as a byte stream,
+ beginning with the lowest byte...) If we used big endian,
+ we would need to process it from the highest address to the lowest,
+ in order to be agnostic to the 32 vs 64 bit issue.
+
+ returns 0 on failure, 1 if we successfully received it. */
+STATIC int receive_bitmap(struct drbd_conf *mdev, struct Drbd_Header *h)
+{
+ struct bm_xfer_ctx c;
+ void *buffer;
+ enum receive_bitmap_ret ret;
+ int ok = FALSE;
+
+ wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
+
+ drbd_bm_lock(mdev, "receive bitmap");
+
+ /* maybe we should use some per thread scratch page,
+ * and allocate that during initial device creation? */
+ buffer = (unsigned long *) __get_free_page(GFP_NOIO);
+ if (!buffer) {
+ ERR("failed to allocate one page buffer in %s\n", __func__);
+ goto out;
+ }
+
+ c = (struct bm_xfer_ctx) {
+ .bm_bits = drbd_bm_bits(mdev),
+ .bm_words = drbd_bm_words(mdev),
+ };
+
+ do {
+ if (h->command == ReportBitMap) {
+ ret = receive_bitmap_plain(mdev, h, buffer, &c);
+ } else if (h->command == ReportCBitMap) {
+ /* MAYBE: sanity check that we speak proto >= 90,
+ * and the feature is enabled! 
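+ * A minimal sketch of such a check (hypothetical, not part of this
+ * patch) might read:
+ *   if (mdev->agreed_pro_version < 90) {
+ *           ERR("peer sent compressed bitmap, but proto < 90\n");
+ *           goto out;
+ *   }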
*/ + struct Drbd_Compressed_Bitmap_Packet *p; + + if (h->length > BM_PACKET_PAYLOAD_BYTES) { + ERR("ReportCBitmap packet too large\n"); + goto out; + } + /* use the page buff */ + p = buffer; + memcpy(p, h, sizeof(*h)); + if (drbd_recv(mdev, p->head.payload, h->length) != h->length) + goto out; + if (p->head.length <= (sizeof(*p) - sizeof(p->head))) { + ERR("ReportCBitmap packet too small (l:%u)\n", p->head.length); + return FAILED; + } + ret = decode_bitmap_c(mdev, p, &c); + } else { + drbd_WARN("receive_bitmap: h->command neither ReportBitMap nor ReportCBitMap (is 0x%x)", h->command); + goto out; + } + + c.packets[h->command == ReportBitMap]++; + c.bytes[h->command == ReportBitMap] += sizeof(struct Drbd_Header) + h->length; + + if (ret != OK) + break; + + if (!drbd_recv_header(mdev, h)) + goto out; + } while (ret == OK); + if (ret == FAILED) + goto out; + + INFO_bm_xfer_stats(mdev, "receive", &c); + + if (mdev->state.conn == WFBitMapT) { + ok = !drbd_send_bitmap(mdev); + if (!ok) + goto out; + /* Omit ChgOrdered with this state transition to avoid deadlocks. */ + ok = _drbd_request_state(mdev, NS(conn, WFSyncUUID), ChgStateVerbose); + D_ASSERT(ok == SS_Success); + } else if (mdev->state.conn != WFBitMapS) { + /* admin may have requested Disconnecting, + * other threads may have noticed network errors */ + INFO("unexpected cstate (%s) in receive_bitmap\n", + conns_to_name(mdev->state.conn)); + } + + ok = TRUE; + out: + drbd_bm_unlock(mdev); + if (ok && mdev->state.conn == WFBitMapS) + drbd_start_resync(mdev, SyncSource); + free_page((unsigned long) buffer); + return ok; +} + +STATIC int receive_skip(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + /* TODO zero copy sink :) */ + static char sink[128]; + int size, want, r; + + drbd_WARN("skipping unknown optional packet type %d, l: %d!\n", + h->command, h->length); + + size = h->length; + while (size > 0) { + want = min_t(int, size, sizeof(sink)); + r = drbd_recv(mdev, sink, want); + ERR_IF(r <= 0) break; + size -= r; + } + return size == 0; +} + +STATIC int receive_UnplugRemote(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + if (mdev->state.disk >= Inconsistent) + drbd_kick_lo(mdev); + + /* Make sure we've acked all the TCP data associated + * with the data requests being unplugged */ + drbd_tcp_quickack(mdev->data.socket); + + return TRUE; +} + +typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct Drbd_Header *); + +static drbd_cmd_handler_f drbd_default_handler[] = { + [Data] = receive_Data, + [DataReply] = receive_DataReply, + [RSDataReply] = receive_RSDataReply, + [Barrier] = receive_Barrier, + [ReportBitMap] = receive_bitmap, + [ReportCBitMap] = receive_bitmap, + [UnplugRemote] = receive_UnplugRemote, + [DataRequest] = receive_DataRequest, + [RSDataRequest] = receive_DataRequest, + [SyncParam] = receive_SyncParam, + [SyncParam89] = receive_SyncParam, + [ReportProtocol] = receive_protocol, + [ReportUUIDs] = receive_uuids, + [ReportSizes] = receive_sizes, + [ReportState] = receive_state, + [StateChgRequest] = receive_req_state, + [ReportSyncUUID] = receive_sync_uuid, + [OVRequest] = receive_DataRequest, + [OVReply] = receive_DataRequest, + [CsumRSRequest] = receive_DataRequest, + /* anything missing from this table is in + * the asender_tbl, see get_asender_cmd */ + [MAX_CMD] = NULL, +}; + +static drbd_cmd_handler_f *drbd_cmd_handler = drbd_default_handler; +static drbd_cmd_handler_f *drbd_opt_cmd_handler; + +STATIC void drbdd(struct drbd_conf *mdev) +{ + drbd_cmd_handler_f handler; + struct Drbd_Header *header = 
&mdev->data.rbuf.head; + + while (get_t_state(&mdev->receiver) == Running) { + drbd_thread_current_set_cpu(mdev); + if (!drbd_recv_header(mdev, header)) + break; + + if (header->command < MAX_CMD) + handler = drbd_cmd_handler[header->command]; + else if (MayIgnore < header->command + && header->command < MAX_OPT_CMD) + handler = drbd_opt_cmd_handler[header->command-MayIgnore]; + else if (header->command > MAX_OPT_CMD) + handler = receive_skip; + else + handler = NULL; + + if (unlikely(!handler)) { + ERR("unknown packet type %d, l: %d!\n", + header->command, header->length); + drbd_force_state(mdev, NS(conn, ProtocolError)); + break; + } + if (unlikely(!handler(mdev, header))) { + ERR("error receiving %s, l: %d!\n", + cmdname(header->command), header->length); + drbd_force_state(mdev, NS(conn, ProtocolError)); + break; + } + + dump_packet(mdev, mdev->data.socket, 2, &mdev->data.rbuf, + __FILE__, __LINE__); + } +} + +STATIC void drbd_fail_pending_reads(struct drbd_conf *mdev) +{ + struct hlist_head *slot; + struct hlist_node *pos; + struct hlist_node *tmp; + struct drbd_request *req; + int i; + + /* + * Application READ requests + */ + spin_lock_irq(&mdev->req_lock); + for (i = 0; i < APP_R_HSIZE; i++) { + slot = mdev->app_reads_hash+i; + hlist_for_each_entry_safe(req, pos, tmp, slot, colision) { + /* it may (but should not any longer!) + * be on the work queue; if that assert triggers, + * we need to also grab the + * spin_lock_irq(&mdev->data.work.q_lock); + * and list_del_init here. */ + D_ASSERT(list_empty(&req->w.list)); + _req_mod(req, connection_lost_while_pending, 0); + } + } + for (i = 0; i < APP_R_HSIZE; i++) + if (!hlist_empty(mdev->app_reads_hash+i)) + drbd_WARN("ASSERT FAILED: app_reads_hash[%d].first: " + "%p, should be NULL\n", i, mdev->app_reads_hash[i].first); + + memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *)); + spin_unlock_irq(&mdev->req_lock); +} + +STATIC void drbd_disconnect(struct drbd_conf *mdev) +{ + struct drbd_work prev_work_done; + enum fencing_policy fp; + union drbd_state_t os, ns; + int rv = SS_UnknownError; + unsigned int i; + + if (mdev->state.conn == StandAlone) + return; + if (mdev->state.conn >= WFConnection) + ERR("ASSERT FAILED cstate = %s, expected < WFConnection\n", + conns_to_name(mdev->state.conn)); + + /* asender does not clean up anything. it must not interfere, either */ + drbd_thread_stop(&mdev->asender); + + mutex_lock(&mdev->data.mutex); + drbd_free_sock(mdev); + mutex_unlock(&mdev->data.mutex); + + spin_lock_irq(&mdev->req_lock); + _drbd_wait_ee_list_empty(mdev, &mdev->active_ee); + _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee); + _drbd_clear_done_ee(mdev); + _drbd_wait_ee_list_empty(mdev, &mdev->read_ee); + reclaim_net_ee(mdev); + spin_unlock_irq(&mdev->req_lock); + + /* We do not have data structures that would allow us to + * get the rs_pending_cnt down to 0 again. + * * On SyncTarget we do not have any data structures describing + * the pending RSDataRequest's we have sent. + * * On SyncSource there is no data structure that tracks + * the RSDataReply blocks that we sent to the SyncTarget. + * And no, it is not the sum of the reference counts in the + * resync_LRU. The resync_LRU tracks the whole operation including + * the disk-IO, while the rs_pending_cnt only tracks the blocks + * on the fly. 
*/ + drbd_rs_cancel_all(mdev); + mdev->rs_total = 0; + mdev->rs_failed = 0; + atomic_set(&mdev->rs_pending_cnt, 0); + wake_up(&mdev->misc_wait); + + /* make sure syncer is stopped and w_resume_next_sg queued */ + del_timer_sync(&mdev->resync_timer); + set_bit(STOP_SYNC_TIMER, &mdev->flags); + resync_timer_fn((unsigned long)mdev); + + /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier, + * w_make_resync_request etc. which may still be on the worker queue + * to be "canceled" */ + set_bit(WORK_PENDING, &mdev->flags); + prev_work_done.cb = w_prev_work_done; + drbd_queue_work(&mdev->data.work, &prev_work_done); + wait_event(mdev->misc_wait, !test_bit(WORK_PENDING, &mdev->flags)); + + kfree(mdev->p_uuid); + mdev->p_uuid = NULL; + + if (!mdev->state.susp) + tl_clear(mdev); + + drbd_fail_pending_reads(mdev); + + INFO("Connection closed\n"); + + drbd_md_sync(mdev); + + fp = DontCare; + if (inc_local(mdev)) { + fp = mdev->bc->dc.fencing; + dec_local(mdev); + } + + if (mdev->state.role == Primary) { + if (fp >= Resource && mdev->state.pdsk >= DUnknown) { + enum drbd_disk_state nps = drbd_try_outdate_peer(mdev); + drbd_request_state(mdev, NS(pdsk, nps)); + } + } + + spin_lock_irq(&mdev->req_lock); + os = mdev->state; + if (os.conn >= Unconnected) { + /* Do not restart in case we are Disconnecting */ + ns = os; + ns.conn = Unconnected; + rv = _drbd_set_state(mdev, ns, ChgStateVerbose, NULL); + } + spin_unlock_irq(&mdev->req_lock); + + if (os.conn == Disconnecting) { + struct hlist_head *h; + wait_event(mdev->misc_wait, atomic_read(&mdev->net_cnt) == 0); + + /* we must not free the tl_hash + * while application io is still on the fly */ + wait_event(mdev->misc_wait, atomic_read(&mdev->ap_bio_cnt) == 0); + + spin_lock_irq(&mdev->req_lock); + /* paranoia code */ + for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++) + if (h->first) + ERR("ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n", + (int)(h - mdev->ee_hash), h->first); + kfree(mdev->ee_hash); + mdev->ee_hash = NULL; + mdev->ee_hash_s = 0; + + /* paranoia code */ + for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++) + if (h->first) + ERR("ASSERT FAILED tl_hash[%u] == %p, expected NULL\n", + (int)(h - mdev->tl_hash), h->first); + kfree(mdev->tl_hash); + mdev->tl_hash = NULL; + mdev->tl_hash_s = 0; + spin_unlock_irq(&mdev->req_lock); + + crypto_free_hash(mdev->cram_hmac_tfm); + mdev->cram_hmac_tfm = NULL; + + kfree(mdev->net_conf); + mdev->net_conf = NULL; + drbd_request_state(mdev, NS(conn, StandAlone)); + } + + /* they do trigger all the time. + * hm. why won't tcp release the page references, + * we already released the socket!? */ + i = atomic_read(&mdev->pp_in_use); + if (i) + DBG("pp_in_use = %u, expected 0\n", i); + if (!list_empty(&mdev->net_ee)) + DBG("net_ee not empty!\n"); + + D_ASSERT(list_empty(&mdev->read_ee)); + D_ASSERT(list_empty(&mdev->active_ee)); + D_ASSERT(list_empty(&mdev->sync_ee)); + D_ASSERT(list_empty(&mdev->done_ee)); + + /* ok, no more ee's on the fly, it is safe to reset the epoch_size */ + atomic_set(&mdev->current_epoch->epoch_size, 0); + D_ASSERT(list_empty(&mdev->current_epoch->list)); +} + +/* + * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version + * we can agree on is stored in agreed_pro_version. + * + * feature flags and the reserved array should be enough room for future + * enhancements of the handshake protocol, and possible plugins... + * + * for now, they are expected to be zero, but ignored. 
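+ *
+ * Example with made-up version numbers, added for illustration: if we
+ * support protocols 86..91 and the peer advertises 88..94, the ranges
+ * overlap and we settle on agreed_pro_version = min(91, 94) = 91;
+ * disjoint ranges make drbd_do_handshake() give up with -1
+ * (incompatible dialects).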
+ */
+STATIC int drbd_send_handshake(struct drbd_conf *mdev)
+{
+ /* ASSERT current == mdev->receiver ... */
+ struct Drbd_HandShake_Packet *p = &mdev->data.sbuf.HandShake;
+ int ok;
+
+ if (mutex_lock_interruptible(&mdev->data.mutex)) {
+ ERR("interrupted during initial handshake\n");
+ return 0; /* interrupted. not ok. */
+ }
+
+ if (mdev->data.socket == NULL) {
+ mutex_unlock(&mdev->data.mutex);
+ return 0;
+ }
+
+ memset(p, 0, sizeof(*p));
+ p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
+ p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
+ ok = _drbd_send_cmd( mdev, mdev->data.socket, HandShake,
+ (struct Drbd_Header *)p, sizeof(*p), 0 );
+ mutex_unlock(&mdev->data.mutex);
+ return ok;
+}
+
+/*
+ * return values:
+ * 1 yes, we have a valid connection
+ * 0 oops, did not work out, please try again
+ * -1 peer talks a different language,
+ * no point in trying again, please go standalone.
+ */
+int drbd_do_handshake(struct drbd_conf *mdev)
+{
+ /* ASSERT current == mdev->receiver ... */
+ struct Drbd_HandShake_Packet *p = &mdev->data.rbuf.HandShake;
+ const int expect = sizeof(struct Drbd_HandShake_Packet)
+ -sizeof(struct Drbd_Header);
+ int rv;
+
+ rv = drbd_send_handshake(mdev);
+ if (!rv)
+ return 0;
+
+ rv = drbd_recv_header(mdev, &p->head);
+ if (!rv)
+ return 0;
+
+ if (p->head.command != HandShake) {
+ ERR("expected HandShake packet, received: %s (0x%04x)\n",
+ cmdname(p->head.command), p->head.command);
+ return -1;
+ }
+
+ if (p->head.length != expect) {
+ ERR("expected HandShake length: %u, received: %u\n",
+ expect, p->head.length);
+ return -1;
+ }
+
+ rv = drbd_recv(mdev, &p->head.payload, expect);
+
+ if (rv != expect) {
+ ERR("short read receiving handshake packet: l=%u\n", rv);
+ return 0;
+ }
+
+ dump_packet(mdev, mdev->data.socket, 2, &mdev->data.rbuf,
+ __FILE__, __LINE__);
+
+ p->protocol_min = be32_to_cpu(p->protocol_min);
+ p->protocol_max = be32_to_cpu(p->protocol_max);
+ if (p->protocol_max == 0)
+ p->protocol_max = p->protocol_min;
+
+ if (PRO_VERSION_MAX < p->protocol_min ||
+ PRO_VERSION_MIN > p->protocol_max)
+ goto incompat;
+
+ mdev->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
+
+ INFO("Handshake successful: "
+ "Agreed network protocol version %d\n", mdev->agreed_pro_version);
+
+ return 1;
+
+ incompat:
+ ERR("incompatible DRBD dialects: "
+ "I support %d-%d, peer supports %d-%d\n",
+ PRO_VERSION_MIN, PRO_VERSION_MAX,
+ p->protocol_min, p->protocol_max);
+ return -1;
+}
+
+#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
+int drbd_do_auth(struct drbd_conf *mdev)
+{
+ ERR("This kernel was built without CONFIG_CRYPTO_HMAC.\n");
+ ERR("You need to disable 'cram-hmac-alg' in drbd.conf.\n");
+ return 0;
+}
+#else
+#define CHALLENGE_LEN 64
+int drbd_do_auth(struct drbd_conf *mdev)
+{
+ char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... 
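+ *
+ * Summary, added for clarity: the exchange below is a mutual CRAM
+ * over the shared secret. We send my_challenge, receive the peer's
+ * challenge, answer with HMAC(secret, peer's challenge) as
+ * AuthResponse, and accept the peer only if its response matches
+ * right_response = HMAC(secret, my_challenge) computed locally.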
*/
+ struct scatterlist sg;
+ char *response = NULL;
+ char *right_response = NULL;
+ char *peers_ch = NULL;
+ struct Drbd_Header p;
+ unsigned int key_len = strlen(mdev->net_conf->shared_secret);
+ unsigned int resp_size;
+ struct hash_desc desc;
+ int rv;
+
+ desc.tfm = mdev->cram_hmac_tfm;
+ desc.flags = 0;
+
+ rv = crypto_hash_setkey(mdev->cram_hmac_tfm,
+ (u8 *)mdev->net_conf->shared_secret, key_len);
+ if (rv) {
+ ERR("crypto_hash_setkey() failed with %d\n", rv);
+ rv = 0;
+ goto fail;
+ }
+
+ get_random_bytes(my_challenge, CHALLENGE_LEN);
+
+ rv = drbd_send_cmd2(mdev, AuthChallenge, my_challenge, CHALLENGE_LEN);
+ if (!rv)
+ goto fail;
+
+ rv = drbd_recv_header(mdev, &p);
+ if (!rv)
+ goto fail;
+
+ if (p.command != AuthChallenge) {
+ ERR("expected AuthChallenge packet, received: %s (0x%04x)\n",
+ cmdname(p.command), p.command);
+ rv = 0;
+ goto fail;
+ }
+
+ if (p.length > CHALLENGE_LEN*2) {
+ ERR("AuthChallenge payload too big.\n");
+ rv = 0;
+ goto fail;
+ }
+
+ peers_ch = kmalloc(p.length, GFP_KERNEL);
+ if (peers_ch == NULL) {
+ ERR("kmalloc of peers_ch failed\n");
+ rv = 0;
+ goto fail;
+ }
+
+ rv = drbd_recv(mdev, peers_ch, p.length);
+
+ if (rv != p.length) {
+ ERR("short read AuthChallenge: l=%u\n", rv);
+ rv = 0;
+ goto fail;
+ }
+
+ resp_size = crypto_hash_digestsize(mdev->cram_hmac_tfm);
+ response = kmalloc(resp_size, GFP_KERNEL);
+ if (response == NULL) {
+ ERR("kmalloc of response failed\n");
+ rv = 0;
+ goto fail;
+ }
+
+ sg_init_table(&sg, 1);
+ sg_set_buf(&sg, peers_ch, p.length);
+
+ rv = crypto_hash_digest(&desc, &sg, sg.length, response);
+ if (rv) {
+ ERR("crypto_hash_digest() failed with %d\n", rv);
+ rv = 0;
+ goto fail;
+ }
+
+ rv = drbd_send_cmd2(mdev, AuthResponse, response, resp_size);
+ if (!rv)
+ goto fail;
+
+ rv = drbd_recv_header(mdev, &p);
+ if (!rv)
+ goto fail;
+
+ if (p.command != AuthResponse) {
+ ERR("expected AuthResponse packet, received: %s (0x%04x)\n",
+ cmdname(p.command), p.command);
+ rv = 0;
+ goto fail;
+ }
+
+ if (p.length != resp_size) {
+ ERR("AuthResponse payload has wrong size\n");
+ rv = 0;
+ goto fail;
+ }
+
+ rv = drbd_recv(mdev, response, resp_size);
+
+ if (rv != resp_size) {
+ ERR("short read receiving AuthResponse: l=%u\n", rv);
+ rv = 0;
+ goto fail;
+ }
+
+ right_response = kmalloc(resp_size, GFP_KERNEL);
+ if (right_response == NULL) {
+ ERR("kmalloc of right_response failed\n");
+ rv = 0;
+ goto fail;
+ }
+
+ sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
+
+ rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
+ if (rv) {
+ ERR("crypto_hash_digest() failed with %d\n", rv);
+ rv = 0;
+ goto fail;
+ }
+
+ rv = !memcmp(response, right_response, resp_size);
+
+ if (rv)
+ INFO("Peer authenticated using %d bytes of '%s' HMAC\n",
+ resp_size, mdev->net_conf->cram_hmac_alg);
+
+ fail:
+ kfree(peers_ch);
+ kfree(response);
+ kfree(right_response);
+
+ return rv;
+}
+#endif
+
+STATIC int drbdd_init(struct Drbd_thread *thi)
+{
+ struct drbd_conf *mdev = thi->mdev;
+ unsigned int minor = mdev_to_minor(mdev);
+ int h;
+
+ sprintf(current->comm, "drbd%d_receiver", minor);
+
+ INFO("receiver (re)started\n");
+
+ do {
+ h = drbd_connect(mdev);
+ if (h == 0) {
+ drbd_disconnect(mdev);
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(HZ);
+ }
+ if (h == -1) {
+ drbd_WARN("Discarding network configuration.\n");
+ drbd_force_state(mdev, NS(conn, Disconnecting));
+ }
+ } while (h == 0);
+
+ if (h > 0) {
+ if (inc_net(mdev)) {
+ drbdd(mdev);
+ dec_net(mdev);
+ }
+ }
+
+ drbd_disconnect(mdev);
+
+ 
INFO("receiver terminated\n"); + return 0; +} + +/* ********* acknowledge sender ******** */ + +STATIC int got_RqSReply(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + struct Drbd_RqS_Reply_Packet *p = (struct Drbd_RqS_Reply_Packet *)h; + + int retcode = be32_to_cpu(p->retcode); + + if (retcode >= SS_Success) { + set_bit(CL_ST_CHG_SUCCESS, &mdev->flags); + } else { + set_bit(CL_ST_CHG_FAIL, &mdev->flags); + ERR("Requested state change failed by peer: %s (%d)\n", + set_st_err_name(retcode), retcode); + } + wake_up(&mdev->state_wait); + + return TRUE; +} + +STATIC int got_Ping(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + return drbd_send_ping_ack(mdev); + +} + +STATIC int got_PingAck(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + /* restore idle timeout */ + mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; + + return TRUE; +} + +STATIC int got_IsInSync(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + struct Drbd_BlockAck_Packet *p = (struct Drbd_BlockAck_Packet *)h; + sector_t sector = be64_to_cpu(p->sector); + int blksize = be32_to_cpu(p->blksize); + + D_ASSERT(mdev->agreed_pro_version >= 89); + + update_peer_seq(mdev, be32_to_cpu(p->seq_num)); + + drbd_rs_complete_io(mdev, sector); + drbd_set_in_sync(mdev, sector, blksize); + /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ + mdev->rs_same_csum += (blksize >> BM_BLOCK_SIZE_B); + dec_rs_pending(mdev); + + return TRUE; +} + +STATIC int got_BlockAck(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + struct drbd_request *req; + struct Drbd_BlockAck_Packet *p = (struct Drbd_BlockAck_Packet *)h; + sector_t sector = be64_to_cpu(p->sector); + int blksize = be32_to_cpu(p->blksize); + + update_peer_seq(mdev, be32_to_cpu(p->seq_num)); + + if (is_syncer_block_id(p->block_id)) { + drbd_set_in_sync(mdev, sector, blksize); + dec_rs_pending(mdev); + } else { + spin_lock_irq(&mdev->req_lock); + req = _ack_id_to_req(mdev, p->block_id, sector); + + if (unlikely(!req)) { + spin_unlock_irq(&mdev->req_lock); + ERR("Got a corrupt block_id/sector pair(2).\n"); + return FALSE; + } + + switch (be16_to_cpu(h->command)) { + case RSWriteAck: + D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + _req_mod(req, write_acked_by_peer_and_sis, 0); + break; + case WriteAck: + D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + _req_mod(req, write_acked_by_peer, 0); + break; + case RecvAck: + D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_B); + _req_mod(req, recv_acked_by_peer, 0); + break; + case DiscardAck: + D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + ALERT("Got DiscardAck packet %llus +%u!" + " DRBD is not a random data generator!\n", + (unsigned long long)req->sector, req->size); + _req_mod(req, conflict_discarded_by_peer, 0); + break; + default: + D_ASSERT(0); + } + spin_unlock_irq(&mdev->req_lock); + } + /* dec_ap_pending is handled within _req_mod */ + + return TRUE; +} + +STATIC int got_NegAck(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + struct Drbd_BlockAck_Packet *p = (struct Drbd_BlockAck_Packet *)h; + sector_t sector = be64_to_cpu(p->sector); + struct drbd_request *req; + + if (__ratelimit(&drbd_ratelimit_state)) + drbd_WARN("Got NegAck packet. 
Peer is in troubles?\n"); + + update_peer_seq(mdev, be32_to_cpu(p->seq_num)); + + if (is_syncer_block_id(p->block_id)) { + int size = be32_to_cpu(p->blksize); + + dec_rs_pending(mdev); + + drbd_rs_failed_io(mdev, sector, size); + } else { + spin_lock_irq(&mdev->req_lock); + req = _ack_id_to_req(mdev, p->block_id, sector); + + if (unlikely(!req)) { + spin_unlock_irq(&mdev->req_lock); + ERR("Got a corrupt block_id/sector pair(2).\n"); + return FALSE; + } + + _req_mod(req, neg_acked, 0); + spin_unlock_irq(&mdev->req_lock); + } + + return TRUE; +} + +STATIC int got_NegDReply(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + struct drbd_request *req; + struct Drbd_BlockAck_Packet *p = (struct Drbd_BlockAck_Packet *)h; + sector_t sector = be64_to_cpu(p->sector); + + spin_lock_irq(&mdev->req_lock); + req = _ar_id_to_req(mdev, p->block_id, sector); + if (unlikely(!req)) { + spin_unlock_irq(&mdev->req_lock); + ERR("Got a corrupt block_id/sector pair(3).\n"); + return FALSE; + } + + _req_mod(req, neg_acked, 0); + spin_unlock_irq(&mdev->req_lock); + + update_peer_seq(mdev, be32_to_cpu(p->seq_num)); + + ERR("Got NegDReply; Sector %llus, len %u; Fail original request.\n", + (unsigned long long)sector, be32_to_cpu(p->blksize)); + + return TRUE; +} + +STATIC int got_NegRSDReply(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + sector_t sector; + int size; + struct Drbd_BlockAck_Packet *p = (struct Drbd_BlockAck_Packet *)h; + + sector = be64_to_cpu(p->sector); + size = be32_to_cpu(p->blksize); + D_ASSERT(p->block_id == ID_SYNCER); + + update_peer_seq(mdev, be32_to_cpu(p->seq_num)); + + dec_rs_pending(mdev); + + if (inc_local_if_state(mdev, Failed)) { + drbd_rs_complete_io(mdev, sector); + drbd_rs_failed_io(mdev, sector, size); + dec_local(mdev); + } + + return TRUE; +} + +STATIC int got_BarrierAck(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + struct Drbd_BarrierAck_Packet *p = (struct Drbd_BarrierAck_Packet *)h; + + tl_release(mdev, p->barrier, be32_to_cpu(p->set_size)); + + return TRUE; +} + +STATIC int got_OVResult(struct drbd_conf *mdev, struct Drbd_Header *h) +{ + struct Drbd_BlockAck_Packet *p = (struct Drbd_BlockAck_Packet *)h; + struct drbd_work *w; + sector_t sector; + int size; + + sector = be64_to_cpu(p->sector); + size = be32_to_cpu(p->blksize); + + update_peer_seq(mdev, be32_to_cpu(p->seq_num)); + + if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC) + drbd_ov_oos_found(mdev, sector, size); + else + ov_oos_print(mdev); + + drbd_rs_complete_io(mdev, sector); + dec_rs_pending(mdev); + + if (--mdev->ov_left == 0) { + w = kmalloc(sizeof(*w), GFP_KERNEL); + if (w) { + w->cb = w_ov_finished; + drbd_queue_work_front(&mdev->data.work, w); + } else { + ERR("kmalloc(w) failed."); + drbd_resync_finished(mdev); + } + } + return TRUE; +} + +struct asender_cmd { + size_t pkt_size; + int (*process)(struct drbd_conf *mdev, struct Drbd_Header *h); +}; + +static struct asender_cmd *get_asender_cmd(int cmd) +{ + static struct asender_cmd asender_tbl[] = { + /* anything missing from this table is in + * the drbd_cmd_handler (drbd_default_handler) table, + * see the beginning of drbdd() */ + [Ping] = { sizeof(struct Drbd_Header), got_Ping }, + [PingAck] = { sizeof(struct Drbd_Header), got_PingAck }, + [RecvAck] = { sizeof(struct Drbd_BlockAck_Packet), got_BlockAck }, + [WriteAck] = { sizeof(struct Drbd_BlockAck_Packet), got_BlockAck }, + [RSWriteAck] = { sizeof(struct Drbd_BlockAck_Packet), got_BlockAck }, + [DiscardAck] = { sizeof(struct Drbd_BlockAck_Packet), got_BlockAck }, + [NegAck] = { sizeof(struct 
Drbd_BlockAck_Packet), got_NegAck }, + [NegDReply] = { sizeof(struct Drbd_BlockAck_Packet), got_NegDReply }, + [NegRSDReply] = { sizeof(struct Drbd_BlockAck_Packet), got_NegRSDReply}, + [OVResult] = { sizeof(struct Drbd_BlockAck_Packet), got_OVResult }, + [BarrierAck] = { sizeof(struct Drbd_BarrierAck_Packet), got_BarrierAck }, + [StateChgReply] = { sizeof(struct Drbd_RqS_Reply_Packet), got_RqSReply }, + [RSIsInSync] = { sizeof(struct Drbd_BlockAck_Packet), got_IsInSync }, + [MAX_CMD] = { 0, NULL }, + }; + if (cmd > MAX_CMD) + return NULL; + return &asender_tbl[cmd]; +} + +STATIC int drbd_asender(struct Drbd_thread *thi) +{ + struct drbd_conf *mdev = thi->mdev; + struct Drbd_Header *h = &mdev->meta.rbuf.head; + struct asender_cmd *cmd = NULL; + + int rv, len; + void *buf = h; + int received = 0; + int expect = sizeof(struct Drbd_Header); + int empty; + + sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); + + current->policy = SCHED_RR; /* Make this a realtime task! */ + current->rt_priority = 2; /* more important than all other tasks */ + + while (get_t_state(thi) == Running) { + drbd_thread_current_set_cpu(mdev); + if (test_and_clear_bit(SEND_PING, &mdev->flags)) { + ERR_IF(!drbd_send_ping(mdev)) goto reconnect; + mdev->meta.socket->sk->sk_rcvtimeo = + mdev->net_conf->ping_timeo*HZ/10; + } + + /* conditionally cork; + * it may hurt latency if we cork without much to send */ + if (!mdev->net_conf->no_cork && + 3 < atomic_read(&mdev->unacked_cnt)) + drbd_tcp_cork(mdev->meta.socket); + while (1) { + clear_bit(SIGNAL_ASENDER, &mdev->flags); + flush_signals(current); + if (!drbd_process_done_ee(mdev)) { + ERR("process_done_ee() = NOT_OK\n"); + goto reconnect; + } + /* to avoid race with newly queued ACKs */ + set_bit(SIGNAL_ASENDER, &mdev->flags); + spin_lock_irq(&mdev->req_lock); + empty = list_empty(&mdev->done_ee); + spin_unlock_irq(&mdev->req_lock); + /* new ack may have been queued right here, + * but then there is also a signal pending, + * and we start over... */ + if (empty) + break; + } + /* but unconditionally uncork unless disabled */ + if (!mdev->net_conf->no_cork) + drbd_tcp_uncork(mdev->meta.socket); + + /* short circuit, recv_msg would return EINTR anyways. */ + if (signal_pending(current)) + continue; + + rv = drbd_recv_short(mdev, mdev->meta.socket, + buf, expect-received, 0); + clear_bit(SIGNAL_ASENDER, &mdev->flags); + + flush_signals(current); + + /* Note: + * -EINTR (on meta) we got a signal + * -EAGAIN (on meta) rcvtimeo expired + * -ECONNRESET other side closed the connection + * -ERESTARTSYS (on data) we got a signal + * rv < 0 other than above: unexpected error! + * rv == expected: full header or command + * rv < expected: "woken" by signal during receive + * rv == 0 : "connection shut down by peer" + */ + if (likely(rv > 0)) { + received += rv; + buf += rv; + } else if (rv == 0) { + ERR("meta connection shut down by peer.\n"); + goto reconnect; + } else if (rv == -EAGAIN) { + if (mdev->meta.socket->sk->sk_rcvtimeo == + mdev->net_conf->ping_timeo*HZ/10) { + ERR("PingAck did not arrive in time.\n"); + goto reconnect; + } + set_bit(SEND_PING, &mdev->flags); + continue; + } else if (rv == -EINTR) { + continue; + } else { + ERR("sock_recvmsg returned %d\n", rv); + goto reconnect; + } + + if (received == expect && cmd == NULL) { + if (unlikely(h->magic != BE_DRBD_MAGIC)) { + ERR("magic?? 
on meta m: 0x%lx c: %d l: %d\n", + (long)be32_to_cpu(h->magic), + h->command, h->length); + goto reconnect; + } + cmd = get_asender_cmd(be16_to_cpu(h->command)); + len = be16_to_cpu(h->length); + if (unlikely(cmd == NULL)) { + ERR("unknown command?? on meta m: 0x%lx c: %d l: %d\n", + (long)be32_to_cpu(h->magic), + h->command, h->length); + goto disconnect; + } + expect = cmd->pkt_size; + ERR_IF(len != expect-sizeof(struct Drbd_Header)) { + dump_packet(mdev, mdev->meta.socket, 1, (void *)h, __FILE__, __LINE__); + DUMPI(expect); + goto reconnect; + } + } + if (received == expect) { + D_ASSERT(cmd != NULL); + dump_packet(mdev, mdev->meta.socket, 1, (void *)h, __FILE__, __LINE__); + if (!cmd->process(mdev, h)) + goto reconnect; + + buf = h; + received = 0; + expect = sizeof(struct Drbd_Header); + cmd = NULL; + } + } + + if (0) { +reconnect: + drbd_force_state(mdev, NS(conn, NetworkFailure)); + } + if (0) { +disconnect: + drbd_force_state(mdev, NS(conn, Disconnecting)); + } + clear_bit(SIGNAL_ASENDER, &mdev->flags); + + D_ASSERT(mdev->state.conn < Connected); + INFO("asender terminated\n"); + + return 0; +} -- cgit v1.2.3 From 9f8dd59cf44bbd7f857cc4fc3de12a3fd4bae7e8 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 30 Mar 2009 18:47:17 +0200 Subject: DRBD: proc The /proc/drbd interface. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_proc.c | 271 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 271 insertions(+) create mode 100644 drivers/block/drbd/drbd_proc.c diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c new file mode 100644 index 000000000000..6dc93edfb646 --- /dev/null +++ b/drivers/block/drbd/drbd_proc.c @@ -0,0 +1,271 @@ +/* + drbd_proc.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 1999-2008, Philipp Reisner . + Copyright (C) 2002-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include "drbd_int.h" +#include "lru_cache.h" /* for lc_sprintf_stats */ + +STATIC int drbd_proc_open(struct inode *inode, struct file *file); + + +struct proc_dir_entry *drbd_proc; +struct file_operations drbd_proc_fops = { + .owner = THIS_MODULE, + .open = drbd_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + + +/*lge + * progress bars shamelessly adapted from driver/md/md.c + * output looks like + * [=====>..............] 
33.5% (23456/123456) + * finish: 2:20:20 speed: 6,345 (6,456) K/sec + */ +STATIC void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) +{ + unsigned long db, dt, dbdt, rt, rs_left; + unsigned int res; + int i, x, y; + + drbd_get_syncer_progress(mdev, &rs_left, &res); + + x = res/50; + y = 20-x; + seq_printf(seq, "\t["); + for (i = 1; i < x; i++) + seq_printf(seq, "="); + seq_printf(seq, ">"); + for (i = 0; i < y; i++) + seq_printf(seq, "."); + seq_printf(seq, "] "); + + seq_printf(seq, "sync'ed:%3u.%u%% ", res / 10, res % 10); + /* if more than 1 GB display in MB */ + if (mdev->rs_total > 0x100000L) + seq_printf(seq, "(%lu/%lu)M\n\t", + (unsigned long) Bit2KB(rs_left >> 10), + (unsigned long) Bit2KB(mdev->rs_total >> 10)); + else + seq_printf(seq, "(%lu/%lu)K\n\t", + (unsigned long) Bit2KB(rs_left), + (unsigned long) Bit2KB(mdev->rs_total)); + + /* see drivers/md/md.c + * We do not want to overflow, so the order of operands and + * the * 100 / 100 trick are important. We do a +1 to be + * safe against division by zero. We only estimate anyway. + * + * dt: time from mark until now + * db: blocks written from mark until now + * rt: remaining time + */ + dt = (jiffies - mdev->rs_mark_time) / HZ; + + if (dt > 20) { + /* if we made no update to rs_mark_time for too long, + * we are stalled. show that. */ + seq_printf(seq, "stalled\n"); + return; + } + + if (!dt) + dt++; + db = mdev->rs_mark_left - rs_left; + rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */ + + seq_printf(seq, "finish: %lu:%02lu:%02lu", + rt / 3600, (rt % 3600) / 60, rt % 60); + + /* current speed average over (SYNC_MARKS * SYNC_MARK_STEP) jiffies */ + dbdt = Bit2KB(db/dt); + if (dbdt > 1000) + seq_printf(seq, " speed: %ld,%03ld", + dbdt/1000, dbdt % 1000); + else + seq_printf(seq, " speed: %ld", dbdt); + + /* mean speed since syncer started + * we do account for PausedSync periods */ + dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; + if (dt <= 0) + dt = 1; + db = mdev->rs_total - rs_left; + dbdt = Bit2KB(db/dt); + if (dbdt > 1000) + seq_printf(seq, " (%ld,%03ld)", + dbdt/1000, dbdt % 1000); + else + seq_printf(seq, " (%ld)", dbdt); + + seq_printf(seq, " K/sec\n"); +} + +#ifdef ENABLE_DYNAMIC_TRACE +STATIC void resync_dump_detail(struct seq_file *seq, struct lc_element *e) +{ + struct bm_extent *bme = (struct bm_extent *)e; + + seq_printf(seq, "%5d %s %s\n", bme->rs_left, + bme->flags & BME_NO_WRITES ? "NO_WRITES" : "---------", + bme->flags & BME_LOCKED ? "LOCKED" : "------" + ); +} +#endif + +STATIC int drbd_seq_show(struct seq_file *seq, void *v) +{ + int i, hole = 0; + const char *sn; + struct drbd_conf *mdev; + + static char write_ordering_chars[] = { + [WO_none] = 'n', + [WO_drain_io] = 'd', + [WO_bdev_flush] = 'f', + [WO_bio_barrier] = 'b', + }; + + seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n", + API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX, drbd_buildtag()); + + /* + cs .. connection state + ro .. node role (local/remote) + ds .. disk state (local/remote) + protocol + various flags + ns .. network send + nr .. network receive + dw .. disk write + dr .. disk read + al .. activity log write count + bm .. bitmap update write count + pe .. pending (waiting for ack or data reply) + ua .. unack'd (still need to send ack or data reply) + ap .. application requests accepted, but not yet completed + ep .. number of epochs currently "on the fly", BarrierAck pending + wo .. write ordering mode currently in use + oos .. 
known out-of-sync kB + */ + + for (i = 0; i < minor_count; i++) { + mdev = minor_to_mdev(i); + if (!mdev) { + hole = 1; + continue; + } + if (hole) { + hole = 0; + seq_printf(seq, "\n"); + } + + sn = conns_to_name(mdev->state.conn); + + if (mdev->state.conn == StandAlone && + mdev->state.disk == Diskless && + mdev->state.role == Secondary) { + seq_printf(seq, "%2d: cs:Unconfigured\n", i); + } else { + seq_printf(seq, + "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c\n" + " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u " + "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c", + i, sn, + roles_to_name(mdev->state.role), + roles_to_name(mdev->state.peer), + disks_to_name(mdev->state.disk), + disks_to_name(mdev->state.pdsk), + (mdev->net_conf == NULL ? ' ' : + (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')), + mdev->state.susp ? 's' : 'r', + mdev->state.aftr_isp ? 'a' : '-', + mdev->state.peer_isp ? 'p' : '-', + mdev->state.user_isp ? 'u' : '-', + mdev->congestion_reason, + mdev->send_cnt/2, + mdev->recv_cnt/2, + mdev->writ_cnt/2, + mdev->read_cnt/2, + mdev->al_writ_cnt, + mdev->bm_writ_cnt, + atomic_read(&mdev->local_cnt), + atomic_read(&mdev->ap_pending_cnt) + + atomic_read(&mdev->rs_pending_cnt), + atomic_read(&mdev->unacked_cnt), + atomic_read(&mdev->ap_bio_cnt), + mdev->epochs, + write_ordering_chars[mdev->write_ordering] + ); + seq_printf(seq, " oos:%lu\n", + Bit2KB(drbd_bm_total_weight(mdev))); + } + if (mdev->state.conn == SyncSource || + mdev->state.conn == SyncTarget) + drbd_syncer_progress(mdev, seq); + + if (mdev->state.conn == VerifyS || mdev->state.conn == VerifyT) + seq_printf(seq, "\t%3d%% %lu/%lu\n", + (int)((mdev->rs_total-mdev->ov_left) / + (mdev->rs_total/100+1)), + mdev->rs_total - mdev->ov_left, + mdev->rs_total); + +#ifdef ENABLE_DYNAMIC_TRACE + if (proc_details >= 1 && inc_local_if_state(mdev, Failed)) { + lc_printf_stats(seq, mdev->resync); + lc_printf_stats(seq, mdev->act_log); + dec_local(mdev); + } + + if (proc_details >= 2) { + if (mdev->resync) { + lc_dump(mdev->resync, seq, "rs_left", + resync_dump_detail); + } + } +#endif + } + + return 0; +} + +STATIC int drbd_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, drbd_seq_show, PDE(inode)->data); +} + +/* PROC FS stuff end */ -- cgit v1.2.3 From 2cbcb688f4af1628faaaa48690b9f5356a639590 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 30 Mar 2009 18:47:18 +0200 Subject: DRBD: worker Our generic worker thread. Does the actual sending of data via the network link. Does all the after-state-change activities, that have to be done without holding the req_lock spinlock. And some other stuff. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_worker.c | 1463 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 1463 insertions(+) create mode 100644 drivers/block/drbd/drbd_worker.c diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c new file mode 100644 index 000000000000..a9421dc104bf --- /dev/null +++ b/drivers/block/drbd/drbd_worker.c @@ -0,0 +1,1463 @@ +/* + drbd_worker.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 1999-2008, Philipp Reisner . + Copyright (C) 2002-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. 
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include "drbd_int.h"
+#include "drbd_req.h"
+
+#define SLEEP_TIME (HZ/10)
+
+STATIC int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);
+
+
+
+/* defined here:
+ drbd_md_io_complete
+ drbd_endio_write_sec
+ drbd_endio_read_sec
+ drbd_endio_pri
+
+ * more endio handlers:
+ atodb_endio in drbd_actlog.c
+ drbd_bm_async_io_complete in drbd_bitmap.c
+
+ * For all these callbacks, note the following:
+ * The callbacks will be called in irq context by the IDE drivers,
+ * and in Softirqs/Tasklets/BH context by the SCSI drivers.
+ * Try to get the locking right :)
+ *
+ */
+
+
+/* About the global_state_lock
+ Each state transition on a device holds a read lock. In case we have
+ to evaluate the sync after dependencies, we grab a write lock, because
+ we need stable states on all devices for that. */
+rwlock_t global_state_lock;
+
+/* used for synchronous meta data and bitmap IO
+ * submitted by drbd_md_sync_page_io()
+ */
+void drbd_md_io_complete(struct bio *bio, int error)
+{
+ struct drbd_md_io *md_io;
+
+ /* error parameter ignored:
+ * drbd_md_sync_page_io explicitly tests bio_uptodate(bio); */
+
+ md_io = (struct drbd_md_io *)bio->bi_private;
+
+ md_io->error = error;
+
+ dump_internal_bio("Md", md_io->mdev, bio, 1);
+
+ complete(&md_io->event);
+}
+
+/* reads on behalf of the partner,
+ * "submitted" by the receiver
+ */
+void drbd_endio_read_sec(struct bio *bio, int error) __releases(local)
+{
+ unsigned long flags = 0;
+ struct Tl_epoch_entry *e = NULL;
+ struct drbd_conf *mdev;
+ int uptodate = bio_flagged(bio, BIO_UPTODATE);
+
+ e = bio->bi_private;
+ mdev = e->mdev;
+
+ if (!error && !uptodate) {
+ /* strange behaviour of some lower level drivers...
+ * fail the request by clearing the uptodate flag,
+ * but do not return any error?!
+ * do we want to drbd_WARN() on this? */
+ error = -EIO;
+ }
+
+ D_ASSERT(e->block_id != ID_VACANT);
+
+ dump_internal_bio("Sec", mdev, bio, 1);
+
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ mdev->read_cnt += e->size >> 9;
+ list_del(&e->w.list);
+ if (list_empty(&mdev->read_ee))
+ wake_up(&mdev->ee_wait);
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ drbd_chk_io_error(mdev, error, FALSE);
+ drbd_queue_work(&mdev->data.work, &e->w);
+ dec_local(mdev);
+
+ MTRACE(TraceTypeEE, TraceLvlAll,
+ INFO("Moved EE (READ) to worker sec=%llus size=%u ee=%p\n",
+ (unsigned long long)e->sector, e->size, e);
+ );
+}
+
+/* writes on behalf of the partner, or resync writes,
+ * "submitted" by the receiver.
+ */
+void drbd_endio_write_sec(struct bio *bio, int error) __releases(local)
+{
+ unsigned long flags = 0;
+ struct Tl_epoch_entry *e = NULL;
+ struct drbd_conf *mdev;
+ sector_t e_sector;
+ int do_wake;
+ int is_syncer_req;
+ int do_al_complete_io;
+ int uptodate = bio_flagged(bio, BIO_UPTODATE);
+
+ e = bio->bi_private;
+ mdev = e->mdev;
+
+ if (!error && !uptodate) {
+ /* strange behaviour of some lower level drivers...
+ * fail the request by clearing the uptodate flag, + * but do not return any error?! + * do we want to drbd_WARN() on this? */ + error = -EIO; + } + + /* error == -ENOTSUPP would be a better test, + * alas it is not reliable */ + if (error && e->flags & EE_IS_BARRIER) { + drbd_bump_write_ordering(mdev, WO_bdev_flush); + spin_lock_irqsave(&mdev->req_lock, flags); + list_del(&e->w.list); + e->w.cb = w_e_reissue; + __release(local); /* Actually happens in w_e_reissue. */ + spin_unlock_irqrestore(&mdev->req_lock, flags); + drbd_queue_work(&mdev->data.work, &e->w); + return; + } + + D_ASSERT(e->block_id != ID_VACANT); + + dump_internal_bio("Sec", mdev, bio, 1); + + spin_lock_irqsave(&mdev->req_lock, flags); + mdev->writ_cnt += e->size >> 9; + is_syncer_req = is_syncer_block_id(e->block_id); + + /* after we moved e to done_ee, + * we may no longer access it, + * it may be freed/reused already! + * (as soon as we release the req_lock) */ + e_sector = e->sector; + do_al_complete_io = e->flags & EE_CALL_AL_COMPLETE_IO; + + list_del(&e->w.list); /* has been on active_ee or sync_ee */ + list_add_tail(&e->w.list, &mdev->done_ee); + + MTRACE(TraceTypeEE, TraceLvlAll, + INFO("Moved EE (WRITE) to done_ee sec=%llus size=%u ee=%p\n", + (unsigned long long)e->sector, e->size, e); + ); + + /* No hlist_del_init(&e->colision) here, we did not send the Ack yet, + * neither did we wake possibly waiting conflicting requests. + * done from "drbd_process_done_ee" within the appropriate w.cb + * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */ + + do_wake = is_syncer_req + ? list_empty(&mdev->sync_ee) + : list_empty(&mdev->active_ee); + + if (error) + __drbd_chk_io_error(mdev, FALSE); + spin_unlock_irqrestore(&mdev->req_lock, flags); + + if (is_syncer_req) + drbd_rs_complete_io(mdev, e_sector); + + if (do_wake) + wake_up(&mdev->ee_wait); + + if (do_al_complete_io) + drbd_al_complete_io(mdev, e_sector); + + wake_asender(mdev); + dec_local(mdev); + +} + +/* read, readA or write requests on Primary comming from drbd_make_request + */ +void drbd_endio_pri(struct bio *bio, int error) +{ + unsigned long flags; + struct drbd_request *req = bio->bi_private; + struct drbd_conf *mdev = req->mdev; + enum drbd_req_event what; + int uptodate = bio_flagged(bio, BIO_UPTODATE); + + if (!error && !uptodate) { + /* strange behaviour of some lower level drivers... + * fail the request by clearing the uptodate flag, + * but do not return any error?! + * do we want to drbd_WARN() on this? */ + error = -EIO; + } + + dump_internal_bio("Pri", mdev, bio, 1); + + /* to avoid recursion in _req_mod */ + what = error + ? (bio_data_dir(bio) == WRITE) + ? write_completed_with_error + : read_completed_with_error + : completed_ok; + spin_lock_irqsave(&mdev->req_lock, flags); + _req_mod(req, what, error); + spin_unlock_irqrestore(&mdev->req_lock, flags); +} + +int w_io_error(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct drbd_request *req = (struct drbd_request *)w; + int ok; + + /* NOTE: mdev->bc can be NULL by the time we get here! 
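That is why the D_ASSERT
below stays commented out: dereferencing mdev->bc here could trip over
a NULL pointer.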
*/ + /* D_ASSERT(mdev->bc->dc.on_io_error != PassOn); */ + + /* the only way this callback is scheduled is from _req_may_be_done, + * when it is done and had a local write error, see comments there */ + drbd_req_free(req); + + ok = drbd_io_error(mdev, FALSE); + if (unlikely(!ok)) + ERR("Sending in w_io_error() failed\n"); + return ok; +} + +int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct drbd_request *req = (struct drbd_request *)w; + + /* We should not detach for read io-error, + * but try to WRITE the DataReply to the failed location, + * to give the disk the chance to relocate that block */ + drbd_io_error(mdev, FALSE); /* tries to schedule a detach and notifies peer */ + + spin_lock_irq(&mdev->req_lock); + if (cancel || + mdev->state.conn < Connected || + mdev->state.pdsk <= Inconsistent) { + _req_mod(req, send_canceled, 0); + spin_unlock_irq(&mdev->req_lock); + ALERT("WE ARE LOST. Local IO failure, no peer.\n"); + return 1; + } + spin_unlock_irq(&mdev->req_lock); + + return w_send_read_req(mdev, w, 0); +} + +int w_resync_inactive(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + ERR_IF(cancel) return 1; + ERR("resync inactive, but callback triggered??\n"); + return 1; /* Simply ignore this! */ +} + +STATIC void drbd_csum(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest) +{ + struct hash_desc desc; + struct scatterlist sg; + struct bio_vec *bvec; + int i; + + desc.tfm = tfm; + desc.flags = 0; + + sg_init_table(&sg, 1); + crypto_hash_init(&desc); + + __bio_for_each_segment(bvec, bio, i, 0) { + sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset); + crypto_hash_update(&desc, &sg, sg.length); + } + crypto_hash_final(&desc, digest); +} + +STATIC int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + int digest_size; + void *digest; + int ok; + + D_ASSERT(e->block_id == DRBD_MAGIC + 0xbeef); + + if (unlikely(cancel)) { + drbd_free_ee(mdev, e); + return 1; + } + + if (likely(drbd_bio_uptodate(e->private_bio))) { + digest_size = crypto_hash_digestsize(mdev->csums_tfm); + digest = kmalloc(digest_size, GFP_KERNEL); + if (digest) { + drbd_csum(mdev, mdev->csums_tfm, e->private_bio, digest); + + inc_rs_pending(mdev); + ok = drbd_send_drequest_csum(mdev, + e->sector, + e->size, + digest, + digest_size, + CsumRSRequest); + kfree(digest); + } else { + ERR("kmalloc() of digest failed.\n"); + ok = 0; + } + } else { + drbd_io_error(mdev, FALSE); + ok = 1; + } + + drbd_free_ee(mdev, e); + + if (unlikely(!ok)) + ERR("drbd_send_drequest(..., csum) failed\n"); + return ok; +} + +#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) + +STATIC int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) +{ + struct Tl_epoch_entry *e; + + if (!inc_local(mdev)) + return 0; + + if (FAULT_ACTIVE(mdev, DRBD_FAULT_AL_EE)) + return 2; + + e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY); + if (!e) { + dec_local(mdev); + return 2; + } + + spin_lock_irq(&mdev->req_lock); + list_add(&e->w.list, &mdev->read_ee); + spin_unlock_irq(&mdev->req_lock); + + e->private_bio->bi_end_io = drbd_endio_read_sec; + e->private_bio->bi_rw = READ; + e->w.cb = w_e_send_csum; + + mdev->read_cnt += size >> 9; + drbd_generic_make_request(mdev, DRBD_FAULT_RS_RD, e->private_bio); + + return 1; +} + +void resync_timer_fn(unsigned long data) +{ + unsigned long flags; + struct drbd_conf *mdev = (struct drbd_conf *) data; + int queue; + + 
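/* Decide which callback the next resync work item runs: online verify
+ * uses w_make_ov_request, a plain resync w_make_resync_request, and a
+ * stopped syncer parks the work item as w_resync_inactive. */
+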
spin_lock_irqsave(&mdev->req_lock, flags);
+
+ if (likely(!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags))) {
+ queue = 1;
+ if (mdev->state.conn == VerifyS)
+ mdev->resync_work.cb = w_make_ov_request;
+ else
+ mdev->resync_work.cb = w_make_resync_request;
+ } else {
+ queue = 0;
+ mdev->resync_work.cb = w_resync_inactive;
+ }
+
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ /* harmless race: list_empty outside data.work.q_lock */
+ if (list_empty(&mdev->resync_work.list) && queue)
+ drbd_queue_work(&mdev->data.work, &mdev->resync_work);
+}
+
+int w_make_resync_request(struct drbd_conf *mdev,
+ struct drbd_work *w, int cancel)
+{
+ unsigned long bit;
+ sector_t sector;
+ const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
+ int max_segment_size = mdev->rq_queue->max_segment_size;
+ int number, i, size;
+ int align;
+
+ if (unlikely(cancel))
+ return 1;
+
+ if (unlikely(mdev->state.conn < Connected)) {
+ ERR("Confused in w_make_resync_request()! cstate < Connected");
+ return 0;
+ }
+
+ if (mdev->state.conn != SyncTarget)
+ ERR("%s in w_make_resync_request\n",
+ conns_to_name(mdev->state.conn));
+
+ if (!inc_local(mdev)) {
+ /* Since we only need to access mdev->resync, an
+ inc_local_if_state(mdev, Failed) would be sufficient, but
+ to continue resync with a broken disk makes no sense at
+ all */
+ ERR("Disk broke down during resync!\n");
+ mdev->resync_work.cb = w_resync_inactive;
+ return 1;
+ }
+ /* All goto requeue jumps have to happen after this block: inc_local() */
+
+ number = SLEEP_TIME*mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ);
+
+ if (atomic_read(&mdev->rs_pending_cnt) > number)
+ goto requeue;
+ number -= atomic_read(&mdev->rs_pending_cnt);
+
+ for (i = 0; i < number; i++) {
+next_sector:
+ size = BM_BLOCK_SIZE;
+ bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
+
+ if (bit == -1UL) {
+ mdev->bm_resync_fo = drbd_bm_bits(mdev);
+ mdev->resync_work.cb = w_resync_inactive;
+ dec_local(mdev);
+ return 1;
+ }
+
+ sector = BM_BIT_TO_SECT(bit);
+
+ if (drbd_try_rs_begin_io(mdev, sector)) {
+ mdev->bm_resync_fo = bit;
+ goto requeue;
+ }
+ mdev->bm_resync_fo = bit + 1;
+
+ if (unlikely(drbd_bm_test_bit(mdev, bit) == 0)) {
+ drbd_rs_complete_io(mdev, sector);
+ goto next_sector;
+ }
+
+#if DRBD_MAX_SEGMENT_SIZE > BM_BLOCK_SIZE
+ /* try to find some adjacent bits.
+ * we stop if we already have the maximum req size.
+ *
+ * Additionally, always align bigger requests, in order to
+ * be prepared for all stripe sizes of software RAIDs.
+ *
+ * we _do_ care about the agreed-upon q->max_segment_size
+ * here, as splitting up the requests on the other side is more
+ * difficult. the consequence is that on lvm and md and other
+ * "indirect" devices, this is dead code, since
+ * q->max_segment_size will be PAGE_SIZE.
+ */
+ align = 1;
+ for (;;) {
+ if (size + BM_BLOCK_SIZE > max_segment_size)
+ break;
+
+ /* always stay aligned */
+ if (sector & ((1<<(align+3))-1))
+ break;
+
+ /* do not cross extent boundaries */
+ if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
+ break;
+ /* now, is it actually dirty, after all?
+ * caution, drbd_bm_test_bit is tri-state for some + * obscure reason; ( b == 0 ) would get the out-of-band + * only accidentally right because of the "oddly sized" + * adjustment below */ + if (drbd_bm_test_bit(mdev, bit+1) != 1) + break; + bit++; + size += BM_BLOCK_SIZE; + if ((BM_BLOCK_SIZE << align) <= size) + align++; + i++; + } + /* if we merged some, + * reset the offset to start the next drbd_bm_find_next from */ + if (size > BM_BLOCK_SIZE) + mdev->bm_resync_fo = bit + 1; +#endif + + /* adjust very last sectors, in case we are oddly sized */ + if (sector + (size>>9) > capacity) + size = (capacity-sector)<<9; + if (mdev->agreed_pro_version >= 89 && mdev->csums_tfm) { + switch (read_for_csum(mdev, sector, size)) { + case 0: /* Disk failure*/ + dec_local(mdev); + return 0; + case 2: /* Allocation failed */ + drbd_rs_complete_io(mdev, sector); + mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); + goto requeue; + /* case 1: everything ok */ + } + } else { + inc_rs_pending(mdev); + if (!drbd_send_drequest(mdev, RSDataRequest, + sector, size, ID_SYNCER)) { + ERR("drbd_send_drequest() failed, aborting...\n"); + dec_rs_pending(mdev); + dec_local(mdev); + return 0; + } + } + } + + if (mdev->bm_resync_fo >= drbd_bm_bits(mdev)) { + /* last syncer _request_ was sent, + * but the RSDataReply not yet received. sync will end (and + * next sync group will resume), as soon as we receive the last + * resync data block, and the last bit is cleared. + * until then resync "work" is "inactive" ... + */ + mdev->resync_work.cb = w_resync_inactive; + dec_local(mdev); + return 1; + } + + requeue: + mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); + dec_local(mdev); + return 1; +} + +int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + int number, i, size; + sector_t sector; + const sector_t capacity = drbd_get_capacity(mdev->this_bdev); + + if (unlikely(cancel)) + return 1; + + if (unlikely(mdev->state.conn < Connected)) { + ERR("Confused in w_make_ov_request()! cstate < Connected"); + return 0; + } + + number = SLEEP_TIME*mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ); + if (atomic_read(&mdev->rs_pending_cnt) > number) + goto requeue; + + number -= atomic_read(&mdev->rs_pending_cnt); + + sector = mdev->ov_position; + for (i = 0; i < number; i++) { + size = BM_BLOCK_SIZE; + + if (drbd_try_rs_begin_io(mdev, sector)) { + mdev->ov_position = sector; + goto requeue; + } + + if (sector + (size>>9) > capacity) + size = (capacity-sector)<<9; + + inc_rs_pending(mdev); + if (!drbd_send_ov_request(mdev, sector, size)) { + dec_rs_pending(mdev); + return 0; + } + sector += BM_SECT_PER_BIT; + if (sector >= capacity) { + mdev->resync_work.cb = w_resync_inactive; + + return 1; + } + } + mdev->ov_position = sector; + + requeue: + mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); + return 1; +} + + +int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + kfree(w); + ov_oos_print(mdev); + drbd_resync_finished(mdev); + + return 1; +} + +STATIC int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + kfree(w); + + drbd_resync_finished(mdev); + + return 1; +} + +int drbd_resync_finished(struct drbd_conf *mdev) +{ + unsigned long db, dt, dbdt; + unsigned long n_oos; + union drbd_state_t os, ns; + struct drbd_work *w; + char *khelper_cmd = NULL; + + /* Remove all elements from the resync LRU. Since future actions + * might set bits in the (main) bitmap, then the entries in the + * resync LRU would be wrong. 
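If elements are still in use, we
retry from a fresh work item below, once the lingering references have
been dropped.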
*/
+ if (drbd_rs_del_all(mdev)) {
+ /* In case this is not possible now, most probably because
+ * there are RSDataReply Packets lingering on the worker's
+ * queue (or even the read operations for those packets
+ * are not finished by now). Retry in 100ms. */
+
+ drbd_kick_lo(mdev);
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(HZ / 10);
+ w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
+ if (w) {
+ w->cb = w_resync_finished;
+ drbd_queue_work(&mdev->data.work, w);
+ return 1;
+ }
+ ERR("Warning: failed to drbd_rs_del_all() and to kmalloc(w).\n");
+ }
+
+ dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
+ if (dt <= 0)
+ dt = 1;
+ db = mdev->rs_total;
+ dbdt = Bit2KB(db/dt);
+ mdev->rs_paused /= HZ;
+
+ if (!inc_local(mdev))
+ goto out;
+
+ spin_lock_irq(&mdev->req_lock);
+ os = mdev->state;
+
+ /* This protects us against multiple calls (that can happen in the presence
+ of application IO), and against connectivity loss just before we arrive here. */
+ if (os.conn <= Connected)
+ goto out_unlock;
+
+ ns = os;
+ ns.conn = Connected;
+
+ INFO("%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
+ (os.conn == VerifyS || os.conn == VerifyT) ?
+ "Online verify " : "Resync",
+ dt + mdev->rs_paused, mdev->rs_paused, dbdt);
+
+ n_oos = drbd_bm_total_weight(mdev);
+
+ if (os.conn == VerifyS || os.conn == VerifyT) {
+ if (n_oos) {
+ ALERT("Online verify found %lu %dk blocks out of sync!\n",
+ n_oos, Bit2KB(1));
+ khelper_cmd = "out-of-sync";
+ }
+ } else {
+ D_ASSERT((n_oos - mdev->rs_failed) == 0);
+
+ if (os.conn == SyncTarget || os.conn == PausedSyncT)
+ khelper_cmd = "after-resync-target";
+
+ if (mdev->csums_tfm && mdev->rs_total) {
+ const unsigned long s = mdev->rs_same_csum;
+ const unsigned long t = mdev->rs_total;
+ const int ratio =
+ (t == 0) ? 0 :
+ (t < 100000) ? ((s*100)/t) : (s/(t/100));
+ INFO("%u %% had equal check sums, eliminated: %luK; "
+ "transferred %luK total %luK\n",
+ ratio,
+ Bit2KB(mdev->rs_same_csum),
+ Bit2KB(mdev->rs_total - mdev->rs_same_csum),
+ Bit2KB(mdev->rs_total));
+ }
+ }
+
+ if (mdev->rs_failed) {
+ INFO(" %lu failed blocks\n", mdev->rs_failed);
+
+ if (os.conn == SyncTarget || os.conn == PausedSyncT) {
+ ns.disk = Inconsistent;
+ ns.pdsk = UpToDate;
+ } else {
+ ns.disk = UpToDate;
+ ns.pdsk = Inconsistent;
+ }
+ } else {
+ ns.disk = UpToDate;
+ ns.pdsk = UpToDate;
+
+ if (os.conn == SyncTarget || os.conn == PausedSyncT) {
+ if (mdev->p_uuid) {
+ int i;
+ for (i = Bitmap ; i <= History_end ; i++)
+ _drbd_uuid_set(mdev, i, mdev->p_uuid[i]);
+ drbd_uuid_set(mdev, Bitmap, mdev->bc->md.uuid[Current]);
+ _drbd_uuid_set(mdev, Current, mdev->p_uuid[Current]);
+ } else {
+ ERR("mdev->p_uuid is NULL! BUG\n");
+ }
+ }
+
+ drbd_uuid_set_bm(mdev, 0UL);
+
+ if (mdev->p_uuid) {
+ /* Now the two UUID sets are equal, update what we
+ * know of the peer.
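Copying bc->md.uuid[] into p_uuid keeps our
record of the peer's UUIDs current.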
*/ + int i; + for (i = Current ; i <= History_end ; i++) + mdev->p_uuid[i] = mdev->bc->md.uuid[i]; + } + } + + _drbd_set_state(mdev, ns, ChgStateVerbose, NULL); +out_unlock: + spin_unlock_irq(&mdev->req_lock); + dec_local(mdev); +out: + mdev->rs_total = 0; + mdev->rs_failed = 0; + mdev->rs_paused = 0; + + if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) { + drbd_WARN("Writing the whole bitmap, due to failed kmalloc\n"); + drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished"); + } + + drbd_bm_recount_bits(mdev); + + if (khelper_cmd) + drbd_khelper(mdev, khelper_cmd); + + return 1; +} + +/** + * w_e_end_data_req: Send the answer (DataReply) in response to a DataRequest. + */ +int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + int ok; + + if (unlikely(cancel)) { + drbd_free_ee(mdev, e); + dec_unacked(mdev); + return 1; + } + + if (likely(drbd_bio_uptodate(e->private_bio))) { + ok = drbd_send_block(mdev, DataReply, e); + } else { + if (__ratelimit(&drbd_ratelimit_state)) + ERR("Sending NegDReply. sector=%llus.\n", + (unsigned long long)e->sector); + + ok = drbd_send_ack(mdev, NegDReply, e); + + drbd_io_error(mdev, FALSE); + } + + dec_unacked(mdev); + + spin_lock_irq(&mdev->req_lock); + if (drbd_bio_has_active_page(e->private_bio)) { + /* This might happen if sendpage() has not finished */ + list_add_tail(&e->w.list, &mdev->net_ee); + } else { + drbd_free_ee(mdev, e); + } + spin_unlock_irq(&mdev->req_lock); + + if (unlikely(!ok)) + ERR("drbd_send_block() failed\n"); + return ok; +} + +/** + * w_e_end_rsdata_req: Send the answer (RSDataReply) to a RSDataRequest. + */ +int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + int ok; + + if (unlikely(cancel)) { + drbd_free_ee(mdev, e); + dec_unacked(mdev); + return 1; + } + + if (inc_local_if_state(mdev, Failed)) { + drbd_rs_complete_io(mdev, e->sector); + dec_local(mdev); + } + + if (likely(drbd_bio_uptodate(e->private_bio))) { + if (likely(mdev->state.pdsk >= Inconsistent)) { + inc_rs_pending(mdev); + ok = drbd_send_block(mdev, RSDataReply, e); + } else { + if (__ratelimit(&drbd_ratelimit_state)) + ERR("Not sending RSDataReply, " + "partner DISKLESS!\n"); + ok = 1; + } + } else { + if (__ratelimit(&drbd_ratelimit_state)) + ERR("Sending NegRSDReply. 
sector %llus.\n", + (unsigned long long)e->sector); + + ok = drbd_send_ack(mdev, NegRSDReply, e); + + drbd_io_error(mdev, FALSE); + + /* update resync data with failure */ + drbd_rs_failed_io(mdev, e->sector, e->size); + } + + dec_unacked(mdev); + + spin_lock_irq(&mdev->req_lock); + if (drbd_bio_has_active_page(e->private_bio)) { + /* This might happen if sendpage() has not finished */ + list_add_tail(&e->w.list, &mdev->net_ee); + } else { + drbd_free_ee(mdev, e); + } + spin_unlock_irq(&mdev->req_lock); + + if (unlikely(!ok)) + ERR("drbd_send_block() failed\n"); + return ok; +} + +int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + struct digest_info *di; + int digest_size; + void *digest = NULL; + int ok, eq = 0; + + if (unlikely(cancel)) { + drbd_free_ee(mdev, e); + dec_unacked(mdev); + return 1; + } + + drbd_rs_complete_io(mdev, e->sector); + + di = (struct digest_info *)(unsigned long)e->block_id; + + if (likely(drbd_bio_uptodate(e->private_bio))) { + /* quick hack to try to avoid a race against reconfiguration. + * a real fix would be much more involved, + * introducing more locking mechanisms */ + if (mdev->csums_tfm) { + digest_size = crypto_hash_digestsize(mdev->csums_tfm); + D_ASSERT(digest_size == di->digest_size); + digest = kmalloc(digest_size, GFP_KERNEL); + } + if (digest) { + drbd_csum(mdev, mdev->csums_tfm, e->private_bio, digest); + eq = !memcmp(digest, di->digest, digest_size); + kfree(digest); + } + + if (eq) { + drbd_set_in_sync(mdev, e->sector, e->size); + mdev->rs_same_csum++; + ok = drbd_send_ack(mdev, RSIsInSync, e); + } else { + inc_rs_pending(mdev); + e->block_id = ID_SYNCER; + ok = drbd_send_block(mdev, RSDataReply, e); + } + } else { + ok = drbd_send_ack(mdev, NegRSDReply, e); + if (__ratelimit(&drbd_ratelimit_state)) + ERR("Sending NegDReply. 
I guess it gets messy.\n"); + drbd_io_error(mdev, FALSE); + } + + dec_unacked(mdev); + + kfree(di); + + spin_lock_irq(&mdev->req_lock); + if (drbd_bio_has_active_page(e->private_bio)) { + /* This might happen if sendpage() has not finished */ + list_add_tail(&e->w.list, &mdev->net_ee); + } else { + drbd_free_ee(mdev, e); + } + spin_unlock_irq(&mdev->req_lock); + + if (unlikely(!ok)) + ERR("drbd_send_block/ack() failed\n"); + return ok; +} + +int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + int digest_size; + void *digest; + int ok = 1; + + if (unlikely(cancel)) { + drbd_free_ee(mdev, e); + dec_unacked(mdev); + return 1; + } + + if (likely(drbd_bio_uptodate(e->private_bio))) { + digest_size = crypto_hash_digestsize(mdev->verify_tfm); + digest = kmalloc(digest_size, GFP_KERNEL); + if (digest) { + drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest); + ok = drbd_send_drequest_csum(mdev, e->sector, e->size, + digest, digest_size, OVReply); + if (ok) + inc_rs_pending(mdev); + kfree(digest); + } + } + + dec_unacked(mdev); + + spin_lock_irq(&mdev->req_lock); + drbd_free_ee(mdev, e); + spin_unlock_irq(&mdev->req_lock); + + return ok; +} + +void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size) +{ + if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) { + mdev->ov_last_oos_size += size>>9; + } else { + mdev->ov_last_oos_start = sector; + mdev->ov_last_oos_size = size>>9; + } + drbd_set_out_of_sync(mdev, sector, size); + set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); +} + +int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + struct digest_info *di; + int digest_size; + void *digest; + int ok, eq = 0; + + if (unlikely(cancel)) { + drbd_free_ee(mdev, e); + dec_unacked(mdev); + return 1; + } + + /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all + * the resync lru has been cleaned up already */ + drbd_rs_complete_io(mdev, e->sector); + + di = (struct digest_info *)(unsigned long)e->block_id; + + if (likely(drbd_bio_uptodate(e->private_bio))) { + digest_size = crypto_hash_digestsize(mdev->verify_tfm); + digest = kmalloc(digest_size, GFP_KERNEL); + if (digest) { + drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest); + + D_ASSERT(digest_size == di->digest_size); + eq = !memcmp(digest, di->digest, digest_size); + kfree(digest); + } + } else { + ok = drbd_send_ack(mdev, NegRSDReply, e); + if (__ratelimit(&drbd_ratelimit_state)) + ERR("Sending NegDReply. I guess it gets messy.\n"); + drbd_io_error(mdev, FALSE); + } + + dec_unacked(mdev); + + kfree(di); + + if (!eq) + drbd_ov_oos_found(mdev, e->sector, e->size); + else + ov_oos_print(mdev); + + ok = drbd_send_ack_ex(mdev, OVResult, e->sector, e->size, + eq ? ID_IN_SYNC : ID_OUT_OF_SYNC); + + spin_lock_irq(&mdev->req_lock); + drbd_free_ee(mdev, e); + spin_unlock_irq(&mdev->req_lock); + + if (--mdev->ov_left == 0) { + ov_oos_print(mdev); + drbd_resync_finished(mdev); + } + + return ok; +} + +int w_prev_work_done(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + clear_bit(WORK_PENDING, &mdev->flags); + wake_up(&mdev->misc_wait); + return 1; +} + +int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct drbd_barrier *b = (struct drbd_barrier *)w; + struct Drbd_Barrier_Packet *p = &mdev->data.sbuf.Barrier; + int ok = 1; + + /* really avoid racing with tl_clear. 
w.cb may have been referenced + * just before it was reassigned and requeued, so double check that. + * actually, this race was harmless, since we only try to send the + * barrier packet here, and otherwise do nothing with the object. + * but compare with the head of w_clear_epoch */ + spin_lock_irq(&mdev->req_lock); + if (w->cb != w_send_barrier || mdev->state.conn < Connected) + cancel = 1; + spin_unlock_irq(&mdev->req_lock); + if (cancel) + return 1; + + if (!drbd_get_data_sock(mdev)) + return 0; + p->barrier = b->br_number; + /* inc_ap_pending was done where this was queued. + * dec_ap_pending will be done in got_BarrierAck + * or (on connection loss) in w_clear_epoch. */ + ok = _drbd_send_cmd(mdev, mdev->data.socket, Barrier, + (struct Drbd_Header *)p, sizeof(*p), 0); + drbd_put_data_sock(mdev); + + return ok; +} + +int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + if (cancel) + return 1; + return drbd_send_short_cmd(mdev, UnplugRemote); +} + +/** + * w_send_dblock: Send a mirrored write request. + */ +int w_send_dblock(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct drbd_request *req = (struct drbd_request *)w; + int ok; + + if (unlikely(cancel)) { + req_mod(req, send_canceled, 0); + return 1; + } + + ok = drbd_send_dblock(mdev, req); + req_mod(req, ok ? handed_over_to_network : send_failed, 0); + + return ok; +} + +/** + * w_send_read_req: Send a read requests. + */ +int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct drbd_request *req = (struct drbd_request *)w; + int ok; + + if (unlikely(cancel)) { + req_mod(req, send_canceled, 0); + return 1; + } + + ok = drbd_send_drequest(mdev, DataRequest, req->sector, req->size, + (unsigned long)req); + + if (!ok) { + /* ?? we set Timeout or BrokenPipe in drbd_send(); + * so this is probably redundant */ + if (mdev->state.conn >= Connected) + drbd_force_state(mdev, NS(conn, NetworkFailure)); + } + req_mod(req, ok ? handed_over_to_network : send_failed, 0); + + return ok; +} + +STATIC int _drbd_may_sync_now(struct drbd_conf *mdev) +{ + struct drbd_conf *odev = mdev; + + while (1) { + if (odev->sync_conf.after == -1) + return 1; + odev = minor_to_mdev(odev->sync_conf.after); + ERR_IF(!odev) return 1; + if ((odev->state.conn >= SyncSource && + odev->state.conn <= PausedSyncT) || + odev->state.aftr_isp || odev->state.peer_isp || + odev->state.user_isp) + return 0; + } +} + +/** + * _drbd_pause_after: + * Finds all devices that may not resync now, and causes them to + * pause their resynchronisation. + * Called from process context only (admin command and after_state_ch). + */ +STATIC int _drbd_pause_after(struct drbd_conf *mdev) +{ + struct drbd_conf *odev; + int i, rv = 0; + + for (i = 0; i < minor_count; i++) { + odev = minor_to_mdev(i); + if (!odev) + continue; + if (odev->state.conn == StandAlone && odev->state.disk == Diskless) + continue; + if (!_drbd_may_sync_now(odev)) + rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), ChgStateHard, NULL) + != SS_NothingToDo); + } + + return rv; +} + +/** + * _drbd_resume_next: + * Finds all devices that can resume resynchronisation + * process, and causes them to resume. + * Called from process context only (admin command and worker). 
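+ * It is the counterpart of _drbd_pause_after(): it clears aftr_isp on
+ * every device whose sync-after dependency is satisfied again.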
+ */ +STATIC int _drbd_resume_next(struct drbd_conf *mdev) +{ + struct drbd_conf *odev; + int i, rv = 0; + + for (i = 0; i < minor_count; i++) { + odev = minor_to_mdev(i); + if (!odev) + continue; + if (odev->state.aftr_isp) { + if (_drbd_may_sync_now(odev)) + rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0), + ChgStateHard, NULL) + != SS_NothingToDo) ; + } + } + return rv; +} + +void resume_next_sg(struct drbd_conf *mdev) +{ + write_lock_irq(&global_state_lock); + _drbd_resume_next(mdev); + write_unlock_irq(&global_state_lock); +} + +void suspend_other_sg(struct drbd_conf *mdev) +{ + write_lock_irq(&global_state_lock); + _drbd_pause_after(mdev); + write_unlock_irq(&global_state_lock); +} + +void drbd_alter_sa(struct drbd_conf *mdev, int na) +{ + int changes; + + write_lock_irq(&global_state_lock); + mdev->sync_conf.after = na; + + do { + changes = _drbd_pause_after(mdev); + changes |= _drbd_resume_next(mdev); + } while (changes); + + write_unlock_irq(&global_state_lock); +} + +/** + * drbd_start_resync: + * @side: Either SyncSource or SyncTarget + * Start the resync process. Called from process context only, + * either admin command or drbd_receiver. + * Note, this function might bring you directly into one of the + * PausedSync* states. + */ +void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) +{ + union drbd_state_t ns; + int r; + + MTRACE(TraceTypeResync, TraceLvlSummary, + INFO("Resync starting: side=%s\n", + side == SyncTarget ? "SyncTarget" : "SyncSource"); + ); + + drbd_bm_recount_bits(mdev); + + /* In case a previous resync run was aborted by an IO error... */ + drbd_rs_cancel_all(mdev); + + if (side == SyncTarget) { + /* Since application IO was locked out during WFBitMapT and + WFSyncUUID we are still unmodified. Before going to SyncTarget + we check that we might make the data inconsistent. 
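The before-resync-target handler gets a
chance to veto this, before we actually mark our disk Inconsistent
further down.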
*/ + r = drbd_khelper(mdev, "before-resync-target"); + r = (r >> 8) & 0xff; + if (r > 0) { + INFO("before-resync-target handler returned %d, " + "dropping connection.\n", r); + drbd_force_state(mdev, NS(conn, Disconnecting)); + return; + } + } + + drbd_state_lock(mdev); + + if (!inc_local_if_state(mdev, Negotiating)) { + drbd_state_unlock(mdev); + return; + } + + if (side == SyncTarget) { + mdev->bm_resync_fo = 0; + } else /* side == SyncSource */ { + u64 uuid; + + get_random_bytes(&uuid, sizeof(u64)); + drbd_uuid_set(mdev, Bitmap, uuid); + drbd_send_sync_uuid(mdev, uuid); + + D_ASSERT(mdev->state.disk == UpToDate); + } + + write_lock_irq(&global_state_lock); + ns = mdev->state; + + ns.aftr_isp = !_drbd_may_sync_now(mdev); + + ns.conn = side; + + if (side == SyncTarget) + ns.disk = Inconsistent; + else /* side == SyncSource */ + ns.pdsk = Inconsistent; + + r = __drbd_set_state(mdev, ns, ChgStateVerbose, NULL); + ns = mdev->state; + + if (ns.conn < Connected) + r = SS_UnknownError; + + if (r == SS_Success) { + mdev->rs_total = + mdev->rs_mark_left = drbd_bm_total_weight(mdev); + mdev->rs_failed = 0; + mdev->rs_paused = 0; + mdev->rs_start = + mdev->rs_mark_time = jiffies; + mdev->rs_same_csum = 0; + _drbd_pause_after(mdev); + } + write_unlock_irq(&global_state_lock); + drbd_state_unlock(mdev); + dec_local(mdev); + + if (r == SS_Success) { + INFO("Began resync as %s (will sync %lu KB [%lu bits set]).\n", + conns_to_name(ns.conn), + (unsigned long) mdev->rs_total << (BM_BLOCK_SIZE_B-10), + (unsigned long) mdev->rs_total); + + if (mdev->rs_total == 0) { + drbd_resync_finished(mdev); + return; + } + + if (ns.conn == SyncTarget) { + D_ASSERT(!test_bit(STOP_SYNC_TIMER, &mdev->flags)); + mod_timer(&mdev->resync_timer, jiffies); + } + + drbd_md_sync(mdev); + } +} + +int drbd_worker(struct Drbd_thread *thi) +{ + struct drbd_conf *mdev = thi->mdev; + struct drbd_work *w = NULL; + LIST_HEAD(work_list); + int intr = 0, i; + + sprintf(current->comm, "drbd%d_worker", mdev_to_minor(mdev)); + + while (get_t_state(thi) == Running) { + drbd_thread_current_set_cpu(mdev); + + if (down_trylock(&mdev->data.work.s)) { + mutex_lock(&mdev->data.mutex); + if (mdev->data.socket && !mdev->net_conf->no_cork) + drbd_tcp_uncork(mdev->data.socket); + mutex_unlock(&mdev->data.mutex); + + intr = down_interruptible(&mdev->data.work.s); + + mutex_lock(&mdev->data.mutex); + if (mdev->data.socket && !mdev->net_conf->no_cork) + drbd_tcp_cork(mdev->data.socket); + mutex_unlock(&mdev->data.mutex); + } + + if (intr) { + D_ASSERT(intr == -EINTR); + flush_signals(current); + ERR_IF (get_t_state(thi) == Running) + continue; + break; + } + + if (get_t_state(thi) != Running) + break; + /* With this break, we have done a down() but not consumed + the entry from the list. The cleanup code takes care of + this... */ + + w = NULL; + spin_lock_irq(&mdev->data.work.q_lock); + ERR_IF(list_empty(&mdev->data.work.q)) { + /* something terribly wrong in our logic. + * we were able to down() the semaphore, + * but the list is empty... doh. + * + * what is the best thing to do now? + * try again from scratch, restarting the receiver, + * asender, whatnot? could break even more ugly, + * e.g. when we are primary, but no good local data. + * + * I'll try to get away just starting over this loop. 
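+ * (the continue below brings us back to the down_trylock() at the
+ * top of the while loop)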
+ */
+ spin_unlock_irq(&mdev->data.work.q_lock);
+ continue;
+ }
+ w = list_entry(mdev->data.work.q.next, struct drbd_work, list);
+ list_del_init(&w->list);
+ spin_unlock_irq(&mdev->data.work.q_lock);
+
+ if (!w->cb(mdev, w, mdev->state.conn < Connected)) {
+ /* drbd_WARN("worker: a callback failed! \n"); */
+ if (mdev->state.conn >= Connected)
+ drbd_force_state(mdev,
+ NS(conn, NetworkFailure));
+ }
+ }
+
+ spin_lock_irq(&mdev->data.work.q_lock);
+ i = 0;
+ while (!list_empty(&mdev->data.work.q)) {
+ list_splice_init(&mdev->data.work.q, &work_list);
+ spin_unlock_irq(&mdev->data.work.q_lock);
+
+ while (!list_empty(&work_list)) {
+ w = list_entry(work_list.next, struct drbd_work, list);
+ list_del_init(&w->list);
+ w->cb(mdev, w, 1);
+ i++; /* dead debugging code */
+ }
+
+ spin_lock_irq(&mdev->data.work.q_lock);
+ }
+ sema_init(&mdev->data.work.s, 0);
+ /* DANGEROUS race: if someone did queue his work within the spinlock,
+ * but up() ed outside the spinlock, we could get an up() on the
+ * semaphore without corresponding list entry.
+ * So don't do that.
+ */
+ spin_unlock_irq(&mdev->data.work.q_lock);
+
+ D_ASSERT(mdev->state.disk == Diskless && mdev->state.conn == StandAlone);
+ /* _drbd_set_state only uses stop_nowait.
+ * wait here for the Exiting receiver. */
+ drbd_thread_stop(&mdev->receiver);
+ drbd_mdev_cleanup(mdev);
+
+ INFO("worker terminated\n");
+
+ return 0;
+}
--
cgit v1.2.3


From 3fcfac8857c004ec8142421d9cc809659c7676e5 Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Mon, 30 Mar 2009 18:47:20 +0200
Subject: DRBD: variable_length_integer_encoding

Encoding of our simple RLE compression scheme. It is very effective
since large parts of our bitmap are sparse.

Signed-off-by: Philipp Reisner
Signed-off-by: Lars Ellenberg
---
 drivers/block/drbd/drbd_vli.h | 474 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 474 insertions(+)
 create mode 100644 drivers/block/drbd/drbd_vli.h

diff --git a/drivers/block/drbd/drbd_vli.h b/drivers/block/drbd/drbd_vli.h
new file mode 100644
index 000000000000..8f5e8186852d
--- /dev/null
+++ b/drivers/block/drbd/drbd_vli.h
@@ -0,0 +1,474 @@
+/*
+-*- linux-c -*-
+ drbd_vli.h
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 1999-2008, Philipp Reisner .
+ Copyright (C) 2002-2008, Lars Ellenberg .
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _DRBD_VLI_H
+#define _DRBD_VLI_H
+
+/*
+ * At a granularity of 4KiB storage represented per bit,
+ * and storage sizes of several TiB,
+ * and possibly small-bandwidth replication,
+ * the bitmap transfer can take much too long,
+ * if transmitted in plain text.
+ *
+ * We try to reduce the transferred bitmap information
+ * by encoding runlengths of bit polarity.
+ *
+ * We never actually need to encode a "zero" (runlengths are positive).
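+ * (Example: the bit sequence 1111 0000 0000 0011 has the polarity
+ * runlengths 4, 10, 2.)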
+ * But then we have to store the value of the first bit.
+ * So we can as well have the "zero" be a valid runlength,
+ * and start encoding/decoding by "number of _set_ bits" by convention.
+ *
+ * We assume that large areas are either completely set or unset,
+ * which gives good compression with any runlength method,
+ * even when encoding the runlength as fixed size 32bit/64bit integers.
+ *
+ * Still, there may be areas where the polarity flips every few bits,
+ * and encoding the runlength sequence of those areas with fixed size
+ * integers would be much worse than plaintext.
+ *
+ * We want to encode small runlength values with minimum code length,
+ * while still being able to encode a Huge run of all zeros.
+ *
+ * Thus we need a Variable Length Integer encoding, VLI.
+ *
+ * For runlength < 8, we produce more code bits than plaintext input.
+ * We need to send incompressible chunks as plaintext, skip over them
+ * and then see if the next chunk compresses better.
+ *
+ * We don't care too much about "excellent" compression ratio
+ * for large runlengths, 249 bit/24 bit still gives a factor of > 10.
+ *
+ * We care about the cpu time needed to actually encode/decode
+ * into the transmitted byte stream.
+ *
+ * There are endless variants of VLI.
+ * For this special purpose, we just need something that is "good enough",
+ * and easy to understand and code, fast to encode and decode,
+ * and does not consume memory.
+ */
+
+/*
+ * buf points to the current position in the transferred byte stream.
+ * stream is by definition little endian.
+ * *buf_len gives the remaining number of bytes at that position.
+ * *out will receive the decoded value.
+ * returns number of bytes consumed,
+ * or 0 if not enough bytes left in buffer (which would be invalid input).
+ */
+static inline int vli_decode_bytes(u64 *out, unsigned char *buf, unsigned buf_len)
+{
+ u64 tmp = 0;
+ unsigned bytes; /* extra bytes after code byte */
+
+ if (buf_len == 0)
+ return 0;
+
+ switch(*buf) {
+ case 0xff: bytes = 8; break;
+ case 0xfe: bytes = 7; break;
+ case 0xfd: bytes = 6; break;
+ case 0xfc: bytes = 5; break;
+ case 0xfb: bytes = 4; break;
+ case 0xfa: bytes = 3; break;
+ case 0xf9: bytes = 2; break;
+ default:
+ *out = *buf;
+ return 1;
+ }
+
+ if (buf_len <= bytes)
+ return 0;
+
+ /* no pointer cast assignment, there may be funny alignment
+ * requirements on certain architectures */
+ memcpy(&tmp, buf+1, bytes);
+ *out = le64_to_cpu(tmp);
+ return bytes+1;
+}
+
+/*
+ * similarly, encode n into buf.
+ * returns consumed bytes,
+ * or zero if not enough room left in buffer
+ * (in which case the buf is left unchanged).
+ *
+ * encoding is little endian, the first byte codes how many bytes follow.
+ * first byte <= 0xf8 means just this byte, value = code byte.
+ * first byte == 0xf9 .. 0xff: (code byte - 0xf7) data bytes follow.
+ */
+static inline int vli_encode_bytes(unsigned char *buf, u64 n, unsigned buf_len)
+{
+ unsigned bytes; /* _extra_ bytes after code byte */
+
+ if (buf_len == 0)
+ return 0;
+
+ if (n <= 0xf8) {
+ *buf = (unsigned char)n;
+ return 1;
+ }
+
+ bytes = (n < (1ULL << 32))
+ ? (n < (1ULL << 16)) ? 2
+ : (n < (1ULL << 24)) ? 3 : 4
+ : (n < (1ULL << 48)) ?
+ (n < (1ULL << 40)) ? 5 : 6
+ : (n < (1ULL << 56)) ? 7 : 8;
+
+ if (buf_len <= bytes)
+ return 0;
+
+ /* no pointer cast assignment, there may be funny alignment
+ * requirements on certain architectures */
+ *buf++ = 0xf7 + bytes; /* code, 0xf9 ..
0xff */ + n = cpu_to_le64(n); + memcpy(buf, &n, bytes); /* plain */ + return bytes+1; +} + +/* ================================================================== */ + +/* And here the more involved variants of VLI. + * + * Code length is determined by some unique (e.g. unary) prefix. + * This encodes arbitrary bit length, not whole bytes: we have a bit-stream, + * not a byte stream. + */ + +/* for the bitstream, we need a cursor */ +struct bitstream_cursor { + /* the current byte */ + u8 *b; + /* the current bit within *b, nomalized: 0..7 */ + unsigned int bit; +}; + +/* initialize cursor to point to first bit of stream */ +static inline void bitstream_cursor_reset(struct bitstream_cursor *cur, void *s) +{ + cur->b = s; + cur->bit = 0; +} + +/* advance cursor by that many bits; maximum expected input value: 64, + * but depending on VLI implementation, it may be more. */ +static inline void bitstream_cursor_advance(struct bitstream_cursor *cur, unsigned int bits) +{ + bits += cur->bit; + cur->b = cur->b + (bits >> 3); + cur->bit = bits & 7; +} + +/* the bitstream itself knows its length */ +struct bitstream { + struct bitstream_cursor cur; + unsigned char *buf; + size_t buf_len; /* in bytes */ + + /* for input stream: + * number of trailing 0 bits for padding + * total number of valid bits in stream: buf_len * 8 - pad_bits */ + unsigned int pad_bits; +}; + +static inline void bitstream_init(struct bitstream *bs, void *s, size_t len, unsigned int pad_bits) +{ + bs->buf = s; + bs->buf_len = len; + bs->pad_bits = pad_bits; + bitstream_cursor_reset(&bs->cur, bs->buf); +} + +static inline void bitstream_rewind(struct bitstream *bs) +{ + bitstream_cursor_reset(&bs->cur, bs->buf); + memset(bs->buf, 0, bs->buf_len); +} + +/* Put (at most 64) least significant bits of val into bitstream, and advance cursor. + * Ignores "pad_bits". + * Returns zero if bits == 0 (nothing to do). + * Returns number of bits used if successful. + * + * If there is not enough room left in bitstream, + * leaves bitstream unchanged and returns -ENOBUFS. + */ +static inline int bitstream_put_bits(struct bitstream *bs, u64 val, const unsigned int bits) +{ + unsigned char *b = bs->cur.b; + unsigned int tmp; + + if (bits == 0) + return 0; + + if ((bs->cur.b + ((bs->cur.bit + bits -1) >> 3)) - bs->buf >= bs->buf_len) + return -ENOBUFS; + + /* paranoia: strip off hi bits; they should not be set anyways. */ + if (bits < 64) + val &= ~0ULL >> (64 - bits); + + *b++ |= (val & 0xff) << bs->cur.bit; + + for (tmp = 8 - bs->cur.bit; tmp < bits; tmp += 8) + *b++ |= (val >> tmp) & 0xff; + + bitstream_cursor_advance(&bs->cur, bits); + return bits; +} + +/* Fetch (at most 64) bits from bitstream into *out, and advance cursor. + * + * If more than 64 bits are requested, returns -EINVAL and leave *out unchanged. + * + * If there are less than the requested number of valid bits left in the + * bitstream, still fetches all available bits. + * + * Returns number of actually fetched bits. 
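+ *
+ * (A decoder typically calls this in a loop until it returns 0, that
+ * is, until the valid bits of the stream are exhausted.)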
+ */ +static inline int bitstream_get_bits(struct bitstream *bs, u64 *out, int bits) +{ + u64 val; + unsigned int n; + + if (bits > 64) + return -EINVAL; + + if (bs->cur.b + ((bs->cur.bit + bs->pad_bits + bits -1) >> 3) - bs->buf >= bs->buf_len) + bits = ((bs->buf_len - (bs->cur.b - bs->buf)) << 3) + - bs->cur.bit - bs->pad_bits; + + if (bits == 0) { + *out = 0; + return 0; + } + + /* get the high bits */ + val = 0; + n = (bs->cur.bit + bits + 7) >> 3; + /* n may be at most 9, if cur.bit + bits > 64 */ + /* which means this copies at most 8 byte */ + if (n) { + memcpy(&val, bs->cur.b+1, n - 1); + val = le64_to_cpu(val) << (8 - bs->cur.bit); + } + + /* we still need the low bits */ + val |= bs->cur.b[0] >> bs->cur.bit; + + /* and mask out bits we don't want */ + val &= ~0ULL >> (64 - bits); + + bitstream_cursor_advance(&bs->cur, bits); + *out = val; + + return bits; +} + +/* we still need to actually define the code. */ + +/* + * encoding is "visualised" as + * __little endian__ bitstream, least significant bit first (left most) + * + * this particular encoding is chosen so that the prefix code + * starts as unary encoding the level, then modified so that + * 11 levels can be described in 8bit, with minimal overhead + * for the smaller levels. + * + * Number of data bits follow fibonacci sequence, with the exception of the + * last level (+1 data bit, so it makes 64bit total). The only worse code when + * encoding bit polarity runlength is 2 plain bits => 3 code bits. +prefix data bits max val Nº data bits +0 0x1 0 +10 x 0x3 1 +110 x 0x5 1 +1110 xx 0x9 2 +11110 xxx 0x11 3 +1111100 x xxxx 0x31 5 +1111101 x xxxxxxx 0x131 8 +11111100 xxxxxxxx xxxxx 0x2131 13 +11111110 xxxxxxxx xxxxxxxx xxxxx 0x202131 21 +11111101 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xx 0x400202131 34 +11111111 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 56 + * maximum encodable value: 0x100000400202131 == 2**56 + some */ + +/* LEVEL: (total bits, prefix bits, prefix value), + * sorted ascending by number of total bits. + * The rest of the code table is calculated at compiletime from this. */ + +/* fibonacci data 0, 1, ... */ +#define VLI_L_0_1() do { \ + LEVEL( 1, 1, 0x00); \ + LEVEL( 3, 2, 0x01); \ + LEVEL( 4, 3, 0x03); \ + LEVEL( 6, 4, 0x07); \ + LEVEL( 8, 5, 0x0f); \ + LEVEL(12, 7, 0x1f); \ + LEVEL(15, 7, 0x5f); \ + LEVEL(21, 8, 0x3f); \ + LEVEL(29, 8, 0x7f); \ + LEVEL(42, 8, 0xbf); \ + LEVEL(64, 8, 0xff); \ + } while (0) + +/* Some variants, differeing in number of levels, prefix value, and number of + * databits in each level. I tried a lot of variants. Those where the number + * of data bits follows the fibonacci sequence (with a certain offset) simply + * "look best" ;-) + * All of these can encode at least "2 ** 56". */ + +/* fibonacci data 1, 1, ... */ +#define VLI_L_1_1() do { \ + LEVEL( 2, 1, 0x00); \ + LEVEL( 3, 2, 0x01); \ + LEVEL( 5, 3, 0x03); \ + LEVEL( 7, 4, 0x07); \ + LEVEL(10, 5, 0x0f); \ + LEVEL(14, 6, 0x1f); \ + LEVEL(21, 8, 0x3f); \ + LEVEL(29, 8, 0x7f); \ + LEVEL(42, 8, 0xbf); \ + LEVEL(64, 8, 0xff); \ + } while (0) + +/* fibonacci data 1, 2, ... */ +#define VLI_L_1_2() do { \ + LEVEL( 2, 1, 0x00); \ + LEVEL( 4, 2, 0x01); \ + LEVEL( 6, 3, 0x03); \ + LEVEL( 9, 4, 0x07); \ + LEVEL(13, 5, 0x0f); \ + LEVEL(19, 6, 0x1f); \ + LEVEL(28, 7, 0x3f); \ + LEVEL(42, 8, 0x7f); \ + LEVEL(64, 8, 0xff); \ + } while (0) + +/* fibonacci data 2, 3, ... 
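(data bits per level, total minus prefix
bits: 2, 3, 5, 8, 13, 21, 34, and finally 57)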
*/ +#define VLI_L_2_3() do { \ + LEVEL( 3, 1, 0x00); \ + LEVEL( 5, 2, 0x01); \ + LEVEL( 8, 3, 0x03); \ + LEVEL(12, 4, 0x07); \ + LEVEL(18, 5, 0x0f); \ + LEVEL(27, 6, 0x1f); \ + LEVEL(41, 7, 0x3f); \ + LEVEL(64, 7, 0x5f); \ + } while (0) + +/* fibonacci data 3, 5, ... */ +#define VLI_L_3_5() do { \ + LEVEL( 4, 1, 0x00); \ + LEVEL( 7, 2, 0x01); \ + LEVEL(11, 3, 0x03); \ + LEVEL(17, 4, 0x07); \ + LEVEL(26, 5, 0x0f); \ + LEVEL(40, 6, 0x1f); \ + LEVEL(64, 6, 0x3f); \ + } while (0) + +/* CONFIG */ +#ifndef VLI_LEVELS +#define VLI_LEVELS() VLI_L_3_5() +#endif + +/* finds a suitable level to decode the least significant part of in. + * returns number of bits consumed. + * + * BUG() for bad input, as that would mean a buggy code table. */ +static inline int vli_decode_bits(u64 *out, const u64 in) +{ + u64 adj = 1; + +#define LEVEL(t,b,v) \ + do { \ + if ((in & ((1 << b) -1)) == v) { \ + *out = ((in & ((~0ULL) >> (64-t))) >> b) + adj; \ + return t; \ + } \ + adj += 1ULL << (t - b); \ + } while (0) + + VLI_LEVELS(); + + /* NOT REACHED, if VLI_LEVELS code table is defined properly */ + BUG(); +#undef LEVEL +} + +/* return number of code bits needed, + * or negative error number */ +static inline int __vli_encode_bits(u64 *out, const u64 in) +{ + u64 max = 0; + u64 adj = 1; + + if (in == 0) + return -EINVAL; + +#define LEVEL(t,b,v) do { \ + max += 1ULL << (t - b); \ + if (in <= max) { \ + if (out) \ + *out = ((in - adj) << b) | v; \ + return t; \ + } \ + adj = max + 1; \ + } while (0) + + VLI_LEVELS(); + + return -EOVERFLOW; +#undef LEVEL +} + +/* encodes @in as vli into @bs; + + * return values + * > 0: number of bits successfully stored in bitstream + * -ENOBUFS @bs is full + * -EINVAL input zero (invalid) + * -EOVERFLOW input too large for this vli code (invalid) + */ +static inline int vli_encode_bits(struct bitstream *bs, u64 in) +{ + u64 code = code; + int bits = __vli_encode_bits(&code, in); + + if (bits <= 0) + return bits; + + return bitstream_put_bits(bs, code, bits); +} + +#undef VLI_L_0_1 +#undef VLI_L_1_1 +#undef VLI_L_1_2 +#undef VLI_L_2_3 +#undef VLI_L_3_5 + +#undef VLI_LEVELS +#endif -- cgit v1.2.3 From 11e154b84769c247b57e751f485a610c8f8fa248 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 30 Mar 2009 18:47:22 +0200 Subject: DRBD: misc buildtag.c tag will go away when we are not longer an external module. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_buildtag.c | 7 +++ drivers/block/drbd/drbd_strings.c | 115 +++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 drivers/block/drbd/drbd_buildtag.c create mode 100644 drivers/block/drbd/drbd_strings.c diff --git a/drivers/block/drbd/drbd_buildtag.c b/drivers/block/drbd/drbd_buildtag.c new file mode 100644 index 000000000000..c78d252a889f --- /dev/null +++ b/drivers/block/drbd/drbd_buildtag.c @@ -0,0 +1,7 @@ +/* automatically generated. DO NOT EDIT. */ +#include +const char *drbd_buildtag(void) +{ + return "GIT-hash: c74771beb9598144d31b861e7ea966f914914c4f drbd/drbd_actlog.c drbd/drbd_bitmap.c drbd/drbd_int.h drbd/drbd_main.c drbd/drbd_receiver.c drbd/drbd_req.c drbd/drbd_worker.c" + " build by phil@fat-tyre, 2009-03-30 16:54:38"; +} diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c new file mode 100644 index 000000000000..491019c8331d --- /dev/null +++ b/drivers/block/drbd/drbd_strings.c @@ -0,0 +1,115 @@ +/* + drbd.h + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 
+ + Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2003-2008, Philipp Reisner . + Copyright (C) 2003-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + +*/ + +#include + +static const char *drbd_conn_s_names[] = { + [StandAlone] = "StandAlone", + [Disconnecting] = "Disconnecting", + [Unconnected] = "Unconnected", + [Timeout] = "Timeout", + [BrokenPipe] = "BrokenPipe", + [NetworkFailure] = "NetworkFailure", + [ProtocolError] = "ProtocolError", + [WFConnection] = "WFConnection", + [WFReportParams] = "WFReportParams", + [TearDown] = "TearDown", + [Connected] = "Connected", + [StartingSyncS] = "StartingSyncS", + [StartingSyncT] = "StartingSyncT", + [WFBitMapS] = "WFBitMapS", + [WFBitMapT] = "WFBitMapT", + [WFSyncUUID] = "WFSyncUUID", + [SyncSource] = "SyncSource", + [SyncTarget] = "SyncTarget", + [VerifyS] = "VerifyS", + [VerifyT] = "VerifyT", + [PausedSyncS] = "PausedSyncS", + [PausedSyncT] = "PausedSyncT" +}; + +static const char *drbd_role_s_names[] = { + [Primary] = "Primary", + [Secondary] = "Secondary", + [Unknown] = "Unknown" +}; + +static const char *drbd_disk_s_names[] = { + [Diskless] = "Diskless", + [Attaching] = "Attaching", + [Failed] = "Failed", + [Negotiating] = "Negotiating", + [Inconsistent] = "Inconsistent", + [Outdated] = "Outdated", + [DUnknown] = "DUnknown", + [Consistent] = "Consistent", + [UpToDate] = "UpToDate", +}; + +static const char *drbd_state_sw_errors[] = { + [-SS_TwoPrimaries] = "Multiple primaries not allowed by config", + [-SS_NoUpToDateDisk] = + "Refusing to be Primary without at least one UpToDate disk", + [-SS_BothInconsistent] = "Refusing to be inconsistent on both nodes", + [-SS_SyncingDiskless] = "Refusing to be syncing and diskless", + [-SS_ConnectedOutdates] = "Refusing to be Outdated while Connected", + [-SS_PrimaryNOP] = "Refusing to be Primary while peer is not outdated", + [-SS_ResyncRunning] = "Can not start OV/resync since it is already active", + [-SS_AlreadyStandAlone] = "Can not disconnect a StandAlone device", + [-SS_CW_FailedByPeer] = "State changed was refused by peer node", + [-SS_IsDiskLess] = + "Device is diskless, the requesed operation requires a disk", + [-SS_DeviceInUse] = "Device is held open by someone", + [-SS_NoNetConfig] = "Have no net/connection configuration", + [-SS_NoVerifyAlg] = "Need a verify algorithm to start online verify", + [-SS_NeedConnection] = "Need a connection to start verify or resync", + [-SS_NotSupported] = "Peer does not support protocol", + [-SS_LowerThanOutdated] = "Disk state is lower than outdated", + [-SS_InTransientState] = "In transient state, retry after next state change", + [-SS_ConcurrentStChg] = "Concurrent state changes detected and aborted", +}; + +const char *conns_to_name(enum drbd_conns s) +{ + /* enums are unsigned... */ + return s > PausedSyncT ? 
"TOO_LARGE" : drbd_conn_s_names[s]; +} + +const char *roles_to_name(enum drbd_role s) +{ + return s > Secondary ? "TOO_LARGE" : drbd_role_s_names[s]; +} + +const char *disks_to_name(enum drbd_disk_state s) +{ + return s > UpToDate ? "TOO_LARGE" : drbd_disk_s_names[s]; +} + +const char *set_st_err_name(enum set_st_err err) +{ + return err <= SS_AfterLastError ? "TOO_SMALL" : + err > SS_TwoPrimaries ? "TOO_LARGE" + : drbd_state_sw_errors[-err]; +} -- cgit v1.2.3 From 64189e310d1ce1a733f752bb24c9abec240ad0b3 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 30 Mar 2009 18:47:23 +0200 Subject: DRBD: final Kconfig integration, Makefile and major.h Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/Kconfig | 2 ++ drivers/block/Makefile | 1 + drivers/block/drbd/Kconfig | 32 ++++++++++++++++++++++++++++++++ drivers/block/drbd/Makefile | 7 +++++++ include/linux/major.h | 1 + 5 files changed, 43 insertions(+) create mode 100644 drivers/block/drbd/Kconfig create mode 100644 drivers/block/drbd/Makefile diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index ddea8e485cc9..e8db999e30af 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -271,6 +271,8 @@ config BLK_DEV_CRYPTOLOOP instead, which can be configured to be on-disk compatible with the cryptoloop device. +source "drivers/block/drbd/Kconfig" + config BLK_DEV_NBD tristate "Network block device support" depends on NET diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 7755a5e2a85e..33f00467860c 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -35,5 +35,6 @@ obj-$(CONFIG_BLK_DEV_UB) += ub.o obj-$(CONFIG_BLK_DEV_HD) += hd.o obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o +obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ swim_mod-objs := swim.o swim_asm.o diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig new file mode 100644 index 000000000000..466cd36dc449 --- /dev/null +++ b/drivers/block/drbd/Kconfig @@ -0,0 +1,32 @@ +# +# DRBD device driver configuration +# +config BLK_DEV_DRBD + tristate "DRBD Distributed Replicated Block Device support" + select INET + select PROC_FS + select CONNECTOR + select CRYPTO + select CRYPTO_HMAC + ---help--- + DRBD is a block device which is designed to build high availability + clusters. This is done by mirroring a whole block device via (a + dedicated) network. You could see it as a network RAID 1. + + Each minor device has a state, which can be 'primary' or 'secondary'. + On the node with the primary device the application is supposed to + run and to access the device (/dev/drbdX). Every write is sent to the + local 'lower level block device' and via network to the node with the + device in 'secondary' state. + The secondary device simply writes the data to its lower level block + device. Currently no read-balancing via the network is done. + + DRBD can also be used with "shared-disk semantics" (primary-primary), + even though it is a "shared-nothing cluster". You'd need to use a + cluster file system on top of that for cache coherency. + + DRBD management is done through user-space tools. + For automatic failover you need a cluster manager (e.g. heartbeat). + See also: http://www.drbd.org/, http://www.linux-ha.org + + If unsure, say N. 
diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile new file mode 100644 index 000000000000..6505d7eedb61 --- /dev/null +++ b/drivers/block/drbd/Makefile @@ -0,0 +1,7 @@ +#CFLAGS_drbd_sizeof_sanity_check.o = -Wpadded # -Werror + +drbd-objs := drbd_buildtag.o drbd_bitmap.o drbd_proc.o \ + drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o \ + lru_cache.o drbd_main.o drbd_strings.o drbd_nl.o + +obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o diff --git a/include/linux/major.h b/include/linux/major.h index 058ec15dd060..6a8ca98c9a96 100644 --- a/include/linux/major.h +++ b/include/linux/major.h @@ -145,6 +145,7 @@ #define UNIX98_PTY_MAJOR_COUNT 8 #define UNIX98_PTY_SLAVE_MAJOR (UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) +#define DRBD_MAJOR 147 #define RTF_MAJOR 150 #define RAW_MAJOR 162 -- cgit v1.2.3 From f83a37888aeeadddfa3c7d4cfa477bd156b3bbe6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 9 Apr 2009 14:25:19 +0200 Subject: Cleanups triggered by the comments of Nikanth Karthikesan and Sam Ravnborg. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/Kconfig | 45 +++++++++++++++++-------------- drivers/block/drbd/Makefile | 8 +++--- drivers/block/drbd/drbd_bitmap.c | 55 +++++--------------------------------- drivers/block/drbd/drbd_buildtag.c | 4 +-- drivers/block/drbd/lru_cache.c | 2 +- 5 files changed, 37 insertions(+), 77 deletions(-) diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig index 466cd36dc449..c5847ddb24b3 100644 --- a/drivers/block/drbd/Kconfig +++ b/drivers/block/drbd/Kconfig @@ -1,31 +1,36 @@ # # DRBD device driver configuration # + +comment "DRBD disabled because PROC_FS, INET or CONNECTOR not selected" + depends on !PROC_FS || !INET || !CONNECTOR + config BLK_DEV_DRBD tristate "DRBD Distributed Replicated Block Device support" - select INET - select PROC_FS - select CONNECTOR - select CRYPTO - select CRYPTO_HMAC - ---help--- - DRBD is a block device which is designed to build high availability - clusters. This is done by mirroring a whole block device via (a - dedicated) network. You could see it as a network RAID 1. - - Each minor device has a state, which can be 'primary' or 'secondary'. + depends on PROC_FS && INET && CONNECTOR + help + + NOTE: In order to authenticate connections you have to select + CRYPTO_HMAC and a hash function as well. + + DRBD is a shared-nothing, synchronously replicated block device. It + is designed to serve as a building block for high availability + clusters and in this context, is a "drop-in" replacement for shared + storage. Simplistically, you could see it as a network RAID 1. + + Each minor device has a role, which can be 'primary' or 'secondary'. On the node with the primary device the application is supposed to - run and to access the device (/dev/drbdX). Every write is sent to the - local 'lower level block device' and via network to the node with the - device in 'secondary' state. - The secondary device simply writes the data to its lower level block - device. Currently no read-balancing via the network is done. + run and to access the device (/dev/drbdX). Every write is sent to + the local 'lower level block device' and, across the network, to the + node with the device in 'secondary' state. The secondary device + simply writes the data to its lower level block device. - DRBD can also be used with "shared-disk semantics" (primary-primary), - even though it is a "shared-nothing cluster". 
You'd need to use a - cluster file system on top of that for cache coherency. + DRBD can also be used in dual-Primary mode (device writable on both + nodes), which means it can exhibit shared disk semantics in a + shared-nothing cluster. Needless to say, on top of dual-Primary + DRBD utilizing a cluster file system is necessary to maintain for + cache coherency. - DRBD management is done through user-space tools. For automatic failover you need a cluster manager (e.g. heartbeat). See also: http://www.drbd.org/, http://www.linux-ha.org diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile index 6505d7eedb61..7237c339f7da 100644 --- a/drivers/block/drbd/Makefile +++ b/drivers/block/drbd/Makefile @@ -1,7 +1,5 @@ -#CFLAGS_drbd_sizeof_sanity_check.o = -Wpadded # -Werror - -drbd-objs := drbd_buildtag.o drbd_bitmap.o drbd_proc.o \ - drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o \ - lru_cache.o drbd_main.o drbd_strings.o drbd_nl.o +drbd-y := drbd_buildtag.o drbd_bitmap.o drbd_proc.o +drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o +drbd-y += lru_cache.o drbd_main.o drbd_strings.o drbd_nl.o obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 6704d79837df..fed1d1ebb7eb 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -78,8 +78,8 @@ struct drbd_bitmap { }; /* definition of bits in bm_flags */ -#define BM_LOCKED 0 -#define BM_MD_IO_ERROR (BITS_PER_LONG-1) /* 31? 63? */ +#define BM_LOCKED 0 +#define BM_MD_IO_ERROR 1 static inline int bm_is_locked(struct drbd_bitmap *b) { @@ -148,23 +148,6 @@ void drbd_bm_unlock(struct drbd_conf *mdev) up(&b->bm_change); } -#define bm_end_info(ignored...) ((void)(0)) - -#if 0 -#define catch_oob_access_start() do { \ - do { \ - if ((bm-p_addr) >= PAGE_SIZE/sizeof(long)) { \ - printk(KERN_ALERT "drbd_bitmap.c:%u %s: p_addr:%p bm:%p %d\n", \ - __LINE__ , __func__ , p_addr, bm, (bm-p_addr)); \ - break; \ - } -#define catch_oob_access_end() \ - } while (0); } while (0) -#else -#define catch_oob_access_start() do { -#define catch_oob_access_end() } while (0) -#endif - /* word offset to long pointer */ STATIC unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km) { @@ -345,18 +328,14 @@ STATIC int bm_clear_surplus(struct drbd_bitmap *b) p_addr = bm_map_paddr(b, w); bm = p_addr + MLPP(w); if (w < b->bm_words) { - catch_oob_access_start(); cleared = hweight_long(*bm & ~mask); *bm &= mask; - catch_oob_access_end(); w++; bm++; } if (w < b->bm_words) { - catch_oob_access_start(); cleared += hweight_long(*bm); *bm = 0; - catch_oob_access_end(); } bm_unmap(p_addr); return cleared; @@ -371,16 +350,12 @@ STATIC void bm_set_surplus(struct drbd_bitmap *b) p_addr = bm_map_paddr(b, w); bm = p_addr + MLPP(w); if (w < b->bm_words) { - catch_oob_access_start(); *bm |= ~mask; bm++; w++; - catch_oob_access_end(); } if (w < b->bm_words) { - catch_oob_access_start(); *bm = ~(0UL); - catch_oob_access_end(); } bm_unmap(p_addr); } @@ -396,13 +371,11 @@ STATIC unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endia p_addr = bm_map_paddr(b, offset); bm = p_addr + MLPP(offset); while (i--) { - catch_oob_access_start(); #ifndef __LITTLE_ENDIAN if (swap_endian) *bm = lel_to_cpu(*bm); #endif bits += hweight_long(*bm++); - catch_oob_access_end(); } bm_unmap(p_addr); offset += do_now; @@ -463,14 +436,12 @@ STATIC void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) do_now = 
min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset; p_addr = bm_map_paddr(b, offset); bm = p_addr + MLPP(offset); - catch_oob_access_start(); if (bm+do_now > p_addr + LWPP) { printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n", p_addr, bm, (int)do_now); break; /* breaks to after catch_oob_access_end() only! */ } memset(bm, c, do_now * sizeof(long)); - catch_oob_access_end(); bm_unmap(p_addr); offset += do_now; } @@ -573,16 +544,13 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity) p_addr = bm_map_paddr(b, words); bm = p_addr + MLPP(words); - catch_oob_access_start(); *bm = DRBD_MAGIC; - catch_oob_access_end(); bm_unmap(p_addr); (void)bm_clear_surplus(b); if (!growing) b->bm_set = bm_count_bits(b); - bm_end_info(mdev, __func__); spin_unlock_irq(&b->bm_lock); if (opages != npages) vfree(opages); @@ -669,12 +637,10 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, bm = p_addr + MLPP(offset); offset += do_now; while (do_now--) { - catch_oob_access_start(); bits = hweight_long(*bm); word = *bm | lel_to_cpu(*buffer++); *bm++ = word; b->bm_set += hweight_long(word) - bits; - catch_oob_access_end(); } bm_unmap(p_addr); } @@ -683,10 +649,9 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, * where we _know_ that we are 64 bit aligned, * and know that this function is used in this way, too... */ - if (end == b->bm_words) { + if (end == b->bm_words) b->bm_set -= bm_clear_surplus(b); - bm_end_info(mdev, __func__); - } + spin_unlock_irq(&b->bm_lock); } @@ -719,11 +684,8 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, p_addr = bm_map_paddr(b, offset); bm = p_addr + MLPP(offset); offset += do_now; - while (do_now--) { - catch_oob_access_start(); + while (do_now--) *buffer++ = cpu_to_lel(*bm++); - catch_oob_access_end(); - } bm_unmap(p_addr); } } @@ -1249,11 +1211,8 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) int n = e-s; p_addr = bm_map_paddr(b, s); bm = p_addr + MLPP(s); - while (n--) { - catch_oob_access_start(); + while (n--) count += hweight_long(*bm++); - catch_oob_access_end(); - } bm_unmap(p_addr); } else { ERR("start offset (%d) too large in drbd_bm_e_weight\n", s); @@ -1288,10 +1247,8 @@ unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) p_addr = bm_map_paddr(b, s); bm = p_addr + MLPP(s); while (i--) { - catch_oob_access_start(); count += hweight_long(*bm); *bm = -1UL; - catch_oob_access_end(); bm++; } bm_unmap(p_addr); diff --git a/drivers/block/drbd/drbd_buildtag.c b/drivers/block/drbd/drbd_buildtag.c index c78d252a889f..278f88474ece 100644 --- a/drivers/block/drbd/drbd_buildtag.c +++ b/drivers/block/drbd/drbd_buildtag.c @@ -2,6 +2,6 @@ #include const char *drbd_buildtag(void) { - return "GIT-hash: c74771beb9598144d31b861e7ea966f914914c4f drbd/drbd_actlog.c drbd/drbd_bitmap.c drbd/drbd_int.h drbd/drbd_main.c drbd/drbd_receiver.c drbd/drbd_req.c drbd/drbd_worker.c" - " build by phil@fat-tyre, 2009-03-30 16:54:38"; + return "GIT-hash: bbadddd7bad33396ebb8c0c12da9aab594d00c4e drbd/Makefile-2.6" + " build by phil@fat-tyre, 2009-04-01 10:29:21"; } diff --git a/drivers/block/drbd/lru_cache.c b/drivers/block/drbd/lru_cache.c index 33fad4d3e358..98b89c1d4188 100644 --- a/drivers/block/drbd/lru_cache.c +++ b/drivers/block/drbd/lru_cache.c @@ -325,7 +325,7 @@ void lc_changed(struct lru_cache *lc, struct lc_element *e) lc->new_number = -1; clear_bit(__LC_DIRTY, &lc->flags); smp_mb__after_clear_bit(); - PARANOIA_LEAVE(); + RETURN(); } 
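
The lru_cache.c hunk above replaces a bare PARANOIA_LEAVE() with RETURN() so that lc_changed() exits through the same macro as every other paranoia-bracketed function. The bracket is a cheap re-entrancy detector: the entry macro sets a flag and traps if the flag was already set, and the exit macro clears it on every return path, which is why all exits must funnel through it. A userspace sketch of the idea follows, with C11 atomics standing in for the kernel's test_and_set_bit(); all names are illustrative only.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

static atomic_flag in_critical = ATOMIC_FLAG_INIT;

/* entry traps (unexpected) concurrent or recursive calls */
#define PARANOIA_ENTRY() \
	do { if (atomic_flag_test_and_set(&in_critical)) abort(); } while (0)
#define PARANOIA_LEAVE() atomic_flag_clear(&in_critical)
/* every exit path funnels through RETURN(), so the flag always clears */
#define RETURN(x...) do { PARANOIA_LEAVE(); return x; } while (0)

static int guarded_op(int v)
{
	PARANOIA_ENTRY();
	if (v < 0)
		RETURN(-1); /* early error exit still clears the flag */
	RETURN(2 * v);
}

int main(void)
{
	printf("%d %d\n", guarded_op(-3), guarded_op(21)); /* prints "-1 42" */
	return 0;
}
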
-- cgit v1.2.3


From ed73f72420ee77afa39313df13d79f16c7e4f79c Mon Sep 17 00:00:00 2001
From: Philipp Reisner 
Date: Thu, 9 Apr 2009 14:30:09 +0200
Subject: Tracking DRBD mainline

782b1ed1e3582393381e1f3f1cb80020f027ca06 Merge branch 'drbd-8.0' into drbd-8.3
80de0d362ffd9906412ce7b602db7c9cd550b678 Adjusted the drbd_congested() function to work on kernels >= 2.6.30
f484da549b3375ef7e7d9d08d5bf2c95745c8eaa Correct expansion of "device minor %d" statements for upper resources.
4cecefe99336e911425e390f115cf32a9036679f Fixed the res_by_minor() function
10d77c7a0d08257a19f8d67fdda3af71bf415b1d Do not do state changes on unconfigured devices (bugz 214)
706b35dc7ada32cf9cc47d331733514a52c5a820 double :: typo
3ced685cd6ae4179d77e09945a4951f4c997d2db Merge branch 'drbd-8.2' into drbd-8.3
70339fe8d52ee93696aa975b9ed14cbaa61d0e0c Merge branch 'drbd-8.0' into drbd-8.2
864141476a52d5e174dcbb03fa9f98b6da6f9689 Fixing the error code path in case the bitmap allocation fails.
8126b2adf618924c1d00407bd56b8b4f3976e198 Fault injection for the bitmap allocation
c091e4513f805f9a2118f01ee87e6ec7a419126a proc/drbd: catch uninitialized congestion flag
d30881451c988619e243d6294a899139eed1183d Preparing 8.0.16

Signed-off-by: Philipp Reisner 
Signed-off-by: Lars Ellenberg 
---
 drivers/block/drbd/drbd_bitmap.c | 8 ++++++--
 drivers/block/drbd/drbd_int.h | 1 +
 drivers/block/drbd/drbd_main.c | 25 ++++++++++++++++---------
 drivers/block/drbd/drbd_nl.c | 6 +++---
 drivers/block/drbd/drbd_proc.c | 2 +-
 drivers/block/drbd/drbd_worker.c | 2 ++
 drivers/block/drbd/drbd_wrappers.h | 1 -
 7 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index fed1d1ebb7eb..bfc695a6d6e6 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -509,8 +509,12 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity)
 if (want == have) {
 D_ASSERT(b->bm_pages != NULL);
 npages = b->bm_pages;
- } else
- npages = bm_realloc_pages(b->bm_pages, have, want);
+ } else {
+ if (FAULT_ACTIVE(mdev, DRBD_FAULT_BM_ALLOC))
+ npages = NULL;
+ else
+ npages = bm_realloc_pages(b->bm_pages, have, want);
+ }

 if (!npages) {
 err = -ENOMEM;
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 01e55d959fcd..3c9a0fa2051e 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -152,6 +152,7 @@ enum {
 DRBD_FAULT_DT_WR, /* data */
 DRBD_FAULT_DT_RD,
 DRBD_FAULT_DT_RA, /* data read ahead */
+ DRBD_FAULT_BM_ALLOC, /* bitmap allocation */
 DRBD_FAULT_AL_EE, /* alloc ee */

 DRBD_FAULT_MAX,
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 9ca38a06fdf1..c2cc18260776 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -792,6 +792,10 @@ int __drbd_set_state(struct drbd_conf *mdev,
 ns.pdsk = DUnknown;
 }

+ /* Clear the aftr_isp when becoming Unconfigured */
+ if (ns.conn == StandAlone && ns.disk == Diskless && ns.role == Secondary)
+ ns.aftr_isp = 0;
+
 if (ns.conn <= Disconnecting && ns.disk == Diskless)
 ns.pdsk = DUnknown;

@@ -1215,8 +1219,11 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state_t os,
 /* Terminate worker thread if we are unconfigured - it will
 be restarted as needed...
*/ - if (ns.disk == Diskless && ns.conn == StandAlone && ns.role == Secondary) + if (ns.disk == Diskless && ns.conn == StandAlone && ns.role == Secondary) { + if (os.aftr_isp != ns.aftr_isp) + resume_next_sg(mdev); drbd_thread_stop_nowait(&mdev->worker); + } drbd_md_sync(mdev); } @@ -2856,8 +2863,8 @@ STATIC void drbd_cleanup(void) } /** - * drbd_congested: Returns 1<bc->backing_bdev); r = bdi_congested(&q->backing_dev_info, bdi_bits); dec_local(mdev); - if (r) { + if (r) reason = 'b'; - goto out; - } } - if (bdi_bits & (1 << BDI_write_congested) && test_bit(NET_CONGESTED, &mdev->flags)) { - r = (1 << BDI_write_congested); - reason = 'n'; + if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->flags)) { + r |= (1 << BDI_async_congested); + reason = reason == 'b' ? 'a' : 'n'; } out: @@ -3593,6 +3598,8 @@ _drbd_fault_str(unsigned int type) { "Data write", "Data read", "Data read ahead", + "BM allocation", + "EE allocation" }; return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**"; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ee45093d2546..eddfcae9052d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -561,7 +561,7 @@ enum determin_dev_size_enum drbd_determin_dev_size(struct drbd_conf *mdev) __mus size = drbd_bm_capacity(mdev)>>1; if (size == 0) { ERR("OUT OF MEMORY! " - "Could not allocate bitmap! "); + "Could not allocate bitmap!\n"); } else { ERR("BM resizing failed. " "Leaving size unchanged at size = %lu KB\n", @@ -886,13 +886,13 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp resync_lru = lc_alloc("resync", 61, sizeof(struct bm_extent), mdev); if (!resync_lru) { retcode = KMallocFailed; - goto fail; + goto release_bdev_fail; } if (!mdev->bitmap) { if (drbd_bm_init(mdev)) { retcode = KMallocFailed; - goto fail; + goto release_bdev_fail; } } diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 6dc93edfb646..0e271975c0bf 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -215,7 +215,7 @@ STATIC int drbd_seq_show(struct seq_file *seq, void *v) mdev->state.aftr_isp ? 'a' : '-', mdev->state.peer_isp ? 'p' : '-', mdev->state.user_isp ? 'u' : '-', - mdev->congestion_reason, + mdev->congestion_reason ?: '-', mdev->send_cnt/2, mdev->recv_cnt/2, mdev->writ_cnt/2, diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index a9421dc104bf..57cc537b43d1 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1212,6 +1212,8 @@ STATIC int _drbd_resume_next(struct drbd_conf *mdev) odev = minor_to_mdev(i); if (!odev) continue; + if (odev->state.conn == StandAlone && odev->state.disk == Diskless) + continue; if (odev->state.aftr_isp) { if (_drbd_may_sync_now(odev)) rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0), diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h index e1dc3af1ddff..49ff68d11e8c 100644 --- a/drivers/block/drbd/drbd_wrappers.h +++ b/drivers/block/drbd/drbd_wrappers.h @@ -114,4 +114,3 @@ static inline void drbd_plug_device(struct drbd_conf *mdev) # undef __cond_lock # define __cond_lock(x,c) (c) #endif - -- cgit v1.2.3 From 12cb08e12683e154c00106136aa016726e7f6eb5 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 14 Apr 2009 15:45:32 +0200 Subject: Cleanups by comments from Bart Van Assche. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_buildtag.c | 4 ++-- drivers/block/drbd/drbd_nl.c | 8 ++++---- drivers/block/drbd/drbd_wrappers.h | 30 ------------------------------ drivers/block/drbd/lru_cache.h | 2 +- 4 files changed, 7 insertions(+), 37 deletions(-) diff --git a/drivers/block/drbd/drbd_buildtag.c b/drivers/block/drbd/drbd_buildtag.c index 278f88474ece..395c3841261e 100644 --- a/drivers/block/drbd/drbd_buildtag.c +++ b/drivers/block/drbd/drbd_buildtag.c @@ -2,6 +2,6 @@ #include const char *drbd_buildtag(void) { - return "GIT-hash: bbadddd7bad33396ebb8c0c12da9aab594d00c4e drbd/Makefile-2.6" - " build by phil@fat-tyre, 2009-04-01 10:29:21"; + return "GIT-hash: eb9569104c13fe9d08fc47fdbe171a2fd9d36bd3 drbd/drbd_nl.c drbd/drbd_wrappers.h drbd/lru_cache.h" + " build by phil@fat-tyre, 2009-04-14 15:18:35"; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index eddfcae9052d..2ae42baf24f4 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -394,7 +394,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) drbd_md_sync(mdev); - drbd_kobject_uevent(mdev); + kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); fail: mutex_unlock(&mdev->state_mutex); return r; @@ -1154,7 +1154,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp drbd_md_mark_dirty(mdev); drbd_md_sync(mdev); - drbd_kobject_uevent(mdev); + kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); dec_local(mdev); reply->ret_code = retcode; return 0; @@ -1440,7 +1440,7 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, if (retcode >= SS_Success) drbd_thread_start(&mdev->worker); - drbd_kobject_uevent(mdev); + kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); reply->ret_code = retcode; return 0; @@ -1733,7 +1733,7 @@ STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n mdev->worker.reset_cpu_mask = 1; } - drbd_kobject_uevent(mdev); + kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); fail: crypto_free_hash(csums_tfm); crypto_free_hash(verify_tfm); diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h index 49ff68d11e8c..3ebacbc7a39f 100644 --- a/drivers/block/drbd/drbd_wrappers.h +++ b/drivers/block/drbd/drbd_wrappers.h @@ -40,36 +40,6 @@ extern void drbd_endio_read_sec(struct bio *bio, int error); extern void drbd_endio_write_sec(struct bio *bio, int error); extern void drbd_endio_pri(struct bio *bio, int error); -/* how to get to the kobj of a gendisk. - * see also upstream commits - * edfaa7c36574f1bf09c65ad602412db9da5f96bf - * ed9e1982347b36573cd622ee5f4e2a7ccd79b3fd - * 548b10eb2959c96cef6fc29fc96e0931eeb53bc5 - */ -#ifndef dev_to_disk -# define disk_to_kobj(disk) (&(disk)->kobj) -#else -# ifndef disk_to_dev -# define disk_to_dev(disk) (&(disk)->dev) -# endif -# define disk_to_kobj(disk) (&disk_to_dev(disk)->kobj) -#endif -static inline void drbd_kobject_uevent(struct drbd_conf *mdev) -{ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10) -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,15) - kobject_uevent(disk_to_kobj(mdev->vdisk), KOBJ_CHANGE, NULL); -#else - kobject_uevent(disk_to_kobj(mdev->vdisk), KOBJ_CHANGE); - /* rhel4 / sles9 and older don't have this at all, - * which means user space (udev) won't get events about possible changes of - * corresponding resource + disk names after the initial drbd minor creation. 
- */ -#endif -#endif -} - - /* * used to submit our private bio */ diff --git a/drivers/block/drbd/lru_cache.h b/drivers/block/drbd/lru_cache.h index 6c095df24c04..eabf897948d0 100644 --- a/drivers/block/drbd/lru_cache.h +++ b/drivers/block/drbd/lru_cache.h @@ -1,5 +1,5 @@ /* - lru_cache.c + lru_cache.h This file is part of DRBD by Philipp Reisner and Lars Ellenberg. -- cgit v1.2.3 From cd359612986b89fb7dafb660917f333f555ca8e1 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 16 Apr 2009 15:31:29 +0200 Subject: Cleanups as suggested by reviewers. * Moving BUG_ON() as suggested by Nikanth Karthikesan * Removing our ERR, drbd_WARN, INFO, ALERT macros, using the kernel standard dev_err, dev_warn etc.. instead suggested by Bart Van Assche Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 92 +++++----- drivers/block/drbd/drbd_bitmap.c | 42 ++--- drivers/block/drbd/drbd_buildtag.c | 4 +- drivers/block/drbd/drbd_int.h | 44 ++--- drivers/block/drbd/drbd_main.c | 151 ++++++++-------- drivers/block/drbd/drbd_nl.c | 121 +++++++------ drivers/block/drbd/drbd_receiver.c | 347 ++++++++++++++++++++----------------- drivers/block/drbd/drbd_req.c | 38 ++-- drivers/block/drbd/drbd_req.h | 4 +- drivers/block/drbd/drbd_worker.c | 105 ++++++----- drivers/block/drbd/drbd_wrappers.h | 6 + drivers/block/drbd/lru_cache.c | 3 +- 12 files changed, 498 insertions(+), 459 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index f5c0614cf386..90ad8cbeafee 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -107,7 +107,7 @@ STATIC int _drbd_md_sync_page_io(struct drbd_conf *mdev, * don't try again for ANY return value != 0 */ if (unlikely(bio_barrier(bio) && !ok)) { /* Try again with no barrier */ - drbd_WARN("Barriers not supported on meta data device - disabling\n"); + dev_warn(DEV, "Barriers not supported on meta data device - disabling\n"); set_bit(MD_NO_BARRIER, &mdev->flags); rw &= ~(1 << BIO_RW_BARRIER); bio_put(bio); @@ -150,7 +150,7 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector, READ, hardsect); if (unlikely(!ok)) { - ERR("drbd_md_sync_page_io(,%llus," + dev_err(DEV, "drbd_md_sync_page_io(,%llus," "READ [hardsect!=512]) failed!\n", (unsigned long long)sector); return 0; @@ -162,13 +162,13 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, if (sector < drbd_md_first_sector(bdev) || sector > drbd_md_last_sector(bdev)) - ALERT("%s [%d]:%s(,%llus,%s) out of range md access!\n", + dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n", current->comm, current->pid, __func__, (unsigned long long)sector, rw ? "WRITE" : "READ"); ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, hardsect); if (unlikely(!ok)) { - ERR("drbd_md_sync_page_io(,%llus,%s) failed!\n", + dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n", (unsigned long long)sector, rw ? 
"WRITE" : "READ"); return 0; } @@ -206,9 +206,9 @@ struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) /* if (!al_ext) { if (al_flags & LC_STARVING) - drbd_WARN("Have to wait for LRU element (AL too small?)\n"); + dev_warn(DEV, "Have to wait for LRU element (AL too small?)\n"); if (al_flags & LC_DIRTY) - drbd_WARN("Ongoing AL update (AL device too slow?)\n"); + dev_warn(DEV, "Ongoing AL update (AL device too slow?)\n"); } */ @@ -224,7 +224,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) D_ASSERT(atomic_read(&mdev->local_cnt) > 0); MTRACE(TraceTypeALExts, TraceLvlMetrics, - INFO("al_begin_io( sec=%llus (al_enr=%u) (rs_enr=%d) )\n", + dev_info(DEV, "al_begin_io( sec=%llus (al_enr=%u) (rs_enr=%d) )\n", (unsigned long long) sector, enr, (int)BM_SECT_TO_EXT(sector)); ); @@ -261,7 +261,7 @@ void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector) unsigned long flags; MTRACE(TraceTypeALExts, TraceLvlMetrics, - INFO("al_complete_io( sec=%llus (al_enr=%u) (rs_enr=%d) )\n", + dev_info(DEV, "al_complete_io( sec=%llus (al_enr=%u) (rs_enr=%d) )\n", (unsigned long long) sector, enr, (int)BM_SECT_TO_EXT(sector)); ); @@ -272,7 +272,7 @@ void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector) if (!extent) { spin_unlock_irqrestore(&mdev->al_lock, flags); - ERR("al_complete_io() called on inactive extent %u\n", enr); + dev_err(DEV, "al_complete_io() called on inactive extent %u\n", enr); return; } @@ -297,7 +297,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) u32 xor_sum = 0; if (!inc_local(mdev)) { - ERR("inc_local() failed in w_al_write_transaction\n"); + dev_err(DEV, "inc_local() failed in w_al_write_transaction\n"); complete(&((struct update_al_work *)w)->event); return 1; } @@ -448,14 +448,14 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) } if (from == -1 || to == -1) { - drbd_WARN("No usable activity log found.\n"); + dev_warn(DEV, "No usable activity log found.\n"); mutex_unlock(&mdev->md_io_mutex); return 1; } /* Read the valid transactions. 
- * INFO("Reading from %d to %d.\n",from,to); */ + * dev_info(DEV, "Reading from %d to %d.\n",from,to); */ i = from; while (1) { int j, pos; @@ -508,7 +508,7 @@ cancel: /* ok, we are done with it */ mutex_unlock(&mdev->md_io_mutex); - INFO("Found %d transactions (%d active extents) in activity log.\n", + dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n", transactions, active_extents); return 1; @@ -710,7 +710,7 @@ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev) kfree(bios); submit_one_by_one: - drbd_WARN("Using the slow drbd_al_to_on_disk_bm()\n"); + dev_warn(DEV, "Using the slow drbd_al_to_on_disk_bm()\n"); for (i = 0; i < mdev->act_log->nr_elements; i++) { enr = lc_entry(mdev->act_log, i)->lc_number; @@ -749,7 +749,7 @@ void drbd_al_apply_to_bm(struct drbd_conf *mdev) lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); - INFO("Marked additional %s as out-of-sync based on AL.\n", + dev_info(DEV, "Marked additional %s as out-of-sync based on AL.\n", ppsize(ppb, Bit2KB(add))); } @@ -765,7 +765,7 @@ static inline int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext) MTRACE(TraceTypeALExts, TraceLvlMetrics, if (unlikely(!rv)) - INFO("Waiting for extent in drbd_al_shrink()\n"); + dev_info(DEV, "Waiting for extent in drbd_al_shrink()\n"); ); return rv; @@ -799,7 +799,7 @@ STATIC int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused if (!inc_local(mdev)) { if (__ratelimit(&drbd_ratelimit_state)) - drbd_WARN("Can not update on disk bitmap, local IO disabled.\n"); + dev_warn(DEV, "Can not update on disk bitmap, local IO disabled.\n"); return 1; } @@ -852,7 +852,7 @@ STATIC void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, else ext->rs_failed += count; if (ext->rs_left < ext->rs_failed) { - ERR("BAD! sector=%llus enr=%u rs_left=%d " + dev_err(DEV, "BAD! sector=%llus enr=%u rs_left=%d " "rs_failed=%d count=%d\n", (unsigned long long)sector, ext->lce.lc_number, ext->rs_left, @@ -872,14 +872,14 @@ STATIC void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, */ int rs_left = drbd_bm_e_weight(mdev, enr); if (ext->flags != 0) { - drbd_WARN("changing resync lce: %d[%u;%02lx]" + dev_warn(DEV, "changing resync lce: %d[%u;%02lx]" " -> %d[%u;00]\n", ext->lce.lc_number, ext->rs_left, ext->flags, enr, rs_left); ext->flags = 0; } if (ext->rs_failed) { - drbd_WARN("Kicking resync_lru element enr=%u " + dev_warn(DEV, "Kicking resync_lru element enr=%u " "out with rs_failed=%d\n", ext->lce.lc_number, ext->rs_failed); set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); @@ -900,12 +900,12 @@ STATIC void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, udw->w.cb = w_update_odbm; drbd_queue_work_front(&mdev->data.work, &udw->w); } else { - drbd_WARN("Could not kmalloc an udw\n"); + dev_warn(DEV, "Could not kmalloc an udw\n"); set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); } } } else { - ERR("lc_get() failed! locked=%d/%d flags=%lu\n", + dev_err(DEV, "lc_get() failed! 
locked=%d/%d flags=%lu\n", mdev->resync_locked, mdev->resync->nr_elements, mdev->resync->flags); @@ -930,7 +930,7 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, unsigned long flags; if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { - ERR("drbd_set_in_sync: sector=%llus size=%d nonsense!\n", + dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n", (unsigned long long)sector, size); return; } @@ -954,7 +954,7 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); MTRACE(TraceTypeResync, TraceLvlMetrics, - INFO("drbd_set_in_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n", + dev_info(DEV, "drbd_set_in_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n", (unsigned long long)sector, size, sbnr, ebnr); ); @@ -1009,7 +1009,7 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, struct bm_extent *ext; if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { - ERR("sector: %llus, size: %d\n", + dev_err(DEV, "sector: %llus, size: %d\n", (unsigned long long)sector, size); return; } @@ -1033,7 +1033,7 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, ebnr = BM_SECT_TO_BIT(esector); MTRACE(TraceTypeResync, TraceLvlMetrics, - INFO("drbd_set_out_of_sync: sector=%llus size=%u " + dev_info(DEV, "drbd_set_out_of_sync: sector=%llus size=%u " "sbnr=%lu ebnr=%lu\n", (unsigned long long)sector, size, sbnr, ebnr); ); @@ -1084,7 +1084,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) if (!bm_ext) { if (rs_flags & LC_STARVING) - drbd_WARN("Have to wait for element" + dev_warn(DEV, "Have to wait for element" " (resync LRU too small?)\n"); BUG_ON(rs_flags & LC_DIRTY); } @@ -1111,7 +1111,7 @@ static inline int _is_in_al(struct drbd_conf *mdev, unsigned int enr) /* if (unlikely(rv)) { - INFO("Delaying sync read until app's write is done\n"); + dev_info(DEV, "Delaying sync read until app's write is done\n"); } */ return rv; @@ -1134,7 +1134,7 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector) int i, sig; MTRACE(TraceTypeResync, TraceLvlAll, - INFO("drbd_rs_begin_io: sector=%llus (rs_end=%d)\n", + dev_info(DEV, "drbd_rs_begin_io: sector=%llus (rs_end=%d)\n", (unsigned long long)sector, enr); ); @@ -1184,7 +1184,7 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) int i; MTRACE(TraceTypeResync, TraceLvlAll, - INFO("drbd_try_rs_begin_io: sector=%llus\n", + dev_info(DEV, "drbd_try_rs_begin_io: sector=%llus\n", (unsigned long long)sector); ); @@ -1204,7 +1204,7 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) * we also have to wake_up */ MTRACE(TraceTypeResync, TraceLvlAll, - INFO("dropping %u, aparently got 'synced' " + dev_info(DEV, "dropping %u, aparently got 'synced' " "by application io\n", mdev->resync_wenr); ); bm_ext = (struct bm_extent *) @@ -1218,7 +1218,7 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) mdev->resync_locked--; wake_up(&mdev->al_wait); } else { - ALERT("LOGIC BUG\n"); + dev_alert(DEV, "LOGIC BUG\n"); } } bm_ext = (struct bm_extent *)lc_try_get(mdev->resync, enr); @@ -1233,7 +1233,7 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) * so we tried again. * drop the extra reference. 
*/ MTRACE(TraceTypeResync, TraceLvlAll, - INFO("dropping extra reference on %u\n", enr); + dev_info(DEV, "dropping extra reference on %u\n", enr); ); bm_ext->lce.refcnt--; D_ASSERT(bm_ext->lce.refcnt > 0); @@ -1242,7 +1242,7 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) } else { if (mdev->resync_locked > mdev->resync->nr_elements-3) { MTRACE(TraceTypeResync, TraceLvlAll, - INFO("resync_locked = %u!\n", mdev->resync_locked); + dev_info(DEV, "resync_locked = %u!\n", mdev->resync_locked); ); goto try_again; } @@ -1250,7 +1250,7 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) if (!bm_ext) { const unsigned long rs_flags = mdev->resync->flags; if (rs_flags & LC_STARVING) - drbd_WARN("Have to wait for element" + dev_warn(DEV, "Have to wait for element" " (resync LRU too small?)\n"); BUG_ON(rs_flags & LC_DIRTY); goto try_again; @@ -1269,7 +1269,7 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) } check_al: MTRACE(TraceTypeResync, TraceLvlAll, - INFO("checking al for %u\n", enr); + dev_info(DEV, "checking al for %u\n", enr); ); for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { if (unlikely(al_enr+i == mdev->act_log->new_number)) @@ -1285,7 +1285,7 @@ proceed: try_again: MTRACE(TraceTypeResync, TraceLvlAll, - INFO("need to try again for %u\n", enr); + dev_info(DEV, "need to try again for %u\n", enr); ); if (bm_ext) mdev->resync_wenr = enr; @@ -1300,7 +1300,7 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector) unsigned long flags; MTRACE(TraceTypeResync, TraceLvlAll, - INFO("drbd_rs_complete_io: sector=%llus (rs_enr=%d)\n", + dev_info(DEV, "drbd_rs_complete_io: sector=%llus (rs_enr=%d)\n", (long long)sector, enr); ); @@ -1308,13 +1308,13 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector) bm_ext = (struct bm_extent *) lc_find(mdev->resync, enr); if (!bm_ext) { spin_unlock_irqrestore(&mdev->al_lock, flags); - ERR("drbd_rs_complete_io() called, but extent not found\n"); + dev_err(DEV, "drbd_rs_complete_io() called, but extent not found\n"); return; } if (bm_ext->lce.refcnt == 0) { spin_unlock_irqrestore(&mdev->al_lock, flags); - ERR("drbd_rs_complete_io(,%llu [=%u]) called, " + dev_err(DEV, "drbd_rs_complete_io(,%llu [=%u]) called, " "but refcnt is 0!?\n", (unsigned long long)sector, enr); return; @@ -1337,7 +1337,7 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector) void drbd_rs_cancel_all(struct drbd_conf *mdev) { MTRACE(TraceTypeResync, TraceLvlMetrics, - INFO("drbd_rs_cancel_all\n"); + dev_info(DEV, "drbd_rs_cancel_all\n"); ); spin_lock_irq(&mdev->al_lock); @@ -1364,7 +1364,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev) int i; MTRACE(TraceTypeResync, TraceLvlMetrics, - INFO("drbd_rs_del_all\n"); + dev_info(DEV, "drbd_rs_del_all\n"); ); spin_lock_irq(&mdev->al_lock); @@ -1376,7 +1376,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev) if (bm_ext->lce.lc_number == LC_FREE) continue; if (bm_ext->lce.lc_number == mdev->resync_wenr) { - INFO("dropping %u in drbd_rs_del_all, apparently" + dev_info(DEV, "dropping %u in drbd_rs_del_all, apparently" " got 'synced' by application io\n", mdev->resync_wenr); D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags)); @@ -1386,7 +1386,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev) lc_put(mdev->resync, &bm_ext->lce); } if (bm_ext->lce.refcnt != 0) { - INFO("Retrying drbd_rs_del_all() later. " + dev_info(DEV, "Retrying drbd_rs_del_all() later. 
" "refcnt=%d\n", bm_ext->lce.refcnt); dec_local(mdev); spin_unlock_irq(&mdev->al_lock); @@ -1418,12 +1418,12 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size) int wake_up = 0; MTRACE(TraceTypeResync, TraceLvlSummary, - INFO("drbd_rs_failed_io: sector=%llus, size=%u\n", + dev_info(DEV, "drbd_rs_failed_io: sector=%llus, size=%u\n", (unsigned long long)sector, size); ); if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { - ERR("drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", + dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", (unsigned long long)sector, size); return; } diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index bfc695a6d6e6..d8e6d493012e 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -92,7 +92,7 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) struct drbd_bitmap *b = mdev->bitmap; if (!__ratelimit(&drbd_ratelimit_state)) return; - ERR("FIXME %s in %s, bitmap locked for '%s' by %s\n", + dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n", current == mdev->receiver.task ? "receiver" : current == mdev->asender.task ? "asender" : current == mdev->worker.task ? "worker" : current->comm, @@ -108,14 +108,14 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why) int trylock_failed; if (!b) { - ERR("FIXME no bitmap in drbd_bm_lock!?\n"); + dev_err(DEV, "FIXME no bitmap in drbd_bm_lock!?\n"); return; } trylock_failed = down_trylock(&b->bm_change); if (trylock_failed) { - DBG("%s going to '%s' but bitmap already locked for '%s' by %s\n", + dev_dbg(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", current == mdev->receiver.task ? "receiver" : current == mdev->asender.task ? "asender" : current == mdev->worker.task ? "worker" : "?", @@ -126,7 +126,7 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why) down(&b->bm_change); } if (__test_and_set_bit(BM_LOCKED, &b->bm_flags)) - ERR("FIXME bitmap already locked in bm_lock\n"); + dev_err(DEV, "FIXME bitmap already locked in bm_lock\n"); b->bm_why = why; b->bm_task = current; @@ -136,12 +136,12 @@ void drbd_bm_unlock(struct drbd_conf *mdev) { struct drbd_bitmap *b = mdev->bitmap; if (!b) { - ERR("FIXME no bitmap in drbd_bm_unlock!?\n"); + dev_err(DEV, "FIXME no bitmap in drbd_bm_unlock!?\n"); return; } if (!__test_and_clear_bit(BM_LOCKED, &mdev->bitmap->bm_flags)) - ERR("FIXME bitmap not locked in bm_unlock\n"); + dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n"); b->bm_why = NULL; b->bm_task = NULL; @@ -410,7 +410,7 @@ void _drbd_bm_recount_bits(struct drbd_conf *mdev, char *file, int line) spin_lock_irqsave(&b->bm_lock, flags); bits = bm_count_bits(b); if (bits != b->bm_set) { - ERR("bm_set was %lu, corrected to %lu. %s:%d\n", + dev_err(DEV, "bm_set was %lu, corrected to %lu. 
%s:%d\n", b->bm_set, bits, file, line); b->bm_set = bits; } @@ -467,7 +467,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity) drbd_bm_lock(mdev, "resize"); - INFO("drbd_bm_resize called with capacity == %llu\n", + dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n", (unsigned long long)capacity); if (capacity == b->bm_dev_capacity) @@ -558,7 +558,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity) spin_unlock_irq(&b->bm_lock); if (opages != npages) vfree(opages); - INFO("resync bitmap: bits=%lu words=%lu\n", bits, words); + dev_info(DEV, "resync bitmap: bits=%lu words=%lu\n", bits, words); out: drbd_bm_unlock(mdev); @@ -678,7 +678,7 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, if ((offset >= b->bm_words) || (end > b->bm_words) || (number <= 0)) - ERR("offset=%lu number=%lu bm_words=%lu\n", + dev_err(DEV, "offset=%lu number=%lu bm_words=%lu\n", (unsigned long) offset, (unsigned long) number, (unsigned long) b->bm_words); @@ -843,12 +843,12 @@ STATIC int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0); MTRACE(TraceTypeMDIO, TraceLvlSummary, - INFO("%s of bitmap took %lu jiffies\n", + dev_info(DEV, "%s of bitmap took %lu jiffies\n", rw == READ ? "reading" : "writing", jiffies - now); ); if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) { - ALERT("we had at least one MD IO ERROR during bitmap IO\n"); + dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); drbd_chk_io_error(mdev, 1, TRUE); drbd_io_error(mdev, TRUE); err = -EIO; @@ -863,12 +863,12 @@ STATIC int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) } else /* rw == READ */ { /* just read, if neccessary adjust endianness */ b->bm_set = bm_count_bits_swap_endian(b); - INFO("recounting of set bits took additional %lu jiffies\n", + dev_info(DEV, "recounting of set bits took additional %lu jiffies\n", jiffies - now); } now = b->bm_set; - INFO("%s (%lu bits) marked out-of-sync by on disk bit-map.\n", + dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", ppsize(ppb, now << (BM_BLOCK_SIZE_B-10)), now); return err; @@ -919,7 +919,7 @@ int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(lo if (!drbd_md_sync_page_io(mdev, mdev->bc, on_disk_sector, WRITE)) { int i; err = -EIO; - ERR("IO ERROR writing bitmap sector %lu " + dev_err(DEV, "IO ERROR writing bitmap sector %lu " "(meta-disk sector %llus)\n", enr, (unsigned long long)on_disk_sector); drbd_chk_io_error(mdev, 1, TRUE); @@ -948,7 +948,7 @@ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, unsigned long bit_offset; /* bit offset of the mapped page. 
*/ if (bm_fo > b->bm_bits) { - ERR("bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); + dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); } else { while (bm_fo < b->bm_bits) { unsigned long offset; @@ -1039,7 +1039,7 @@ int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, for (bitnr = s; bitnr <= e; bitnr++) { ERR_IF (bitnr >= b->bm_bits) { - ERR("bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); + dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); } else { unsigned long offset = bitnr>>LN2_BPL; unsigned long page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3); @@ -1133,7 +1133,7 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) } else if (bitnr == b->bm_bits) { i = -1; } else { /* (bitnr > b->bm_bits) */ - ERR("bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits); + dev_err(DEV, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits); i = 0; } @@ -1168,7 +1168,7 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi p_addr = bm_map_paddr(b, w); } ERR_IF (bitnr >= b->bm_bits) { - ERR("bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); + dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); } else { c += (0 != test_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); } @@ -1219,7 +1219,7 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) count += hweight_long(*bm++); bm_unmap(p_addr); } else { - ERR("start offset (%d) too large in drbd_bm_e_weight\n", s); + dev_err(DEV, "start offset (%d) too large in drbd_bm_e_weight\n", s); } spin_unlock_irqrestore(&b->bm_lock, flags); return count; @@ -1260,7 +1260,7 @@ unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) if (e == b->bm_words) b->bm_set -= bm_clear_surplus(b); } else { - ERR("start offset (%d) too large in drbd_bm_ALe_set_all\n", s); + dev_err(DEV, "start offset (%d) too large in drbd_bm_ALe_set_all\n", s); } weight = b->bm_set - weight; spin_unlock_irq(&b->bm_lock); diff --git a/drivers/block/drbd/drbd_buildtag.c b/drivers/block/drbd/drbd_buildtag.c index 395c3841261e..368298ce9f68 100644 --- a/drivers/block/drbd/drbd_buildtag.c +++ b/drivers/block/drbd/drbd_buildtag.c @@ -2,6 +2,6 @@ #include const char *drbd_buildtag(void) { - return "GIT-hash: eb9569104c13fe9d08fc47fdbe171a2fd9d36bd3 drbd/drbd_nl.c drbd/drbd_wrappers.h drbd/lru_cache.h" - " build by phil@fat-tyre, 2009-04-14 15:18:35"; + return "GIT-hash: 2f1a0b8bfe96b74872f056b4d0fc14faa6c87b3b drbd/drbd_actlog.c drbd/drbd_bitmap.c drbd/drbd_int.h drbd/drbd_main.c drbd/drbd_nl.c drbd/drbd_receiver.c drbd/drbd_req.c drbd/drbd_req.h drbd/drbd_worker.c" + " build by phil@fat-tyre, 2009-04-16 15:21:58"; } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 3c9a0fa2051e..1b51051e2fcf 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -115,30 +115,22 @@ struct drbd_conf; * Some Message Macros *************************/ -#define DUMPP(A) ERR(#A " = %p in %s:%d\n", (A), __FILE__, __LINE__); -#define DUMPLU(A) ERR(#A " = %lu in %s:%d\n", (unsigned long)(A), __FILE__, __LINE__); -#define DUMPLLU(A) ERR(#A " = %llu in %s:%d\n", (unsigned long long)(A), __FILE__, __LINE__); -#define DUMPLX(A) ERR(#A " = %lx in %s:%d\n", (A), __FILE__, __LINE__); -#define DUMPI(A) ERR(#A " = %d in %s:%d\n", (int)(A), __FILE__, __LINE__); +#define DUMPP(A) dev_err(DEV, #A " = %p in %s:%d\n", (A), __FILE__, __LINE__); +#define DUMPLU(A) dev_err(DEV, #A " = %lu in %s:%d\n", (unsigned long)(A), __FILE__, __LINE__); +#define DUMPLLU(A) dev_err(DEV, #A " 
= %llu in %s:%d\n", (unsigned long long)(A), __FILE__, __LINE__); +#define DUMPLX(A) dev_err(DEV, #A " = %lx in %s:%d\n", (A), __FILE__, __LINE__); +#define DUMPI(A) dev_err(DEV, #A " = %d in %s:%d\n", (int)(A), __FILE__, __LINE__); -#define PRINTK(level, fmt, args...) \ - printk(level "drbd%d: " fmt, \ - mdev->minor , ##args) - -#define ALERT(fmt, args...) PRINTK(KERN_ALERT, fmt , ##args) -#define ERR(fmt, args...) PRINTK(KERN_ERR, fmt , ##args) -/* nowadays, WARN() is defined as BUG() without crash in bug.h */ -#define drbd_WARN(fmt, args...) PRINTK(KERN_WARNING, fmt , ##args) -#define INFO(fmt, args...) PRINTK(KERN_INFO, fmt , ##args) -#define DBG(fmt, args...) PRINTK(KERN_DEBUG, fmt , ##args) +/* to shorten dev_warn(DEV, "msg"); and relatives statements */ +#define DEV (disk_to_dev(mdev->vdisk)) #define D_ASSERT(exp) if (!(exp)) \ - ERR("ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__) + dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__) #define ERR_IF(exp) if (({ \ int _b = (exp) != 0; \ - if (_b) ERR("%s: (%s) in %s:%d\n", \ + if (_b) dev_err(DEV, "%s: (%s) in %s:%d\n", \ __func__, #exp, __FILE__, __LINE__); \ _b; \ })) @@ -1093,8 +1085,8 @@ extern void drbd_free_sock(struct drbd_conf *mdev); extern int drbd_send(struct drbd_conf *mdev, struct socket *sock, void *buf, size_t size, unsigned msg_flags); extern int drbd_send_protocol(struct drbd_conf *mdev); -extern int _drbd_send_uuids(struct drbd_conf *mdev); extern int drbd_send_uuids(struct drbd_conf *mdev); +extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val); extern int drbd_send_sizes(struct drbd_conf *mdev); extern int _drbd_send_state(struct drbd_conf *mdev); @@ -1504,7 +1496,7 @@ extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int); static inline void ov_oos_print(struct drbd_conf *mdev) { if (mdev->ov_last_oos_size) { - ERR("Out of sync: start=%llu, size=%lu (sectors)\n", + dev_err(DEV, "Out of sync: start=%llu, size=%lu (sectors)\n", (unsigned long long)mdev->ov_last_oos_start, (unsigned long)mdev->ov_last_oos_size); } @@ -1727,7 +1719,7 @@ static inline void __drbd_chk_io_error(struct drbd_conf *mdev, int forcedetach) case PassOn: if (!forcedetach) { if (printk_ratelimit()) - ERR("Local IO failed. Passing error on...\n"); + dev_err(DEV, "Local IO failed. Passing error on...\n"); break; } /* NOTE fall through to detach case if forcedetach set */ @@ -1735,7 +1727,7 @@ static inline void __drbd_chk_io_error(struct drbd_conf *mdev, int forcedetach) case CallIOEHelper: if (mdev->state.disk > Failed) { _drbd_set_state(_NS(mdev, disk, Failed), ChgStateHard, NULL); - ERR("Local IO failed. Detaching...\n"); + dev_err(DEV, "Local IO failed. Detaching...\n"); } break; } @@ -1832,7 +1824,7 @@ static inline sector_t drbd_md_ss__(struct drbd_conf *mdev, * position: last 4k aligned block of 4k size */ if (!bdev->backing_bdev) { if (__ratelimit(&drbd_ratelimit_state)) { - ERR("bdev->backing_bdev==NULL\n"); + dev_err(DEV, "bdev->backing_bdev==NULL\n"); dump_stack(); } return 0; @@ -1948,7 +1940,7 @@ static inline void inc_ap_pending(struct drbd_conf *mdev) #define ERR_IF_CNT_IS_NEGATIVE(which) \ if (atomic_read(&mdev->which) < 0) \ - ERR("in %s:%d: " #which " = %d < 0 !\n", \ + dev_err(DEV, "in %s:%d: " #which " = %d < 0 !\n", \ __func__ , __LINE__ , \ atomic_read(&mdev->which)) @@ -2074,7 +2066,7 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, * for now, just prevent in-kernel buffer overflow. 
*/ smp_rmb(); - drbd_WARN("cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n", + dev_warn(DEV, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n", conns_to_name(mdev->state.conn), *bits_left, mdev->rs_total, mdev->rs_failed); *per_mil_done = 0; @@ -2241,7 +2233,7 @@ static inline void drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) mdev->ed_uuid = val; MTRACE(TraceTypeUuid, TraceLvlMetrics, - INFO(" exposed data uuid now %016llX\n", + dev_info(DEV, " exposed data uuid now %016llX\n", (unsigned long long)val); ); } @@ -2314,7 +2306,7 @@ static inline void drbd_md_flush(struct drbd_conf *mdev) r = blkdev_issue_flush(mdev->bc->md_bdev, NULL); if (r) { set_bit(MD_NO_BARRIER, &mdev->flags); - ERR("meta data flush failed with status %d, disabling md-flushes\n", r); + dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); } } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index c2cc18260776..dfb48e2c1bc5 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -258,17 +258,17 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, /* first some paranoia code */ if (b == NULL) { - ERR("BAD! BarrierAck #%u received, but no epoch in tl!?\n", + dev_err(DEV, "BAD! BarrierAck #%u received, but no epoch in tl!?\n", barrier_nr); goto bail; } if (b->br_number != barrier_nr) { - ERR("BAD! BarrierAck #%u received, expected #%u!\n", + dev_err(DEV, "BAD! BarrierAck #%u received, expected #%u!\n", barrier_nr, b->br_number); goto bail; } if (b->n_req != set_size) { - ERR("BAD! BarrierAck #%u received with n_req=%u, expected n_req=%u!\n", + dev_err(DEV, "BAD! BarrierAck #%u received with n_req=%u, expected n_req=%u!\n", barrier_nr, set_size, b->n_req); goto bail; } @@ -415,9 +415,9 @@ int drbd_io_error(struct drbd_conf *mdev, int forcedetach) if (mdev->state.conn >= Connected) { ok = drbd_send_state(mdev); if (ok) - drbd_WARN("Notified peer that my disk is broken.\n"); + dev_warn(DEV, "Notified peer that my disk is broken.\n"); else - ERR("Sending state in drbd_io_error() failed\n"); + dev_err(DEV, "Sending state in drbd_io_error() failed\n"); } /* Make sure we try to flush meta-data to disk - we come @@ -610,7 +610,7 @@ int _drbd_request_state(struct drbd_conf *mdev, union drbd_state_t mask, STATIC void print_st(struct drbd_conf *mdev, char *name, union drbd_state_t ns) { - ERR(" %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n", + dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n", name, conns_to_name(ns.conn), roles_to_name(ns.role), @@ -629,7 +629,7 @@ void print_st_err(struct drbd_conf *mdev, { if (err == SS_InTransientState) return; - ERR("State change failed: %s\n", set_st_err_name(err)); + dev_err(DEV, "State change failed: %s\n", set_st_err_name(err)); print_st(mdev, " state", os); print_st(mdev, "wanted", ns); } @@ -821,11 +821,11 @@ int __drbd_set_state(struct drbd_conf *mdev, break; case SyncTarget: ns.disk = Inconsistent; - drbd_WARN("Implicit set disk state Inconsistent!\n"); + dev_warn(DEV, "Implicitly set disk state Inconsistent!\n"); break; } if (os.disk == Outdated && ns.disk == UpToDate) - drbd_WARN("Implicit set disk from Outdate to UpToDate\n"); + dev_warn(DEV, "Implicitly set disk from Outdated to UpToDate\n"); } if (ns.conn >= Connected && @@ -843,11 +843,11 @@ int __drbd_set_state(struct drbd_conf *mdev, break; case SyncSource: ns.pdsk = Inconsistent; - drbd_WARN("Implicit set pdsk Inconsistent!\n"); + dev_warn(DEV, "Implicitly set pdsk Inconsistent!\n"); break; } if (os.pdsk == 
Outdated && ns.pdsk == UpToDate) - drbd_WARN("Implicit set pdsk from Outdate to UpToDate\n"); + dev_warn(DEV, "Implicitly set pdsk from Outdated to UpToDate\n"); } /* Connection breaks down before we finished "Negotiating" */ @@ -857,7 +857,7 @@ int __drbd_set_state(struct drbd_conf *mdev, ns.disk = mdev->new_state_tmp.disk; ns.pdsk = mdev->new_state_tmp.pdsk; } else { - ALERT("Connection lost while negotiating, no data!\n"); + dev_alert(DEV, "Connection lost while negotiating, no data!\n"); ns.disk = Diskless; ns.pdsk = DUnknown; } @@ -895,7 +895,7 @@ int __drbd_set_state(struct drbd_conf *mdev, this happen...*/ if (is_valid_state(mdev, os) == rv) { - ERR("Considering state change from bad state. " + dev_err(DEV, "Considering state change from bad state. " "Error would be: '%s'\n", set_st_err_name(rv)); print_st(mdev, "old", os); @@ -913,7 +913,7 @@ int __drbd_set_state(struct drbd_conf *mdev, } if (warn_sync_abort) - drbd_WARN("Resync aborted.\n"); + dev_warn(DEV, "Resync aborted.\n"); { char *pbp, pb[300]; @@ -928,7 +928,7 @@ int __drbd_set_state(struct drbd_conf *mdev, PSC(aftr_isp); PSC(peer_isp); PSC(user_isp); - INFO("%s\n", pb); + dev_info(DEV, "%s\n", pb); } mdev->state.i = ns.i; @@ -943,7 +943,7 @@ int __drbd_set_state(struct drbd_conf *mdev, if ((os.conn == PausedSyncT || os.conn == PausedSyncS) && (ns.conn == SyncTarget || ns.conn == SyncSource)) { - INFO("Syncer continues.\n"); + dev_info(DEV, "Syncer continues.\n"); mdev->rs_paused += (long)jiffies-(long)mdev->rs_mark_time; if (ns.conn == SyncTarget) { if (!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags)) @@ -957,7 +957,7 @@ int __drbd_set_state(struct drbd_conf *mdev, if ((os.conn == SyncTarget || os.conn == SyncSource) && (ns.conn == PausedSyncT || ns.conn == PausedSyncS)) { - INFO("Resync suspended\n"); + dev_info(DEV, "Resync suspended\n"); mdev->rs_mark_time = jiffies; if (ns.conn == PausedSyncT) set_bit(STOP_SYNC_TIMER, &mdev->flags); @@ -1032,7 +1032,7 @@ int __drbd_set_state(struct drbd_conf *mdev, ascw->done = done; drbd_queue_work(&mdev->data.work, &ascw->w); } else { - drbd_WARN("Could not kmalloc an ascw\n"); + dev_warn(DEV, "Could not kmalloc an ascw\n"); } return rv; @@ -1056,7 +1056,7 @@ STATIC int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unu static void abw_start_sync(struct drbd_conf *mdev, int rv) { if (rv) { - ERR("Writing the bitmap failed not starting resync.\n"); + dev_err(DEV, "Writing the bitmap failed not starting resync.\n"); _drbd_request_state(mdev, NS(conn, Connected), ChgStateVerbose); return; } @@ -1251,7 +1251,7 @@ restart: */ if (thi->t_state == Restarting) { - INFO("Restarting %s\n", current->comm); + dev_info(DEV, "Restarting %s\n", current->comm); thi->t_state = Running; spin_unlock(&thi->t_lock); goto restart; @@ -1263,7 +1263,7 @@ restart: complete(&thi->stop); spin_unlock(&thi->t_lock); - INFO("Terminating %s\n", current->comm); + dev_info(DEV, "Terminating %s\n", current->comm); /* Release mod reference taken when thread was started */ module_put(THIS_MODULE); @@ -1292,12 +1292,12 @@ int drbd_thread_start(struct Drbd_thread *thi) spin_lock(&thi->t_lock); switch (thi->t_state) { case None: - INFO("Starting %s thread (from %s [%d])\n", + dev_info(DEV, "Starting %s thread (from %s [%d])\n", me, current->comm, current->pid); /* Get ref on module for thread - this is released when thread exits */ if (!try_module_get(THIS_MODULE)) { - ERR("Failed to get module reference in drbd_thread_start\n"); + dev_err(DEV, "Failed to get module reference in 
drbd_thread_start\n"); spin_unlock(&thi->t_lock); return FALSE; } @@ -1312,7 +1312,7 @@ int drbd_thread_start(struct Drbd_thread *thi) "drbd%d_%s", mdev_to_minor(mdev), me); if (IS_ERR(nt)) { - ERR("Couldn't start thread\n"); + dev_err(DEV, "Couldn't start thread\n"); module_put(THIS_MODULE); return FALSE; @@ -1325,7 +1325,7 @@ int drbd_thread_start(struct Drbd_thread *thi) break; case Exiting: thi->t_state = Restarting; - INFO("Restarting %s thread (from %s [%d])\n", + dev_info(DEV, "Restarting %s thread (from %s [%d])\n", me, current->comm, current->pid); case Running: case Restarting: @@ -1367,9 +1367,8 @@ void _drbd_thread_stop(struct Drbd_thread *thi, int restart, int wait) spin_unlock(&thi->t_lock); - if (wait) { + if (wait) wait_for_completion(&thi->stop); - } } #ifdef CONFIG_SMP @@ -1443,7 +1442,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, ok = (sent == size); if (!ok) - ERR("short sent %s size=%d sent=%d\n", + dev_err(DEV, "short sent %s size=%d sent=%d\n", cmdname(cmd), (int)size, sent); return ok; } @@ -1574,13 +1573,11 @@ int drbd_send_protocol(struct drbd_conf *mdev) return rv; } -int drbd_send_uuids(struct drbd_conf *mdev) +int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) { struct Drbd_GenCnt_Packet p; int i; - u64 uuid_flags = 0; - if (!inc_local_if_state(mdev, Negotiating)) return 1; @@ -1600,6 +1597,17 @@ int drbd_send_uuids(struct drbd_conf *mdev) (struct Drbd_Header *)&p, sizeof(p)); } +int drbd_send_uuids(struct drbd_conf *mdev) +{ + return _drbd_send_uuids(mdev, 0); +} + +int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev) +{ + return _drbd_send_uuids(mdev, 8); +} + + int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val) { struct Drbd_SyncUUID_Packet p; @@ -1755,7 +1763,7 @@ int fill_bitmap_rle_bytes(struct drbd_conf *mdev, /* paranoia: catch zero runlength. * can only happen if bitmap is modified while we scan it. */ if (rl == 0) { - ERR("unexpected zero runlength while encoding bitmap " + dev_err(DEV, "unexpected zero runlength while encoding bitmap " "t:%u bo:%lu\n", toggle, c->bit_offset); return -1; } @@ -1848,7 +1856,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, /* paranoia: catch zero runlength. * can only happen if bitmap is modified while we scan it. */ if (rl == 0) { - ERR("unexpected zero runlength while encoding bitmap " + dev_err(DEV, "unexpected zero runlength while encoding bitmap " "t:%u bo:%lu\n", toggle, c->bit_offset); return -1; } @@ -1857,7 +1865,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, if (bits == -ENOBUFS) /* buffer full */ break; if (bits <= 0) { - ERR("error while encoding bitmap: %d\n", bits); + dev_err(DEV, "error while encoding bitmap: %d\n", bits); return 0; } @@ -1951,19 +1959,19 @@ int _drbd_send_bitmap(struct drbd_conf *mdev) * and allocate that during initial device creation? */ p = (struct Drbd_Header *) __get_free_page(GFP_NOIO); if (!p) { - ERR("failed to allocate one page buffer in %s\n", __func__); + dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); return FALSE; } if (inc_local(mdev)) { if (drbd_md_test_flag(mdev->bc, MDF_FullSync)) { - INFO("Writing the whole bitmap, MDF_FullSync was set.\n"); + dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n"); drbd_bm_set_all(mdev); if (drbd_bm_write(mdev)) { /* write_bm did fail! Leave full sync flag set in Meta Data * but otherwise process as per normal - need to tell other * side that a full resync is required! 
*/ - ERR("Failed to write bitmap to disk!\n"); + dev_err(DEV, "Failed to write bitmap to disk!\n"); } else { drbd_md_clear_flag(mdev, MDF_FullSync); drbd_md_sync(mdev); @@ -2147,7 +2155,7 @@ STATIC int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * drop_it = !--mdev->ko_count; if (!drop_it) { - ERR("[%s/%d] sock_sendmsg time expired, ko = %u\n", + dev_err(DEV, "[%s/%d] sock_sendmsg time expired, ko = %u\n", current->comm, current->pid, mdev->ko_count); request_ping(mdev); } @@ -2179,10 +2187,11 @@ STATIC int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * STATIC int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, int offset, size_t size) { - int ret; - ret = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0); - kunmap(page); - return ret; + int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0); + kunmap(page); + if (sent == size) + mdev->send_cnt += size>>9; + return sent; } int _drbd_send_page(struct drbd_conf *mdev, struct page *page, @@ -2222,7 +2231,7 @@ int _drbd_send_page(struct drbd_conf *mdev, struct page *page, continue; } if (sent <= 0) { - drbd_WARN("%s: size=%d len=%d sent=%d\n", + dev_warn(DEV, "%s: size=%d len=%d sent=%d\n", __func__, (int)size, len, sent); break; } @@ -2445,7 +2454,7 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, if (rv <= 0) { if (rv != -EAGAIN) { - ERR("%s_sendmsg returned %d\n", + dev_err(DEV, "%s_sendmsg returned %d\n", sock == mdev->meta.socket ? "msock" : "sock", rv); drbd_force_state(mdev, NS(conn, BrokenPipe)); @@ -2492,7 +2501,7 @@ STATIC void drbd_unplug_fn(struct request_queue *q) struct drbd_conf *mdev = q->queuedata; MTRACE(TraceTypeUnplug, TraceLvlSummary, - INFO("got unplugged ap_bio_count=%d\n", + dev_info(DEV, "got unplugged ap_bio_count=%d\n", atomic_read(&mdev->ap_bio_cnt)); ); @@ -2613,12 +2622,12 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) void drbd_mdev_cleanup(struct drbd_conf *mdev) { if (mdev->receiver.t_state != None) - ERR("ASSERT FAILED: receiver t_state == %d expected 0.\n", + dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n", mdev->receiver.t_state); /* no need to lock it, I'm the only thread alive */ if (atomic_read(&mdev->current_epoch->epoch_size) != 0) - ERR("epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size)); + dev_err(DEV, "epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size)); mdev->al_writ_cnt = mdev->bm_writ_cnt = mdev->read_cnt = @@ -2760,23 +2769,23 @@ static void drbd_release_ee_lists(struct drbd_conf *mdev) rr = drbd_release_ee(mdev, &mdev->active_ee); if (rr) - ERR("%d EEs in active list found!\n", rr); + dev_err(DEV, "%d EEs in active list found!\n", rr); rr = drbd_release_ee(mdev, &mdev->sync_ee); if (rr) - ERR("%d EEs in sync list found!\n", rr); + dev_err(DEV, "%d EEs in sync list found!\n", rr); rr = drbd_release_ee(mdev, &mdev->read_ee); if (rr) - ERR("%d EEs in read list found!\n", rr); + dev_err(DEV, "%d EEs in read list found!\n", rr); rr = drbd_release_ee(mdev, &mdev->done_ee); if (rr) - ERR("%d EEs in done list found!\n", rr); + dev_err(DEV, "%d EEs in done list found!\n", rr); rr = drbd_release_ee(mdev, &mdev->net_ee); if (rr) - ERR("%d EEs in net list found!\n", rr); + dev_err(DEV, "%d EEs in net list found!\n", rr); } /* caution. no locking. 
@@ -2790,7 +2799,7 @@ static void drbd_delete_device(unsigned int minor) /* paranoia asserts */ if (mdev->open_cnt != 0) - ERR("open_cnt = %d in %s:%u", mdev->open_cnt, + dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt, __FILE__ , __LINE__); ERR_IF (!list_empty(&mdev->data.work.q)) { @@ -3173,7 +3182,7 @@ void drbd_md_sync(struct drbd_conf *mdev) return; MTRACE(TraceTypeMDIO, TraceLvlSummary, - INFO("Writing meta data super block now.\n"); + dev_info(DEV, "Writing meta data super block now.\n"); ); mutex_lock(&mdev->md_io_mutex); @@ -3201,7 +3210,7 @@ void drbd_md_sync(struct drbd_conf *mdev) clear_bit(MD_DIRTY, &mdev->flags); } else { /* this was a try anyways ... */ - ERR("meta data update failed!\n"); + dev_err(DEV, "meta data update failed!\n"); drbd_chk_io_error(mdev, 1, TRUE); drbd_io_error(mdev, TRUE); @@ -3236,37 +3245,37 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { /* NOTE: cant do normal error processing here as this is called BEFORE disk is attached */ - ERR("Error while reading metadata.\n"); + dev_err(DEV, "Error while reading metadata.\n"); rv = MDIOError; goto err; } if (be32_to_cpu(buffer->magic) != DRBD_MD_MAGIC) { - ERR("Error while reading metadata, magic not found.\n"); + dev_err(DEV, "Error while reading metadata, magic not found.\n"); rv = MDInvalid; goto err; } if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) { - ERR("unexpected al_offset: %d (expected %d)\n", + dev_err(DEV, "unexpected al_offset: %d (expected %d)\n", be32_to_cpu(buffer->al_offset), bdev->md.al_offset); rv = MDInvalid; goto err; } if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) { - ERR("unexpected bm_offset: %d (expected %d)\n", + dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n", be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset); rv = MDInvalid; goto err; } if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) { - ERR("unexpected md_size: %u (expected %u)\n", + dev_err(DEV, "unexpected md_size: %u (expected %u)\n", be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect); rv = MDInvalid; goto err; } if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) { - ERR("unexpected bm_bytes_per_bit: %u (expected %u)\n", + dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n", be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE); rv = MDInvalid; goto err; @@ -3357,7 +3366,7 @@ void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) { u64 val; - INFO("Creating new current UUID\n"); + dev_info(DEV, "Creating new current UUID\n"); D_ASSERT(mdev->bc->md.uuid[Bitmap] == 0); mdev->bc->md.uuid[Bitmap] = mdev->bc->md.uuid[Current]; MTRACE(TraceTypeUuid, TraceLvlMetrics, @@ -3384,7 +3393,7 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) ); } else { if (mdev->bc->md.uuid[Bitmap]) - drbd_WARN("bm UUID already set"); + dev_warn(DEV, "bm UUID already set"); mdev->bc->md.uuid[Bitmap] = val; mdev->bc->md.uuid[Bitmap] &= ~((u64)1); @@ -3484,7 +3493,7 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev, D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags)); D_ASSERT(list_empty(&mdev->bm_io_work.w.list)); if (mdev->bm_io_work.why) - ERR("FIXME going to queue '%s' but '%s' still pending?\n", + dev_err(DEV, "FIXME going to queue '%s' but '%s' still pending?\n", why, mdev->bm_io_work.why); mdev->bm_io_work.io_fn = io_fn; @@ -3497,7 +3506,7 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev, set_bit(BITMAP_IO_QUEUED, &mdev->flags); 
drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); } else - ERR("FIXME avoided double queuing bm_io_work\n"); + dev_err(DEV, "FIXME avoided double queuing bm_io_work\n"); } } @@ -3552,7 +3561,7 @@ STATIC void md_sync_timer_fn(unsigned long data) STATIC int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused) { - drbd_WARN("md_sync_timer expired! Worker calls drbd_md_sync().\n"); + dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n"); drbd_md_sync(mdev); return 1; @@ -3619,7 +3628,7 @@ _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) fault_count++; if (printk_ratelimit()) - drbd_WARN("***Simulating %s failure\n", + dev_warn(DEV, "***Simulating %s failure\n", _drbd_fault_str(type)); } @@ -3646,7 +3655,7 @@ STATIC char *_drbd_uuid_str(unsigned int idx) /* Pretty print a UUID value */ void drbd_print_uuid(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) { - INFO(" uuid[%s] now %016llX\n", + dev_info(DEV, " uuid[%s] now %016llX\n", _drbd_uuid_str(idx), (unsigned long long)mdev->bc->md.uuid[idx]); } @@ -3821,12 +3830,12 @@ STATIC char *dump_st(char *p, int len, union drbd_state_t mask, union drbd_state #define INFOP(fmt, args...) \ do { \ if (trace_level >= TraceLvlAll) { \ - INFO("%s:%d: %s [%d] %s %s " fmt , \ + dev_info(DEV, "%s:%d: %s [%d] %s %s " fmt , \ file, line, current->comm, current->pid, \ sockname, recv ? "<<<" : ">>>" , \ ## args); \ } else { \ - INFO("%s %s " fmt, sockname, \ + dev_info(DEV, "%s %s " fmt, sockname, \ recv ? "<<<" : ">>>" , \ ## args); \ } \ @@ -3994,7 +4003,7 @@ void _dump_bio(const char *pfx, struct drbd_conf *mdev, struct bio *bio, int com if (r) sprintf(rb, "Req:%p ", r); - INFO("%s %s:%s%s%s Bio:%p %s- %soffset " SECTOR_FORMAT ", size %x\n", + dev_info(DEV, "%s %s:%s%s%s Bio:%p %s- %soffset " SECTOR_FORMAT ", size %x\n", complete ? "<<<" : ">>>", pfx, biorw == WRITE ? 
"Write" : "Read", diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 2ae42baf24f4..b3ddc467386f 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -57,7 +57,7 @@ STATIC int name ## _from_tags(struct drbd_conf *mdev, \ fields \ default: \ if (tag & T_MANDATORY) { \ - ERR("Unknown tag: %d\n", tag_number(tag)); \ + dev_err(DEV, "Unknown tag: %d\n", tag_number(tag)); \ return 0; \ } \ } \ @@ -80,7 +80,7 @@ STATIC int name ## _from_tags(struct drbd_conf *mdev, \ #define NL_STRING(pn, pr, member, len) \ case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \ if (dlen > len) { \ - ERR("arg too long: %s (%u wanted, max len: %u bytes)\n", \ + dev_err(DEV, "arg too long: %s (%u wanted, max len: %u bytes)\n", \ #member, dlen, (unsigned int)len); \ return 0; \ } \ @@ -181,16 +181,16 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev)); - INFO("helper command: %s %s %s\n", usermode_helper, cmd, mb); + dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb); drbd_bcast_ev_helper(mdev, cmd); ret = call_usermodehelper(usermode_helper, argv, envp, 1); if (ret) - drbd_WARN("helper command: %s %s %s exit code %u (0x%x)\n", + dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", usermode_helper, cmd, mb, (ret >> 8) & 0xff, ret); else - INFO("helper command: %s %s %s exit code %u (0x%x)\n", + dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", usermode_helper, cmd, mb, (ret >> 8) & 0xff, ret); @@ -213,7 +213,7 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev) fp = mdev->bc->dc.fencing; dec_local(mdev); } else { - drbd_WARN("Not fencing peer, I'm not even Consistent myself.\n"); + dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n"); return mdev->state.pdsk; } @@ -240,24 +240,24 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev) * This is useful when an unconnected Secondary is asked to * become Primary, but findes the other peer being active. */ ex_to_string = "peer is active"; - drbd_WARN("Peer is primary, outdating myself.\n"); + dev_warn(DEV, "Peer is primary, outdating myself.\n"); nps = DUnknown; _drbd_request_state(mdev, NS(disk, Outdated), ChgWaitComplete); break; case 7: if (fp != Stonith) - ERR("fence-peer() = 7 && fencing != Stonith !!!\n"); + dev_err(DEV, "fence-peer() = 7 && fencing != Stonith !!!\n"); ex_to_string = "peer was stonithed"; nps = Outdated; break; default: /* The script is broken ... */ nps = DUnknown; - ERR("fence-peer helper broken, returned %d\n", (r>>8)&0xff); + dev_err(DEV, "fence-peer helper broken, returned %d\n", (r>>8)&0xff); return nps; } - INFO("fence-peer helper returned %d (%s)\n", + dev_info(DEV, "fence-peer helper returned %d (%s)\n", (r>>8) & 0xff, ex_to_string); return nps; } @@ -322,7 +322,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) nps = drbd_try_outdate_peer(mdev); if (force && nps > Outdated) { - drbd_WARN("Forced into split brain situation!\n"); + dev_warn(DEV, "Forced into split brain situation!\n"); nps = Outdated; } @@ -350,7 +350,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) } if (forced) - drbd_WARN("Forced to consider local data as UpToDate!\n"); + dev_warn(DEV, "Forced to consider local data as UpToDate!\n"); fsync_bdev(mdev->this_bdev); @@ -560,10 +560,10 @@ enum determin_dev_size_enum drbd_determin_dev_size(struct drbd_conf *mdev) __mus /* currently there is only one error: ENOMEM! 
*/ size = drbd_bm_capacity(mdev)>>1; if (size == 0) { - ERR("OUT OF MEMORY! " + dev_err(DEV, "OUT OF MEMORY! " "Could not allocate bitmap!\n"); } else { - ERR("BM resizing failed. " + dev_err(DEV, "BM resizing failed. " "Leaving size unchanged at size = %lu KB\n", (unsigned long)size); } @@ -572,7 +572,7 @@ enum determin_dev_size_enum drbd_determin_dev_size(struct drbd_conf *mdev) __mus /* racy, see comments above. */ drbd_set_my_capacity(mdev, size); mdev->bc->md.la_size_sect = size; - INFO("size = %s (%llu KB)\n", ppsize(ppb, size>>1), + dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1), (unsigned long long)size>>1); } if (rv == dev_size_error) @@ -584,13 +584,13 @@ enum determin_dev_size_enum drbd_determin_dev_size(struct drbd_conf *mdev) __mus || prev_size != mdev->bc->md.md_size_sect; if (md_moved) { - drbd_WARN("Moving meta-data.\n"); + dev_warn(DEV, "Moving meta-data.\n"); /* assert: (flexible) internal meta data */ } if (la_size_changed || md_moved) { drbd_al_shrink(mdev); /* All extents inactive. */ - INFO("Writing the whole bitmap, size changed\n"); + dev_info(DEV, "Writing the whole bitmap, size changed\n"); rv = drbd_bitmap_io(mdev, &drbd_bm_write, "size changed"); drbd_md_mark_dirty(mdev); } @@ -636,11 +636,11 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) } if (size == 0) - ERR("Both nodes diskless!\n"); + dev_err(DEV, "Both nodes diskless!\n"); if (u_size) { if (u_size > size) - ERR("Requested disk size is too big (%lu > %lu)\n", + dev_err(DEV, "Requested disk size is too big (%lu > %lu)\n", (unsigned long)u_size>>1, (unsigned long)size>>1); else size = u_size; @@ -676,7 +676,7 @@ STATIC int drbd_check_al_size(struct drbd_conf *mdev) sizeof(struct lc_element), mdev); if (n == NULL) { - ERR("Cannot allocate act_log lru!\n"); + dev_err(DEV, "Cannot allocate act_log lru!\n"); return -ENOMEM; } spin_lock_irq(&mdev->al_lock); @@ -684,7 +684,7 @@ STATIC int drbd_check_al_size(struct drbd_conf *mdev) for (i = 0; i < t->nr_elements; i++) { e = lc_entry(t, i); if (e->refcnt) - ERR("refcnt(%d)==%d\n", + dev_err(DEV, "refcnt(%d)==%d\n", e->lc_number, e->refcnt); in_use += e->refcnt; } @@ -693,7 +693,7 @@ STATIC int drbd_check_al_size(struct drbd_conf *mdev) mdev->act_log = n; spin_unlock_irq(&mdev->al_lock); if (in_use) { - ERR("Activity log still in use!\n"); + dev_err(DEV, "Activity log still in use!\n"); lc_free(n); return -EBUSY; } else { @@ -756,12 +756,12 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu ); if (b->merge_bvec_fn) - drbd_WARN("Backing device's merge_bvec_fn() = %p\n", + dev_warn(DEV, "Backing device's merge_bvec_fn() = %p\n", b->merge_bvec_fn); - INFO("max_segment_size ( = BIO size ) = %u\n", q->max_segment_size); + dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", q->max_segment_size); if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { - INFO("Adjusting my ra_pages to backing device's (%lu -> %lu)\n", + dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", q->backing_dev_info.ra_pages, b->backing_dev_info.ra_pages); q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; @@ -800,7 +800,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp if (nbc == NULL) break; if (ntries++ >= 5) { - drbd_WARN("drbd_nl_disk_conf: mdev->bc not NULL.\n"); + dev_warn(DEV, "drbd_nl_disk_conf: mdev->bc not NULL.\n"); retcode = HaveDiskConfig; goto fail; } @@ -842,7 +842,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct 
drbd_nl_cfg_req *nlp nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0); if (IS_ERR(nbc->lo_file)) { - ERR("open(\"%s\") failed with %ld\n", nbc->dc.backing_dev, + dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev, PTR_ERR(nbc->lo_file)); nbc->lo_file = NULL; retcode = LDNameInvalid; @@ -858,7 +858,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp nbc->md_file = filp_open(nbc->dc.meta_dev, O_RDWR, 0); if (IS_ERR(nbc->md_file)) { - ERR("open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, + dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, PTR_ERR(nbc->md_file)); nbc->md_file = NULL; retcode = MDNameInvalid; @@ -916,7 +916,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp drbd_md_set_sector_offsets(mdev, nbc); if (drbd_get_max_capacity(nbc) < nbc->dc.disk_size) { - ERR("max capacity %llu smaller than disk size %llu\n", + dev_err(DEV, "max capacity %llu smaller than disk size %llu\n", (unsigned long long) drbd_get_max_capacity(nbc), (unsigned long long) nbc->dc.disk_size); retcode = LDDeviceTooSmall; @@ -933,13 +933,13 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } if (drbd_get_capacity(nbc->md_bdev) > max_possible_sectors) - drbd_WARN("truncating very big lower level device " + dev_warn(DEV, "truncating very big lower level device " "to currently maximum possible %llu sectors\n", (unsigned long long) max_possible_sectors); if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) { retcode = MDDeviceTooSmall; - drbd_WARN("refusing attach: md-device too small, " + dev_warn(DEV, "refusing attach: md-device too small, " "at least %llu sectors needed for this meta-disk type\n", (unsigned long long) min_md_device_sectors); goto release_bdev2_fail; @@ -975,7 +975,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp if (mdev->state.conn < Connected && mdev->state.role == Primary && (mdev->ed_uuid & ~((u64)1)) != (nbc->md.uuid[Current] & ~((u64)1))) { - ERR("Can only attach to data with current UUID=%016llX\n", + dev_err(DEV, "Can only attach to data with current UUID=%016llX\n", (unsigned long long)mdev->ed_uuid); retcode = DataOfWrongCurrent; goto force_diskless_dec; @@ -990,7 +990,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp /* Prevent shrinking of consistent devices ! 
*/ if (drbd_md_test_flag(nbc, MDF_Consistent) && drbd_new_dev_size(mdev, nbc) < nbc->md.la_size_sect) { - drbd_WARN("refusing to truncate a consistent device\n"); + dev_warn(DEV, "refusing to truncate a consistent device\n"); retcode = LDDeviceTooSmall; goto force_diskless_dec; } @@ -1011,9 +1011,9 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp if (!page) goto force_diskless_dec; - drbd_WARN("Meta data's bdev hardsect = %d != %d\n", + dev_warn(DEV, "Meta data's bdev hardsect = %d != %d\n", hardsect, MD_HARDSECT); - drbd_WARN("Workaround engaged (has performace impact).\n"); + dev_warn(DEV, "Workaround engaged (has performance impact).\n"); mdev->md_io_tmpp = page; } @@ -1084,7 +1084,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp set_bit(RESYNC_AFTER_NEG, &mdev->flags); if (drbd_md_test_flag(mdev->bc, MDF_FullSync)) { - INFO("Assuming that all blocks are out of sync " + dev_info(DEV, "Assuming that all blocks are out of sync " "(aka FullSync)\n"); if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from attaching")) { retcode = MDIOError; @@ -1197,8 +1197,6 @@ STATIC int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, return 0; } -#define HMAC_NAME_L 20 - STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { @@ -1211,7 +1209,7 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct hlist_head *new_tl_hash = NULL; struct hlist_head *new_ee_hash = NULL; struct drbd_conf *odev; - char hmac_name[HMAC_NAME_L]; + char hmac_name[CRYPTO_MAX_ALG_NAME]; void *int_dig_out = NULL; void *int_dig_in = NULL; void *int_dig_vv = NULL; @@ -1295,7 +1293,7 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, #undef O_PORT if (new_conf->cram_hmac_alg[0] != 0) { - snprintf(hmac_name, HMAC_NAME_L, "hmac(%s)", + snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", new_conf->cram_hmac_alg); tfm = crypto_alloc_hash(hmac_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { @@ -1319,7 +1317,7 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, goto fail; } - if (crypto_tfm_alg_type(crypto_hash_tfm(integrity_w_tfm)) != CRYPTO_ALG_TYPE_DIGEST) { + if (!drbd_crypto_is_hash(crypto_hash_tfm(integrity_w_tfm))) { retcode=IntegrityAlgNotDigest; goto fail; } @@ -1508,7 +1506,7 @@ void resync_after_online_grow(struct drbd_conf *mdev) { int iass; /* I am sync source */ - INFO("Resync of new storage after online grow\n"); + dev_info(DEV, "Resync of new storage after online grow\n"); if (mdev->state.role != mdev->state.peer) iass = (mdev->state.role == Primary); else @@ -1639,7 +1637,7 @@ STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n goto fail; } - if (crypto_tfm_alg_type(crypto_hash_tfm(csums_tfm)) != CRYPTO_ALG_TYPE_DIGEST) { + if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) { retcode = CSUMSAlgNotDigest; goto fail; } @@ -1663,7 +1661,7 @@ STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n goto fail; } - if (crypto_tfm_alg_type(crypto_hash_tfm(verify_tfm)) != CRYPTO_ALG_TYPE_DIGEST) { + if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) { retcode = VERIFYAlgNotDigest; goto fail; } @@ -1672,7 +1670,7 @@ STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n if (sc.cpu_mask[0] != 0) { err = __bitmap_parse(sc.cpu_mask, 32, 0, (unsigned long *)&n_cpu_mask, NR_CPUS); if (err) { - 
drbd_WARN("__bitmap_parse() failed with %d\n", err); + dev_warn(DEV, "__bitmap_parse() failed with %d\n", err); retcode = CPUMaskParseFailed; goto fail; } @@ -1682,7 +1680,7 @@ STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n ERR_IF (sc.al_extents < 7) sc.al_extents = 127; /* arbitrary minimum */ #define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT) if (sc.al_extents > AL_MAX) { - ERR("sc.al_extents > %d\n", AL_MAX); + dev_err(DEV, "sc.al_extents > %d\n", AL_MAX); sc.al_extents = AL_MAX; } #undef AL_MAX @@ -1933,6 +1931,7 @@ STATIC int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl struct drbd_nl_cfg_reply *reply) { int retcode = NoError; + int skip_initial_sync = 0; int err; struct new_c_uuid args; @@ -1945,28 +1944,40 @@ STATIC int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl mutex_lock(&mdev->state_mutex); /* Protects us against serialized state changes. */ - if (mdev->state.conn >= Connected) { - retcode = MayNotBeConnected; - goto out; - } - if (!inc_local(mdev)) { retcode = HaveNoDiskConfig; goto out; } + /* this is "skip initial sync", assume to be clean */ + if (mdev->state.conn == Connected && mdev->agreed_pro_version >= 90 && + mdev->bc->md.uuid[Current] == UUID_JUST_CREATED && args.clear_bm) { + dev_info(DEV, "Preparing to skip initial sync\n"); + skip_initial_sync = 1; + } else if (mdev->state.conn >= Connected) { + retcode = MayNotBeConnected; + goto out_dec; + } + drbd_uuid_set(mdev, Bitmap, 0); /* Rotate Bitmap to History 1, etc... */ drbd_uuid_new_current(mdev); /* New current, previous to Bitmap */ if (args.clear_bm) { err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, "clear_n_write from new_c_uuid"); if (err) { - ERR("Writing bitmap failed with %d\n",err); + dev_err(DEV, "Writing bitmap failed with %d\n",err); retcode = MDIOError; } + if (skip_initial_sync) { + drbd_send_uuids_skip_initial_sync(mdev); + _drbd_uuid_set(mdev, Bitmap, 0); + _drbd_set_state(_NS2(mdev, disk, UpToDate, pdsk, UpToDate), + ChgStateVerbose, NULL); + } } drbd_md_sync(mdev); +out_dec: dec_local(mdev); out: mutex_unlock(&mdev->state_mutex); @@ -2191,7 +2202,7 @@ void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state_t state) (struct drbd_nl_cfg_reply *)cn_reply->data; unsigned short *tl = reply->tag_list; - /* drbd_WARN("drbd_bcast_state() got called\n"); */ + /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */ tl = get_state_to_tags(mdev, (struct get_state *)&state, tl); *tl++ = TT_END; /* Close the tag list */ @@ -2226,7 +2237,7 @@ void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name) unsigned short *tl = reply->tag_list; int str_len; - /* drbd_WARN("drbd_bcast_state() got called\n"); */ + /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */ str_len = strlen(helper_name)+1; *tl++ = T_helper; @@ -2280,7 +2291,7 @@ void drbd_bcast_ee(struct drbd_conf *mdev, , GFP_KERNEL); if (!cn_reply) { - ERR("could not kmalloc buffer for drbd_bcast_ee, sector %llu, size %u\n", + dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, sector %llu, size %u\n", (unsigned long long)e->sector, e->size); return; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2a2600d113ee..64408cdcab8d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -144,7 +144,7 @@ STATIC struct page *drbd_pp_alloc(struct drbd_conf *mdev, gfp_t gfp_mask) * unless, of course, someone signalled us. 
*/ if (signal_pending(current)) { - drbd_WARN("drbd_pp_alloc interrupted!\n"); + dev_warn(DEV, "drbd_pp_alloc interrupted!\n"); finish_wait(&drbd_pp_wait, &wait); return NULL; } @@ -218,14 +218,14 @@ struct Tl_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, e = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); if (!e) { if (!(gfp_mask & __GFP_NOWARN)) - ERR("alloc_ee: Allocation of an EE failed\n"); + dev_err(DEV, "alloc_ee: Allocation of an EE failed\n"); return NULL; } bio = bio_alloc(gfp_mask & ~__GFP_HIGHMEM, div_ceil(data_size, PAGE_SIZE)); if (!bio) { if (!(gfp_mask & __GFP_NOWARN)) - ERR("alloc_ee: Allocation of a bio failed\n"); + dev_err(DEV, "alloc_ee: Allocation of a bio failed\n"); goto fail1; } @@ -237,12 +237,12 @@ struct Tl_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, page = drbd_pp_alloc(mdev, gfp_mask); if (!page) { if (!(gfp_mask & __GFP_NOWARN)) - ERR("alloc_ee: Allocation of a page failed\n"); + dev_err(DEV, "alloc_ee: Allocation of a page failed\n"); goto fail2; } if (!bio_add_page(bio, page, min_t(int, ds, PAGE_SIZE), 0)) { drbd_pp_free(mdev, page); - ERR("alloc_ee: bio_add_page(s=%llu," + dev_err(DEV, "alloc_ee: bio_add_page(s=%llu," "data_size=%u,ds=%u) failed\n", (unsigned long long)sector, data_size, ds); @@ -256,7 +256,7 @@ struct Tl_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, }; int l = q->merge_bvec_fn(q, &bvm, &bio->bi_io_vec[bio->bi_vcnt]); - ERR("merge_bvec_fn() = %d\n", l); + dev_err(DEV, "merge_bvec_fn() = %d\n", l); } /* dump more of the bio. */ @@ -285,7 +285,7 @@ struct Tl_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, e->flags = 0; MTRACE(TraceTypeEE, TraceLvlAll, - INFO("allocated EE sec=%llus size=%u ee=%p\n", + dev_info(DEV, "allocated EE sec=%llus size=%u ee=%p\n", (unsigned long long)sector, data_size, e); ); @@ -309,7 +309,7 @@ void drbd_free_ee(struct drbd_conf *mdev, struct Tl_epoch_entry *e) int i; MTRACE(TraceTypeEE, TraceLvlAll, - INFO("Free EE sec=%llus size=%u ee=%p\n", + dev_info(DEV, "Free EE sec=%llus size=%u ee=%p\n", (unsigned long long)e->sector, e->size, e); ); @@ -391,7 +391,7 @@ STATIC int drbd_process_done_ee(struct drbd_conf *mdev) */ list_for_each_entry_safe(e, t, &work_list, w.list) { MTRACE(TraceTypeEE, TraceLvlAll, - INFO("Process EE on done_ee sec=%llus size=%u ee=%p\n", + dev_info(DEV, "Process EE on done_ee sec=%llus size=%u ee=%p\n", (unsigned long long)e->sector, e->size, e); ); /* list_del not necessary, next/prev members not touched */ @@ -549,12 +549,12 @@ STATIC int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) if (rv < 0) { if (rv == -ECONNRESET) - INFO("sock was reset by peer\n"); + dev_info(DEV, "sock was reset by peer\n"); else if (rv != -ERESTARTSYS) - ERR("sock_recvmsg returned %d\n", rv); + dev_err(DEV, "sock_recvmsg returned %d\n", rv); break; } else if (rv == 0) { - INFO("sock was shut down by peer\n"); + dev_info(DEV, "sock was shut down by peer\n"); break; } else { /* signal came in, or peer/link went down, @@ -640,7 +640,7 @@ out: disconnect_on_error = 0; break; default: - ERR("%s failed, err = %d\n", what, err); + dev_err(DEV, "%s failed, err = %d\n", what, err); } if (disconnect_on_error) drbd_force_state(mdev, NS(conn, Disconnecting)); @@ -687,7 +687,7 @@ out: sock_release(s_listen); if (err < 0) { if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { - ERR("%s failed, err = %d\n", what, err); + dev_err(DEV, "%s failed, err = %d\n", what, err); drbd_force_state(mdev, NS(conn, Disconnecting)); } } @@ -757,7 +757,7 @@ STATIC int drbd_connect(struct 
drbd_conf *mdev) D_ASSERT(!mdev->data.socket); if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) - ERR("CREATE_BARRIER flag was set in drbd_connect - now cleared!\n"); + dev_err(DEV, "CREATE_BARRIER flag was set in drbd_connect - now cleared!\n"); if (drbd_request_state(mdev, NS(conn, WFConnection)) < SS_Success) return -2; @@ -788,7 +788,7 @@ STATIC int drbd_connect(struct drbd_conf *mdev) msock = s; s = NULL; } else { - ERR("Logic error in drbd_connect()\n"); + dev_err(DEV, "Logic error in drbd_connect()\n"); return -1; } } @@ -811,21 +811,21 @@ retry: switch (try) { case HandShakeS: if (sock) { - drbd_WARN("initial packet S crossed\n"); + dev_warn(DEV, "initial packet S crossed\n"); sock_release(sock); } sock = s; break; case HandShakeM: if (msock) { - drbd_WARN("initial packet M crossed\n"); + dev_warn(DEV, "initial packet M crossed\n"); sock_release(msock); } msock = s; set_bit(DISCARD_CONCURRENT, &mdev->flags); break; default: - drbd_WARN("Error receiving initial packet\n"); + dev_warn(DEV, "Error receiving initial packet\n"); sock_release(s); if (random32() & 1) goto retry; @@ -897,7 +897,7 @@ retry: if (mdev->cram_hmac_tfm) { /* drbd_request_state(mdev, NS(conn, WFAuth)); */ if (!drbd_do_auth(mdev)) { - ERR("Authentication of peer failed\n"); + dev_err(DEV, "Authentication of peer failed\n"); return -1; } } @@ -930,13 +930,13 @@ STATIC int drbd_recv_header(struct drbd_conf *mdev, struct Drbd_Header *h) r = drbd_recv(mdev, h, sizeof(*h)); if (unlikely(r != sizeof(*h))) { - ERR("short read expecting header on sock: r=%d\n", r); + dev_err(DEV, "short read expecting header on sock: r=%d\n", r); return FALSE; }; h->command = be16_to_cpu(h->command); h->length = be16_to_cpu(h->length); if (unlikely(h->magic != BE_DRBD_MAGIC)) { - ERR("magic?? on data m: 0x%lx c: %d l: %d\n", + dev_err(DEV, "magic?? on data m: 0x%lx c: %d l: %d\n", (long)be32_to_cpu(h->magic), h->command, h->length); return FALSE; @@ -953,7 +953,7 @@ STATIC enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d if (mdev->write_ordering >= WO_bdev_flush && inc_local(mdev)) { rv = blkdev_issue_flush(mdev->bc->backing_bdev, NULL); if (rv) { - ERR("local disk flush failed with status %d\n", rv); + dev_err(DEV, "local disk flush failed with status %d\n", rv); /* would rather check on EOPNOTSUPP, but that is not reliable. * don't try again for ANY return value != 0 * if (rv == -EOPNOTSUPP) */ @@ -1035,7 +1035,7 @@ STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, } MTRACE(TraceTypeEpochs, TraceLvlAll, - INFO("Update epoch %p/%d { size=%d active=%d %c%c n%c%c } ev=%s\n", + dev_info(DEV, "Update epoch %p/%d { size=%d active=%d %c%c n%c%c } ev=%s\n", epoch, epoch->barrier_nr, epoch_size, atomic_read(&epoch->active), test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) ? 'n' : '-', test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) ? 
'b' : '-', @@ -1076,7 +1076,7 @@ STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, ev = EV_became_last | (ev & EV_cleanup); mdev->epochs--; MTRACE(TraceTypeEpochs, TraceLvlSummary, - INFO("Freeing epoch %p/%d { size=%d } nr_epochs=%d\n", + dev_info(DEV, "Freeing epoch %p/%d { size=%d } nr_epochs=%d\n", epoch, epoch->barrier_nr, epoch_size, mdev->epochs); ); kfree(epoch); @@ -1105,14 +1105,14 @@ STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, fw = kmalloc(sizeof(*fw), GFP_ATOMIC); if (fw) { MTRACE(TraceTypeEpochs, TraceLvlMetrics, - INFO("Schedul flush %p/%d { size=%d } nr_epochs=%d\n", + dev_info(DEV, "Schedule flush %p/%d { size=%d } nr_epochs=%d\n", epoch, epoch->barrier_nr, epoch_size, mdev->epochs); ); fw->w.cb = w_flush; fw->epoch = epoch; drbd_queue_work(&mdev->data.work, &fw->w); } else { - drbd_WARN("Could not kmalloc a flush_work obj\n"); + dev_warn(DEV, "Could not kmalloc a flush_work obj\n"); set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags); /* That is not a recursion, only one level */ drbd_may_finish_epoch(mdev, epoch, EV_barrier_done); @@ -1147,7 +1147,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) wo = WO_none; mdev->write_ordering = wo; if (pwo != mdev->write_ordering || wo == WO_bio_barrier) - INFO("Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]); + dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]); } /** @@ -1170,7 +1170,7 @@ int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __relea print that warning an continue corretly for all future requests with WO_bdev_flush */ if (previous_epoch(mdev, e->epoch)) - drbd_WARN("Write ordering was not enforced (one time event)\n"); + dev_warn(DEV, "Write ordering was not enforced (one time event)\n"); /* prepare bio for re-submit, * re-init volatile members */ @@ -1250,7 +1250,7 @@ STATIC int receive_Barrier(struct drbd_conf *mdev, struct Drbd_Header *h) epoch = kmalloc(sizeof(struct drbd_epoch), GFP_KERNEL); if (!epoch) { - drbd_WARN("Allocation of an epoch failed, slowing down\n"); + dev_warn(DEV, "Allocation of an epoch failed, slowing down\n"); issue_flush = !test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags); drbd_wait_ee_list_empty(mdev, &mdev->active_ee); if (issue_flush) { @@ -1274,7 +1274,7 @@ STATIC int receive_Barrier(struct drbd_conf *mdev, struct Drbd_Header *h) mdev->current_epoch = epoch; mdev->epochs++; MTRACE(TraceTypeEpochs, TraceLvlMetrics, - INFO("Allocat epoch %p/xxxx { } nr_epochs=%d\n", epoch, mdev->epochs); + dev_info(DEV, "Allocate epoch %p/xxxx { } nr_epochs=%d\n", epoch, mdev->epochs); ); } else { /* The current_epoch got recycled while we allocated this one... 
*/ @@ -1304,7 +1304,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ if (dgs) { rr = drbd_recv(mdev, dig_in, dgs); if (rr != dgs) { - drbd_WARN("short read receiving data digest: read %d expected %d\n", + dev_warn(DEV, "short read receiving data digest: read %d expected %d\n", rr, dgs); return NULL; } @@ -1326,7 +1326,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ kunmap(page); if (rr != min_t(int, ds, PAGE_SIZE)) { drbd_free_ee(mdev, e); - drbd_WARN("short read receiving data: read %d expected %d\n", + dev_warn(DEV, "short read receiving data: read %d expected %d\n", rr, min_t(int, ds, PAGE_SIZE)); return NULL; } @@ -1336,7 +1336,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ if (dgs) { drbd_csum(mdev, mdev->integrity_r_tfm, bio, dig_vv); if (memcmp(dig_in, dig_vv, dgs)) { - ERR("Digest integrity check FAILED.\n"); + dev_err(DEV, "Digest integrity check FAILED.\n"); drbd_bcast_ee(mdev, "digest failed", dgs, dig_in, dig_vv, e); drbd_free_ee(mdev, e); @@ -1363,7 +1363,7 @@ STATIC int drbd_drain_block(struct drbd_conf *mdev, int data_size) rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE)); if (rr != min_t(int, data_size, PAGE_SIZE)) { rv = 0; - drbd_WARN("short read receiving data: read %d expected %d\n", + dev_warn(DEV, "short read receiving data: read %d expected %d\n", rr, min_t(int, data_size, PAGE_SIZE)); break; } @@ -1398,7 +1398,7 @@ STATIC int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, if (dgs) { rr = drbd_recv(mdev, dig_in, dgs); if (rr != dgs) { - drbd_WARN("short read receiving data reply digest: read %d expected %d\n", + dev_warn(DEV, "short read receiving data reply digest: read %d expected %d\n", rr, dgs); return 0; } @@ -1416,7 +1416,7 @@ STATIC int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, expect); kunmap(bvec->bv_page); if (rr != expect) { - drbd_WARN("short read receiving data reply: " + dev_warn(DEV, "short read receiving data reply: " "read %d expected %d\n", rr, expect); return 0; @@ -1427,7 +1427,7 @@ STATIC int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, if (dgs) { drbd_csum(mdev, mdev->integrity_r_tfm, bio, dig_vv); if (memcmp(dig_in, dig_vv, dgs)) { - ERR("Digest integrity check FAILED. Broken NICs?\n"); + dev_err(DEV, "Digest integrity check FAILED. 
Broken NICs?\n"); return 0; } } @@ -1486,7 +1486,7 @@ STATIC int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si spin_unlock_irq(&mdev->req_lock); MTRACE(TraceTypeEE, TraceLvlAll, - INFO("submit EE (RS)WRITE sec=%llus size=%u ee=%p\n", + dev_info(DEV, "submit EE (RS)WRITE sec=%llus size=%u ee=%p\n", (unsigned long long)e->sector, e->size, e); ); dump_internal_bio("Sec", mdev, e->private_bio, 0); @@ -1519,7 +1519,7 @@ STATIC int receive_DataReply(struct drbd_conf *mdev, struct Drbd_Header *h) req = _ar_id_to_req(mdev, p->block_id, sector); spin_unlock_irq(&mdev->req_lock); if (unlikely(!req)) { - ERR("Got a corrupt block_id/sector pair(1).\n"); + dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n"); return FALSE; } @@ -1562,7 +1562,7 @@ STATIC int receive_RSDataReply(struct drbd_conf *mdev, struct Drbd_Header *h) ok = recv_resync_read(mdev, sector, data_size); } else { if (__ratelimit(&drbd_ratelimit_state)) - ERR("Can not write resync data to local disk.\n"); + dev_err(DEV, "Can not write resync data to local disk.\n"); ok = drbd_drain_block(mdev, data_size); @@ -1684,7 +1684,7 @@ static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) spin_lock(&mdev->peer_seq_lock); if (timeout == 0 && p_seq == mdev->peer_seq) { ret = -ETIMEDOUT; - ERR("ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n"); + dev_err(DEV, "ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n"); break; } } @@ -1718,7 +1718,7 @@ STATIC int receive_Data(struct drbd_conf *mdev, struct Drbd_Header *h) * corresponding dec_local done either below (on error), * or in drbd_endio_write_sec. */ if (__ratelimit(&drbd_ratelimit_state)) - ERR("Can not write mirrored data block " + dev_err(DEV, "Can not write mirrored data block " "to local disk.\n"); spin_lock(&mdev->peer_seq_lock); if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num)) @@ -1753,7 +1753,7 @@ STATIC int receive_Data(struct drbd_conf *mdev, struct Drbd_Header *h) epoch = list_entry(e->epoch->list.prev, struct drbd_epoch, list); if (epoch == e->epoch) { MTRACE(TraceTypeEpochs, TraceLvlMetrics, - INFO("Add barrier %p/%d\n", + dev_info(DEV, "Add barrier %p/%d\n", epoch, epoch->barrier_nr); ); set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags); @@ -1763,7 +1763,7 @@ STATIC int receive_Data(struct drbd_conf *mdev, struct Drbd_Header *h) if (atomic_read(&epoch->epoch_size) > 1 || !test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) { MTRACE(TraceTypeEpochs, TraceLvlMetrics, - INFO("Add barrier %p/%d, setting bi in %p/%d\n", + dev_info(DEV, "Add barrier %p/%d, setting bi in %p/%d\n", e->epoch, e->epoch->barrier_nr, epoch, epoch->barrier_nr); ); @@ -1856,7 +1856,7 @@ STATIC int receive_Data(struct drbd_conf *mdev, struct Drbd_Header *h) /* only ALERT on first iteration, * we may be woken up early... */ if (first) - ALERT("%s[%u] Concurrent local write detected!" + dev_alert(DEV, "%s[%u] Concurrent local write detected!" " new: %llus +%u; pending: %llus +%u\n", current->comm, current->pid, (unsigned long long)sector, size, @@ -1872,7 +1872,7 @@ STATIC int receive_Data(struct drbd_conf *mdev, struct Drbd_Header *h) /* Discard Ack only for the _first_ iteration */ if (first && discard && have_unacked) { - ALERT("Concurrent write! [DISCARD BY FLAG] sec=%llus\n", + dev_alert(DEV, "Concurrent write! 
[DISCARD BY FLAG] sec=%llus\n", (unsigned long long)sector); inc_unacked(mdev); e->w.cb = e_send_discard_ack; @@ -1901,7 +1901,7 @@ STATIC int receive_Data(struct drbd_conf *mdev, struct Drbd_Header *h) spin_unlock_irq(&mdev->req_lock); if (first) { first = 0; - ALERT("Concurrent write! [W AFTERWARDS] " + dev_alert(DEV, "Concurrent write! [W AFTERWARDS] " "sec=%llus\n", (unsigned long long)sector); } else if (discard) { /* we had none on the first iteration. @@ -1941,7 +1941,7 @@ STATIC int receive_Data(struct drbd_conf *mdev, struct Drbd_Header *h) } MTRACE(TraceTypeEE, TraceLvlAll, - INFO("submit EE (DATA)WRITE sec=%llus size=%u ee=%p\n", + dev_info(DEV, "submit EE (DATA)WRITE sec=%llus size=%u ee=%p\n", (unsigned long long)e->sector, e->size, e); ); @@ -1981,19 +1981,19 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct Drbd_Header *h) size = be32_to_cpu(p->blksize); if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { - ERR("%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, + dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, (unsigned long long)sector, size); return FALSE; } if (sector + (size>>9) > capacity) { - ERR("%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, + dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, (unsigned long long)sector, size); return FALSE; } if (!inc_local_if_state(mdev, UpToDate)) { if (__ratelimit(&drbd_ratelimit_state)) - ERR("Can not satisfy peer's read request, " + dev_err(DEV, "Can not satisfy peer's read request, " "no local data.\n"); drbd_send_ack_rp(mdev, h->command == DataRequest ? NegDReply : NegRSDReply , p); @@ -2093,7 +2093,7 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct Drbd_Header *h) default: - ERR("unexpected command (%s) in receive_DataRequest\n", + dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n", cmdname(h->command)); fault_type = DRBD_FAULT_MAX; } @@ -2105,7 +2105,7 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct Drbd_Header *h) inc_unacked(mdev); MTRACE(TraceTypeEE, TraceLvlAll, - INFO("submit EE READ sec=%llus size=%u ee=%p\n", + dev_info(DEV, "submit EE READ sec=%llus size=%u ee=%p\n", (unsigned long long)e->sector, e->size, e); ); @@ -2131,7 +2131,7 @@ STATIC int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) case Consensus: case DiscardSecondary: case CallHelper: - ERR("Configuration error.\n"); + dev_err(DEV, "Configuration error.\n"); break; case Disconnect: break; @@ -2155,7 +2155,7 @@ STATIC int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) break; } /* Else fall through to one of the other strategies... 
*/ - drbd_WARN("Discard younger/older primary did not found a decision\n" + dev_warn(DEV, "Discard younger/older primary did not found a decision\n" "Using discard-least-changes instead\n"); case DiscardZeroChg: if (ch_peer == 0 && ch_self == 0) { @@ -2201,7 +2201,7 @@ STATIC int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) case DiscardLeastChg: case DiscardLocal: case DiscardRemote: - ERR("Configuration error.\n"); + dev_err(DEV, "Configuration error.\n"); break; case Disconnect: break; @@ -2224,7 +2224,7 @@ STATIC int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) if (self != SS_Success) { drbd_khelper(mdev, "pri-lost-after-sb"); } else { - drbd_WARN("Sucessfully gave up primary role.\n"); + dev_warn(DEV, "Sucessfully gave up primary role.\n"); rv = hg; } } else @@ -2249,7 +2249,7 @@ STATIC int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) case DiscardRemote: case Consensus: case DiscardSecondary: - ERR("Configuration error.\n"); + dev_err(DEV, "Configuration error.\n"); break; case Violently: rv = drbd_asb_recover_0p(mdev); @@ -2263,7 +2263,7 @@ STATIC int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) if (self != SS_Success) { drbd_khelper(mdev, "pri-lost-after-sb"); } else { - drbd_WARN("Sucessfully gave up primary role.\n"); + dev_warn(DEV, "Sucessfully gave up primary role.\n"); rv = hg; } } else @@ -2277,10 +2277,10 @@ STATIC void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid, u64 bits, u64 flags) { if (!uuid) { - INFO("%s uuid info vanished while I was looking!\n", text); + dev_info(DEV, "%s uuid info vanished while I was looking!\n", text); return; } - INFO("%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n", + dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n", text, (unsigned long long)uuid[Current], (unsigned long long)uuid[Bitmap], @@ -2404,15 +2404,15 @@ STATIC enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol hg = drbd_uuid_compare(mdev, &rule_nr); - INFO("drbd_sync_handshake:\n"); + dev_info(DEV, "drbd_sync_handshake:\n"); drbd_uuid_dump(mdev, "self", mdev->bc->md.uuid, mdev->state.disk >= Negotiating ? drbd_bm_total_weight(mdev) : 0, 0); drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UUID_SIZE], mdev->p_uuid[UUID_FLAGS]); - INFO("uuid_compare()=%d by rule %d\n", hg, rule_nr); + dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr); if (hg == -1000) { - ALERT("Unrelated data, aborting!\n"); + dev_alert(DEV, "Unrelated data, aborting!\n"); return conn_mask; } @@ -2422,7 +2422,7 @@ STATIC enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol hg = mydisk > Inconsistent ? 1 : -1; if (f) hg = hg*2; - INFO("Becoming sync %s due to disk states.\n", + dev_info(DEV, "Becoming sync %s due to disk states.\n", hg > 0 ? "source" : "target"); } @@ -2443,11 +2443,11 @@ STATIC enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol break; } if (abs(hg) < 100) { - drbd_WARN("Split-Brain detected, %d primaries, " + dev_warn(DEV, "Split-Brain detected, %d primaries, " "automatically solved. Sync from %s node\n", pcount, (hg < 0) ? "peer" : "this"); if (forced) { - drbd_WARN("Doing a full sync, since" + dev_warn(DEV, "Doing a full sync, since" " UUIDs where ambiguous.\n"); hg = hg*2; } @@ -2461,19 +2461,19 @@ STATIC enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol hg = 1; if (abs(hg) < 100) - drbd_WARN("Split-Brain detected, manually solved. 
" + dev_warn(DEV, "Split-Brain detected, manually solved. " "Sync from %s node\n", (hg < 0) ? "peer" : "this"); } if (hg == -100) { - ALERT("Split-Brain detected, dropping connection!\n"); + dev_alert(DEV, "Split-Brain detected, dropping connection!\n"); drbd_khelper(mdev, "split-brain"); return conn_mask; } if (hg > 0 && mydisk <= Inconsistent) { - ERR("I shall become SyncSource, but I am inconsistent!\n"); + dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n"); return conn_mask; } @@ -2484,16 +2484,16 @@ STATIC enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol drbd_khelper(mdev, "pri-lost"); /* fall through */ case Disconnect: - ERR("I shall become SyncTarget, but I am primary!\n"); + dev_err(DEV, "I shall become SyncTarget, but I am primary!\n"); return conn_mask; case Violently: - drbd_WARN("Becoming SyncTarget, violating the stable-data" + dev_warn(DEV, "Becoming SyncTarget, violating the stable-data" "assumption\n"); } } if (abs(hg) >= 2) { - INFO("Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); + dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake")) return conn_mask; } @@ -2505,7 +2505,7 @@ STATIC enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } else { rv = Connected; if (drbd_bm_total_weight(mdev)) { - INFO("No resync, but %lu bits in bitmap!\n", + dev_info(DEV, "No resync, but %lu bits in bitmap!\n", drbd_bm_total_weight(mdev)); } } @@ -2558,32 +2558,32 @@ STATIC int receive_protocol(struct drbd_conf *mdev, struct Drbd_Header *h) p_two_primaries = be32_to_cpu(p->two_primaries); if (p_proto != mdev->net_conf->wire_protocol) { - ERR("incompatible communication protocols\n"); + dev_err(DEV, "incompatible communication protocols\n"); goto disconnect; } if (cmp_after_sb(p_after_sb_0p, mdev->net_conf->after_sb_0p)) { - ERR("incompatible after-sb-0pri settings\n"); + dev_err(DEV, "incompatible after-sb-0pri settings\n"); goto disconnect; } if (cmp_after_sb(p_after_sb_1p, mdev->net_conf->after_sb_1p)) { - ERR("incompatible after-sb-1pri settings\n"); + dev_err(DEV, "incompatible after-sb-1pri settings\n"); goto disconnect; } if (cmp_after_sb(p_after_sb_2p, mdev->net_conf->after_sb_2p)) { - ERR("incompatible after-sb-2pri settings\n"); + dev_err(DEV, "incompatible after-sb-2pri settings\n"); goto disconnect; } if (p_want_lose && mdev->net_conf->want_lose) { - ERR("both sides have the 'want_lose' flag set\n"); + dev_err(DEV, "both sides have the 'want_lose' flag set\n"); goto disconnect; } if (p_two_primaries != mdev->net_conf->two_primaries) { - ERR("incompatible setting of the two-primaries options\n"); + dev_err(DEV, "incompatible setting of the two-primaries options\n"); goto disconnect; } @@ -2595,10 +2595,10 @@ STATIC int receive_protocol(struct drbd_conf *mdev, struct Drbd_Header *h) p_integrity_alg[SHARED_SECRET_MAX-1] = 0; if (strcmp(p_integrity_alg, my_alg)) { - ERR("incompatible setting of the data-integrity-alg\n"); + dev_err(DEV, "incompatible setting of the data-integrity-alg\n"); goto disconnect; } - INFO("data-integrity-alg: %s\n", + dev_info(DEV, "data-integrity-alg: %s\n", my_alg[0] ? 
my_alg : (unsigned char *)""); } @@ -2624,13 +2624,13 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { - ERR("Can not allocate \"%s\" as %s (reason: %ld)\n", - alg, name, PTR_ERR(tfm)); + dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n", + alg, name, PTR_ERR(tfm)); return tfm; } if (crypto_tfm_alg_type(crypto_hash_tfm(tfm)) != CRYPTO_ALG_TYPE_DIGEST) { crypto_free_hash(tfm); - ERR("\"%s\" is not a digest (%s)\n", alg, name); + dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name); return ERR_PTR(-EINVAL); } return tfm; @@ -2651,7 +2651,7 @@ STATIC int receive_SyncParam(struct drbd_conf *mdev, struct Drbd_Header *h) : /* 89 */ sizeof(struct Drbd_SyncParam89_Packet); if (h->length > exp_max_sz) { - ERR("SyncParam packet too long: received %u, expected <= %u bytes\n", + dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n", h->length, exp_max_sz); return FALSE; } @@ -2676,7 +2676,7 @@ STATIC int receive_SyncParam(struct drbd_conf *mdev, struct Drbd_Header *h) if (apv >= 88) { if (apv == 88) { if (data_size > SHARED_SECRET_MAX) { - ERR("verify-alg too long, " + dev_err(DEV, "verify-alg too long, " "peer wants %u, accepting only %u byte\n", data_size, SHARED_SECRET_MAX); return FALSE; @@ -2701,7 +2701,7 @@ STATIC int receive_SyncParam(struct drbd_conf *mdev, struct Drbd_Header *h) if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) { if (mdev->state.conn == WFReportParams) { - ERR("Different verify-alg settings. me=\"%s\" peer=\"%s\"\n", + dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n", mdev->sync_conf.verify_alg, p->verify_alg); goto disconnect; } @@ -2713,7 +2713,7 @@ STATIC int receive_SyncParam(struct drbd_conf *mdev, struct Drbd_Header *h) if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) { if (mdev->state.conn == WFReportParams) { - ERR("Different csums-alg settings. me=\"%s\" peer=\"%s\"\n", + dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n", mdev->sync_conf.csums_alg, p->csums_alg); goto disconnect; } @@ -2731,14 +2731,14 @@ STATIC int receive_SyncParam(struct drbd_conf *mdev, struct Drbd_Header *h) mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1; crypto_free_hash(mdev->verify_tfm); mdev->verify_tfm = verify_tfm; - INFO("using verify-alg: \"%s\"\n", p->verify_alg); + dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg); } if (csums_tfm) { strcpy(mdev->sync_conf.csums_alg, p->csums_alg); mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1; crypto_free_hash(mdev->csums_tfm); mdev->csums_tfm = csums_tfm; - INFO("using csums-alg: \"%s\"\n", p->csums_alg); + dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg); } spin_unlock(&mdev->peer_seq_lock); } @@ -2765,7 +2765,7 @@ static void warn_if_differ_considerably(struct drbd_conf *mdev, return; d = (a > b) ? (a - b) : (b - a); if (d > (a>>3) || d > (b>>3)) - drbd_WARN("Considerable difference in %s: %llus vs. %llus\n", s, + dev_warn(DEV, "Considerable difference in %s: %llus vs. 
%llus\n", s, (unsigned long long)a, (unsigned long long)b); } @@ -2786,7 +2786,7 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct Drbd_Header *h) p_usize = be64_to_cpu(p->u_size); if (p_size == 0 && mdev->state.disk == Diskless) { - ERR("some backing storage is needed\n"); + dev_err(DEV, "some backing storage is needed\n"); drbd_force_state(mdev, NS(conn, Disconnecting)); return FALSE; } @@ -2812,7 +2812,7 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct Drbd_Header *h) if (mdev->bc->dc.disk_size != p_usize) { mdev->bc->dc.disk_size = p_usize; - INFO("Peer sets u_size to %lu sectors\n", + dev_info(DEV, "Peer sets u_size to %lu sectors\n", (unsigned long)mdev->bc->dc.disk_size); } @@ -2822,7 +2822,7 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct Drbd_Header *h) drbd_get_capacity(mdev->this_bdev) && mdev->state.disk >= Outdated && mdev->state.conn < Connected) { - ERR("The peer's disk size is too small!\n"); + dev_err(DEV, "The peer's disk size is too small!\n"); drbd_force_state(mdev, NS(conn, Disconnecting)); mdev->bc->dc.disk_size = my_usize; dec_local(mdev); @@ -2914,12 +2914,31 @@ STATIC int receive_uuids(struct drbd_conf *mdev, struct Drbd_Header *h) mdev->state.disk < Inconsistent && mdev->state.role == Primary && (mdev->ed_uuid & ~((u64)1)) != (p_uuid[Current] & ~((u64)1))) { - ERR("Can only connect to data with current UUID=%016llX\n", + dev_err(DEV, "Can only connect to data with current UUID=%016llX\n", (unsigned long long)mdev->ed_uuid); drbd_force_state(mdev, NS(conn, Disconnecting)); return FALSE; } + if (inc_local(mdev)) { + int skip_initial_sync = + mdev->state.conn == Connected && + mdev->agreed_pro_version >= 90 && + mdev->bc->md.uuid[Current] == UUID_JUST_CREATED && + (p_uuid[UUID_FLAGS] & 8); + if (skip_initial_sync) { + dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n"); + drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, + "clear_n_write from receive_uuids"); + _drbd_uuid_set(mdev, Current, p_uuid[Current]); + _drbd_uuid_set(mdev, Bitmap, 0); + _drbd_set_state(_NS2(mdev, disk, UpToDate, pdsk, UpToDate), + ChgStateVerbose, NULL); + drbd_md_sync(mdev); + } + dec_local(mdev); + } + /* Before we test for the disk state, we should wait until an eventually ongoing cluster wide state change is finished. That is important if we are primary and are detaching from our disk. We need to see the @@ -3010,7 +3029,7 @@ STATIC int receive_state(struct drbd_conf *mdev, struct Drbd_Header *h) real_peer_disk = peer_state.disk; if (peer_state.disk == Negotiating) { real_peer_disk = mdev->p_uuid[UUID_FLAGS] & 4 ? 
Inconsistent : Consistent; - INFO("real peer disk state = %s\n", disks_to_name(real_peer_disk)); + dev_info(DEV, "real peer disk state = %s\n", disks_to_name(real_peer_disk)); } spin_lock_irq(&mdev->req_lock); @@ -3041,7 +3060,7 @@ STATIC int receive_state(struct drbd_conf *mdev, struct Drbd_Header *h) drbd_force_state(mdev, NS(disk, Diskless)); nconn = Connected; } else if (peer_state.disk == Negotiating) { - ERR("Disk attach process on the peer node was aborted.\n"); + dev_err(DEV, "Disk attach process on the peer node was aborted.\n"); peer_state.disk = Diskless; } else { D_ASSERT(oconn == WFReportParams); @@ -3114,7 +3133,7 @@ STATIC int receive_sync_uuid(struct drbd_conf *mdev, struct Drbd_Header *h) dec_local(mdev); } else - ERR("Ignoring SyncUUID packet!\n"); + dev_err(DEV, "Ignoring SyncUUID packet!\n"); return TRUE; } @@ -3129,7 +3148,7 @@ receive_bitmap_plain(struct drbd_conf *mdev, struct Drbd_Header *h, unsigned want = num_words * sizeof(long); if (want != h->length) { - ERR("%s:want (%u) != h->length (%u)\n", __func__, want, h->length); + dev_err(DEV, "%s:want (%u) != h->length (%u)\n", __func__, want, h->length); return FAILED; } if (want == 0) @@ -3177,15 +3196,17 @@ recv_bm_rle_bits(struct drbd_conf *mdev, if (toggle) { e = s + rl -1; if (e >= c->bm_bits) { - ERR("bitmap overflow (e:%lu) while decoding bm RLE packet\n", e); + dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e); return FAILED; } _drbd_bm_set_bits(mdev, s, e); } if (have < bits) { - ERR("bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n", have, bits, look_ahead, - bs.cur.b - p->code, bs.buf_len); + dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n", + have, bits, look_ahead, + (unsigned int)(bs.cur.b - p->code), + (unsigned int)bs.buf_len); return FAILED; } look_ahead >>= bits; @@ -3225,7 +3246,7 @@ recv_bm_rle_bytes(struct drbd_conf *mdev, * in p->encoding & 0x80. */ for (toggle = DCBP_get_start(p); len; s += rl, toggle = !toggle) { if (s >= c->bm_bits) { - ERR("bitmap overflow (s:%lu) while decoding bitmap RLE packet\n", s); + dev_err(DEV, "bitmap overflow (s:%lu) while decoding bitmap RLE packet\n", s); return FAILED; } @@ -3236,7 +3257,7 @@ recv_bm_rle_bytes(struct drbd_conf *mdev, len -= n; if (rl == 0) { - ERR("unexpected zero runlength while decoding bitmap RLE packet\n"); + dev_err(DEV, "unexpected zero runlength while decoding bitmap RLE packet\n"); return FAILED; } @@ -3247,7 +3268,7 @@ recv_bm_rle_bytes(struct drbd_conf *mdev, /* set bits: merge into bitmap. */ e = s + rl -1; if (e >= c->bm_bits) { - ERR("bitmap overflow (e:%lu) while decoding bitmap RLE packet\n", e); + dev_err(DEV, "bitmap overflow (e:%lu) while decoding bitmap RLE packet\n", e); return FAILED; } _drbd_bm_set_bits(mdev, s, e); @@ -3276,7 +3297,7 @@ decode_bitmap_c(struct drbd_conf *mdev, case RLE_VLI_Bytes: return recv_bm_rle_bytes(mdev, p, c); } - ERR("receive_bitmap_c: unknown encoding %u\n", p->encoding); + dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding); return FAILED; } @@ -3297,7 +3318,7 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev, r = plain_would_take % total; r = (r > UINT_MAX/100) ? 
(r / (total+99/100)) : (100 * r / total); - INFO("%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), " + dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), " "total %u; compression factor: %u.%02u\n", direction, c->bytes[1], c->packets[1], @@ -3328,7 +3349,7 @@ STATIC int receive_bitmap(struct drbd_conf *mdev, struct Drbd_Header *h) * and allocate that during initial device creation? */ buffer = (unsigned long *) __get_free_page(GFP_NOIO); if (!buffer) { - ERR("failed to allocate one page buffer in %s\n", __func__); + dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); goto out; } @@ -3346,7 +3367,7 @@ STATIC int receive_bitmap(struct drbd_conf *mdev, struct Drbd_Header *h) struct Drbd_Compressed_Bitmap_Packet *p; if (h->length > BM_PACKET_PAYLOAD_BYTES) { - ERR("ReportCBitmap packet too large\n"); + dev_err(DEV, "ReportCBitmap packet too large\n"); goto out; } /* use the page buff */ @@ -3355,12 +3376,12 @@ STATIC int receive_bitmap(struct drbd_conf *mdev, struct Drbd_Header *h) if (drbd_recv(mdev, p->head.payload, h->length) != h->length) goto out; if (p->head.length <= (sizeof(*p) - sizeof(p->head))) { - ERR("ReportCBitmap packet too small (l:%u)\n", p->head.length); + dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", p->head.length); return FAILED; } ret = decode_bitmap_c(mdev, p, &c); } else { - drbd_WARN("receive_bitmap: h->command neither ReportBitMap nor ReportCBitMap (is 0x%x)", h->command); + dev_warn(DEV, "receive_bitmap: h->command neither ReportBitMap nor ReportCBitMap (is 0x%x)", h->command); goto out; } @@ -3388,7 +3409,7 @@ STATIC int receive_bitmap(struct drbd_conf *mdev, struct Drbd_Header *h) } else if (mdev->state.conn != WFBitMapS) { /* admin may have requested Disconnecting, * other threads may have noticed network errors */ - INFO("unexpected cstate (%s) in receive_bitmap\n", + dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n", conns_to_name(mdev->state.conn)); } @@ -3407,7 +3428,7 @@ STATIC int receive_skip(struct drbd_conf *mdev, struct Drbd_Header *h) static char sink[128]; int size, want, r; - drbd_WARN("skipping unknown optional packet type %d, l: %d!\n", + dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n", h->command, h->length); size = h->length; @@ -3484,13 +3505,13 @@ STATIC void drbdd(struct drbd_conf *mdev) handler = NULL; if (unlikely(!handler)) { - ERR("unknown packet type %d, l: %d!\n", + dev_err(DEV, "unknown packet type %d, l: %d!\n", header->command, header->length); drbd_force_state(mdev, NS(conn, ProtocolError)); break; } if (unlikely(!handler(mdev, header))) { - ERR("error receiving %s, l: %d!\n", + dev_err(DEV, "error receiving %s, l: %d!\n", cmdname(header->command), header->length); drbd_force_state(mdev, NS(conn, ProtocolError)); break; @@ -3527,7 +3548,7 @@ STATIC void drbd_fail_pending_reads(struct drbd_conf *mdev) } for (i = 0; i < APP_R_HSIZE; i++) if (!hlist_empty(mdev->app_reads_hash+i)) - drbd_WARN("ASSERT FAILED: app_reads_hash[%d].first: " + dev_warn(DEV, "ASSERT FAILED: app_reads_hash[%d].first: " "%p, should be NULL\n", i, mdev->app_reads_hash[i].first); memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *)); @@ -3545,7 +3566,7 @@ STATIC void drbd_disconnect(struct drbd_conf *mdev) if (mdev->state.conn == StandAlone) return; if (mdev->state.conn >= WFConnection) - ERR("ASSERT FAILED cstate = %s, expected < WFConnection\n", + dev_err(DEV, "ASSERT FAILED cstate = %s, expected < WFConnection\n", conns_to_name(mdev->state.conn)); /* asender does not 
clean up anything. it must not interfere, either */ @@ -3600,7 +3621,7 @@ STATIC void drbd_disconnect(struct drbd_conf *mdev) drbd_fail_pending_reads(mdev); - INFO("Connection closed\n"); + dev_info(DEV, "Connection closed\n"); drbd_md_sync(mdev); @@ -3639,7 +3660,7 @@ STATIC void drbd_disconnect(struct drbd_conf *mdev) /* paranoia code */ for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++) if (h->first) - ERR("ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n", + dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n", (int)(h - mdev->ee_hash), h->first); kfree(mdev->ee_hash); mdev->ee_hash = NULL; @@ -3648,7 +3669,7 @@ STATIC void drbd_disconnect(struct drbd_conf *mdev) /* paranoia code */ for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++) if (h->first) - ERR("ASSERT FAILED tl_hash[%u] == %p, expected NULL\n", + dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n", (int)(h - mdev->tl_hash), h->first); kfree(mdev->tl_hash); mdev->tl_hash = NULL; @@ -3668,9 +3689,9 @@ STATIC void drbd_disconnect(struct drbd_conf *mdev) * we already released the socket!? */ i = atomic_read(&mdev->pp_in_use); if (i) - DBG("pp_in_use = %u, expected 0\n", i); + dev_dbg(DEV, "pp_in_use = %u, expected 0\n", i); if (!list_empty(&mdev->net_ee)) - DBG("net_ee not empty!\n"); + dev_dbg(DEV, "net_ee not empty!\n"); D_ASSERT(list_empty(&mdev->read_ee)); D_ASSERT(list_empty(&mdev->active_ee)); @@ -3698,7 +3719,7 @@ STATIC int drbd_send_handshake(struct drbd_conf *mdev) int ok; if (mutex_lock_interruptible(&mdev->data.mutex)) { - ERR("interrupted during initial handshake\n"); + dev_err(DEV, "interrupted during initial handshake\n"); return 0; /* interrupted. not ok. */ } @@ -3740,13 +3761,13 @@ int drbd_do_handshake(struct drbd_conf *mdev) return 0; if (p->head.command != HandShake) { - ERR("expected HandShake packet, received: %s (0x%04x)\n", + dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n", cmdname(p->head.command), p->head.command); return -1; } if (p->head.length != expect) { - ERR("expected HandShake length: %u, received: %u\n", + dev_err(DEV, "expected HandShake length: %u, received: %u\n", expect, p->head.length); return -1; } @@ -3754,7 +3775,7 @@ int drbd_do_handshake(struct drbd_conf *mdev) rv = drbd_recv(mdev, &p->head.payload, expect); if (rv != expect) { - ERR("short read receiving handshake packet: l=%u\n", rv); + dev_err(DEV, "short read receiving handshake packet: l=%u\n", rv); return 0; } @@ -3772,13 +3793,13 @@ int drbd_do_handshake(struct drbd_conf *mdev) mdev->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max); - INFO("Handshake successful: " + dev_info(DEV, "Handshake successful: " "Agreed network protocol version %d\n", mdev->agreed_pro_version); return 1; incompat: - ERR("incompatible DRBD dialects: " + dev_err(DEV, "incompatible DRBD dialects: " "I support %d-%d, peer supports %d-%d\n", PRO_VERSION_MIN, PRO_VERSION_MAX, p->protocol_min, p->protocol_max); @@ -3788,8 +3809,8 @@ int drbd_do_handshake(struct drbd_conf *mdev) #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE) int drbd_do_auth(struct drbd_conf *mdev) { - ERR("This kernel was build without CONFIG_CRYPTO_HMAC.\n"); - ERR("You need to disable 'cram-hmac-alg' in drbd.conf.\n"); + dev_err(DEV, "This kernel was build without CONFIG_CRYPTO_HMAC.\n"); + dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n"); return 0; } #else @@ -3813,7 +3834,7 @@ int drbd_do_auth(struct drbd_conf *mdev) rv = 
crypto_hash_setkey(mdev->cram_hmac_tfm, (u8 *)mdev->net_conf->shared_secret, key_len); if (rv) { - ERR("crypto_hash_setkey() failed with %d\n", rv); + dev_err(DEV, "crypto_hash_setkey() failed with %d\n", rv); rv = 0; goto fail; } @@ -3829,21 +3850,21 @@ int drbd_do_auth(struct drbd_conf *mdev) goto fail; if (p.command != AuthChallenge) { - ERR("expected AuthChallenge packet, received: %s (0x%04x)\n", + dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n", cmdname(p.command), p.command); rv = 0; goto fail; } if (p.length > CHALLENGE_LEN*2) { - ERR("expected AuthChallenge payload too big.\n"); + dev_err(DEV, "expected AuthChallenge payload too big.\n"); rv = 0; goto fail; } peers_ch = kmalloc(p.length, GFP_KERNEL); if (peers_ch == NULL) { - ERR("kmalloc of peers_ch failed\n"); + dev_err(DEV, "kmalloc of peers_ch failed\n"); rv = 0; goto fail; } @@ -3851,7 +3872,7 @@ int drbd_do_auth(struct drbd_conf *mdev) rv = drbd_recv(mdev, peers_ch, p.length); if (rv != p.length) { - ERR("short read AuthChallenge: l=%u\n", rv); + dev_err(DEV, "short read AuthChallenge: l=%u\n", rv); rv = 0; goto fail; } @@ -3859,7 +3880,7 @@ int drbd_do_auth(struct drbd_conf *mdev) resp_size = crypto_hash_digestsize(mdev->cram_hmac_tfm); response = kmalloc(resp_size, GFP_KERNEL); if (response == NULL) { - ERR("kmalloc of response failed\n"); + dev_err(DEV, "kmalloc of response failed\n"); rv = 0; goto fail; } @@ -3869,7 +3890,7 @@ int drbd_do_auth(struct drbd_conf *mdev) rv = crypto_hash_digest(&desc, &sg, sg.length, response); if (rv) { - ERR("crypto_hash_digest() failed with %d\n", rv); + dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv); rv = 0; goto fail; } @@ -3883,14 +3904,14 @@ int drbd_do_auth(struct drbd_conf *mdev) goto fail; if (p.command != AuthResponse) { - ERR("expected AuthResponse packet, received: %s (0x%04x)\n", + dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n", cmdname(p.command), p.command); rv = 0; goto fail; } if (p.length != resp_size) { - ERR("expected AuthResponse payload of wrong size\n"); + dev_err(DEV, "expected AuthResponse payload of wrong size\n"); rv = 0; goto fail; } @@ -3898,14 +3919,14 @@ int drbd_do_auth(struct drbd_conf *mdev) rv = drbd_recv(mdev, response, resp_size); if (rv != resp_size) { - ERR("short read receiving AuthResponse: l=%u\n", rv); + dev_err(DEV, "short read receiving AuthResponse: l=%u\n", rv); rv = 0; goto fail; } right_response = kmalloc(resp_size, GFP_KERNEL); if (right_response == NULL) { - ERR("kmalloc of right_response failed\n"); + dev_err(DEV, "kmalloc of right_response failed\n"); rv = 0; goto fail; } @@ -3914,7 +3935,7 @@ int drbd_do_auth(struct drbd_conf *mdev) rv = crypto_hash_digest(&desc, &sg, sg.length, right_response); if (rv) { - ERR("crypto_hash_digest() failed with %d\n", rv); + dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv); rv = 0; goto fail; } @@ -3922,7 +3943,7 @@ int drbd_do_auth(struct drbd_conf *mdev) rv = !memcmp(response, right_response, resp_size); if (rv) - INFO("Peer authenticated using %d bytes of '%s' HMAC\n", + dev_info(DEV, "Peer authenticated using %d bytes of '%s' HMAC\n", resp_size, mdev->net_conf->cram_hmac_alg); fail: @@ -3942,7 +3963,7 @@ STATIC int drbdd_init(struct Drbd_thread *thi) sprintf(current->comm, "drbd%d_receiver", minor); - INFO("receiver (re)started\n"); + dev_info(DEV, "receiver (re)started\n"); do { h = drbd_connect(mdev); @@ -3952,7 +3973,7 @@ STATIC int drbdd_init(struct Drbd_thread *thi) schedule_timeout(HZ); } if (h == -1) { - drbd_WARN("Discarding network 
configuration.\n"); + dev_warn(DEV, "Discarding network configuration.\n"); drbd_force_state(mdev, NS(conn, Disconnecting)); } } while (h == 0); @@ -3966,7 +3987,7 @@ STATIC int drbdd_init(struct Drbd_thread *thi) drbd_disconnect(mdev); - INFO("receiver terminated\n"); + dev_info(DEV, "receiver terminated\n"); return 0; } @@ -3982,7 +4003,7 @@ STATIC int got_RqSReply(struct drbd_conf *mdev, struct Drbd_Header *h) set_bit(CL_ST_CHG_SUCCESS, &mdev->flags); } else { set_bit(CL_ST_CHG_FAIL, &mdev->flags); - ERR("Requested state change failed by peer: %s (%d)\n", + dev_err(DEV, "Requested state change failed by peer: %s (%d)\n", set_st_err_name(retcode), retcode); } wake_up(&mdev->state_wait); @@ -4041,7 +4062,7 @@ STATIC int got_BlockAck(struct drbd_conf *mdev, struct Drbd_Header *h) if (unlikely(!req)) { spin_unlock_irq(&mdev->req_lock); - ERR("Got a corrupt block_id/sector pair(2).\n"); + dev_err(DEV, "Got a corrupt block_id/sector pair(2).\n"); return FALSE; } @@ -4060,7 +4081,7 @@ STATIC int got_BlockAck(struct drbd_conf *mdev, struct Drbd_Header *h) break; case DiscardAck: D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); - ALERT("Got DiscardAck packet %llus +%u!" + dev_alert(DEV, "Got DiscardAck packet %llus +%u!" " DRBD is not a random data generator!\n", (unsigned long long)req->sector, req->size); _req_mod(req, conflict_discarded_by_peer, 0); @@ -4082,7 +4103,7 @@ STATIC int got_NegAck(struct drbd_conf *mdev, struct Drbd_Header *h) struct drbd_request *req; if (__ratelimit(&drbd_ratelimit_state)) - drbd_WARN("Got NegAck packet. Peer is in troubles?\n"); + dev_warn(DEV, "Got NegAck packet. Peer is in troubles?\n"); update_peer_seq(mdev, be32_to_cpu(p->seq_num)); @@ -4098,7 +4119,7 @@ STATIC int got_NegAck(struct drbd_conf *mdev, struct Drbd_Header *h) if (unlikely(!req)) { spin_unlock_irq(&mdev->req_lock); - ERR("Got a corrupt block_id/sector pair(2).\n"); + dev_err(DEV, "Got a corrupt block_id/sector pair(2).\n"); return FALSE; } @@ -4119,7 +4140,7 @@ STATIC int got_NegDReply(struct drbd_conf *mdev, struct Drbd_Header *h) req = _ar_id_to_req(mdev, p->block_id, sector); if (unlikely(!req)) { spin_unlock_irq(&mdev->req_lock); - ERR("Got a corrupt block_id/sector pair(3).\n"); + dev_err(DEV, "Got a corrupt block_id/sector pair(3).\n"); return FALSE; } @@ -4128,7 +4149,7 @@ STATIC int got_NegDReply(struct drbd_conf *mdev, struct Drbd_Header *h) update_peer_seq(mdev, be32_to_cpu(p->seq_num)); - ERR("Got NegDReply; Sector %llus, len %u; Fail original request.\n", + dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n", (unsigned long long)sector, be32_to_cpu(p->blksize)); return TRUE; @@ -4192,7 +4213,7 @@ STATIC int got_OVResult(struct drbd_conf *mdev, struct Drbd_Header *h) w->cb = w_ov_finished; drbd_queue_work_front(&mdev->data.work, w); } else { - ERR("kmalloc(w) failed."); + dev_err(DEV, "kmalloc(w) failed."); drbd_resync_finished(mdev); } } @@ -4264,7 +4285,7 @@ STATIC int drbd_asender(struct Drbd_thread *thi) clear_bit(SIGNAL_ASENDER, &mdev->flags); flush_signals(current); if (!drbd_process_done_ee(mdev)) { - ERR("process_done_ee() = NOT_OK\n"); + dev_err(DEV, "process_done_ee() = NOT_OK\n"); goto reconnect; } /* to avoid race with newly queued ACKs */ @@ -4306,12 +4327,12 @@ STATIC int drbd_asender(struct Drbd_thread *thi) received += rv; buf += rv; } else if (rv == 0) { - ERR("meta connection shut down by peer.\n"); + dev_err(DEV, "meta connection shut down by peer.\n"); goto reconnect; } else if (rv == -EAGAIN) { if (mdev->meta.socket->sk->sk_rcvtimeo 
== mdev->net_conf->ping_timeo*HZ/10) { - ERR("PingAck did not arrive in time.\n"); + dev_err(DEV, "PingAck did not arrive in time.\n"); goto reconnect; } set_bit(SEND_PING, &mdev->flags); @@ -4319,13 +4340,13 @@ STATIC int drbd_asender(struct Drbd_thread *thi) } else if (rv == -EINTR) { continue; } else { - ERR("sock_recvmsg returned %d\n", rv); + dev_err(DEV, "sock_recvmsg returned %d\n", rv); goto reconnect; } if (received == expect && cmd == NULL) { if (unlikely(h->magic != BE_DRBD_MAGIC)) { - ERR("magic?? on meta m: 0x%lx c: %d l: %d\n", + dev_err(DEV, "magic?? on meta m: 0x%lx c: %d l: %d\n", (long)be32_to_cpu(h->magic), h->command, h->length); goto reconnect; @@ -4333,7 +4354,7 @@ STATIC int drbd_asender(struct Drbd_thread *thi) cmd = get_asender_cmd(be16_to_cpu(h->command)); len = be16_to_cpu(h->length); if (unlikely(cmd == NULL)) { - ERR("unknown command?? on meta m: 0x%lx c: %d l: %d\n", + dev_err(DEV, "unknown command?? on meta m: 0x%lx c: %d l: %d\n", (long)be32_to_cpu(h->magic), h->command, h->length); goto disconnect; @@ -4369,7 +4390,7 @@ disconnect: clear_bit(SIGNAL_ASENDER, &mdev->flags); D_ASSERT(mdev->state.conn < Connected); - INFO("asender terminated\n"); + dev_info(DEV, "asender terminated\n"); return 0; } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index c48fc848e5f3..cace6b7d9d27 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -41,7 +41,7 @@ STATIC void _print_rq_state(struct drbd_request *req, const char *txt) bio_data_dir(req->master_bio) == WRITE) ? 'W' : 'R'; - INFO("%s %p %c L%c%c%cN%c%c%c%c%c %u (%llus +%u) %s\n", + dev_info(DEV, "%s %p %c L%c%c%cN%c%c%c%c%c %u (%llus +%u) %s\n", txt, req, rw, s & RQ_LOCAL_PENDING ? 'p' : '-', s & RQ_LOCAL_COMPLETED ? 'c' : '-', @@ -88,7 +88,7 @@ STATIC void _print_req_mod(struct drbd_request *req, enum drbd_req_event what) [completed_ok] = "completed_ok", }; - INFO("_req_mod(%p %c ,%s)\n", req, rw, rq_event_names[what]); + dev_info(DEV, "_req_mod(%p %c ,%s)\n", req, rw, rq_event_names[what]); } # ifdef ENABLE_DYNAMIC_TRACE @@ -170,7 +170,7 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const drbd_al_complete_io(mdev, req->sector); dec_local(mdev); } else if (__ratelimit(&drbd_ratelimit_state)) { - drbd_WARN("Should have called drbd_al_complete_io(, %llu), " + dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), " "but my Disk seems to have failed :(\n", (unsigned long long) req->sector); } @@ -257,7 +257,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, slot = tl_hash_slot(mdev, sector); hlist_for_each_entry(i, n, slot, colision) { if (OVERLAPS) { - ALERT("LOGIC BUG: completed: %p %llus +%u; " + dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " "other: %p %llus +%u\n", req, (unsigned long long)sector, size, i, (unsigned long long)i->sector, i->size); @@ -423,7 +423,7 @@ STATIC int _req_conflicts(struct drbd_request *req) slot = tl_hash_slot(mdev, sector); hlist_for_each_entry(i, n, slot, colision) { if (OVERLAPS) { - ALERT("%s[%u] Concurrent local write detected! " + dev_alert(DEV, "%s[%u] Concurrent local write detected! " "[DISCARD L] new: %llus +%u; " "pending: %llus +%u\n", current->comm, current->pid, @@ -441,7 +441,7 @@ STATIC int _req_conflicts(struct drbd_request *req) slot = ee_hash_slot(mdev, sector); hlist_for_each_entry(e, n, slot, colision) { if (OVERLAPS) { - ALERT("%s[%u] Concurrent remote write detected!" + dev_alert(DEV, "%s[%u] Concurrent remote write detected!" 
" [DISCARD L] new: %llus +%u; " "pending: %llus +%u\n", current->comm, current->pid, @@ -484,13 +484,13 @@ void _req_mod(struct drbd_request *req, enum drbd_req_event what, int error) struct drbd_conf *mdev = req->mdev; if (error && (bio_rw(req->master_bio) != READA)) - ERR("got an _req_mod() errno of %d\n", error); + dev_err(DEV, "got an _req_mod() errno of %d\n", error); print_req_mod(req, what); switch (what) { default: - ERR("LOGIC BUG in %s:%u\n", __FILE__ , __LINE__); + dev_err(DEV, "LOGIC BUG in %s:%u\n", __FILE__ , __LINE__); return; /* does not happen... @@ -535,7 +535,7 @@ void _req_mod(struct drbd_request *req, enum drbd_req_event what, int error) bio_put(req->private_bio); req->private_bio = NULL; - ALERT("Local WRITE failed sec=%llus size=%u\n", + dev_alert(DEV, "Local WRITE failed sec=%llus size=%u\n", (unsigned long long)req->sector, req->size); /* and now: check how to handle local io error. */ __drbd_chk_io_error(mdev, FALSE); @@ -559,7 +559,7 @@ void _req_mod(struct drbd_request *req, enum drbd_req_event what, int error) break; } /* else */ - ALERT("Local READ failed sec=%llus size=%u\n", + dev_alert(DEV, "Local READ failed sec=%llus size=%u\n", (unsigned long long)req->sector, req->size); /* _req_mod(req,to_be_send); oops, recursion in static inline */ D_ASSERT(!(req->rq_state & RQ_NET_MASK)); @@ -805,7 +805,7 @@ STATIC int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) dec_ap_bio(mdev); /* only pass the error to the upper layers. * if user cannot handle io errors, thats not our business. */ - ERR("could not kmalloc() req\n"); + dev_err(DEV, "could not kmalloc() req\n"); bio_endio(bio, -ENOMEM); return 0; } @@ -863,7 +863,7 @@ STATIC int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) mdev->state.conn >= Connected)); if (!(local || remote)) { - ERR("IO ERROR: neither local nor remote disk\n"); + dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); goto fail_free_complete; } @@ -879,7 +879,7 @@ STATIC int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) allocate_barrier: b = kmalloc(sizeof(struct drbd_barrier), GFP_NOIO); if (!b) { - ERR("Failed to alloc barrier.\n"); + dev_err(DEV, "Failed to alloc barrier.\n"); err = -ENOMEM; goto fail_free_complete; } @@ -893,9 +893,9 @@ allocate_barrier: (mdev->state.pdsk == Inconsistent && mdev->state.conn >= Connected)); if (!remote) - drbd_WARN("lost connection while grabbing the req_lock!\n"); + dev_warn(DEV, "lost connection while grabbing the req_lock!\n"); if (!(local || remote)) { - ERR("IO ERROR: neither local nor remote disk\n"); + dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); spin_unlock_irq(&mdev->req_lock); goto fail_free_complete; } @@ -1051,7 +1051,7 @@ static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write) if (mdev->state.role != Primary && (!allow_oos || is_write)) { if (__ratelimit(&drbd_ratelimit_state)) { - ERR("Process %s[%u] tried to %s; " + dev_err(DEV, "Process %s[%u] tried to %s; " "since we are not in Primary state, " "we cannot allow this\n", current->comm, current->pid, @@ -1071,7 +1071,7 @@ static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write) */ if (mdev->state.disk < UpToDate && mdev->state.pdsk < UpToDate) { if (__ratelimit(&drbd_ratelimit_state)) - ERR("Sorry, I have no access to good data anymore.\n"); + dev_err(DEV, "Sorry, I have no access to good data anymore.\n"); return 1; } @@ -1097,7 +1097,7 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio) * i.e. 
in drbd_init_set_defaults we set the NO_BARRIER_SUPP bit. */ if (unlikely(bio_barrier(bio) && test_bit(NO_BARRIER_SUPP, &mdev->flags))) { - /* drbd_WARN("Rejecting barrier request as underlying device does not support\n"); */ + /* dev_warn(DEV, "Rejecting barrier request as underlying device does not support\n"); */ bio_endio(bio, -EOPNOTSUPP); return 0; } @@ -1123,7 +1123,7 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio) * Maybe add our own split-arbitrary-bios function. */ if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_SEGMENT_SIZE) { /* rather error out here than BUG in bio_split */ - ERR("bio would need to, but cannot, be split: " + dev_err(DEV, "bio would need to, but cannot, be split: " "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n", bio->bi_vcnt, bio->bi_idx, bio->bi_size, (unsigned long long)bio->bi_sector); diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index f50f95cb9887..742db2c9725c 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -232,7 +232,7 @@ static inline struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, hlist_for_each_entry(req, n, slot, colision) { if ((unsigned long)req == (unsigned long)id) { if (req->sector != sector) { - ERR("_ack_id_to_req: found req %p but it has " + dev_err(DEV, "_ack_id_to_req: found req %p but it has " "wrong sector (%llus versus %llus)\n", req, (unsigned long long)req->sector, (unsigned long long)sector); @@ -241,7 +241,7 @@ static inline struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, return req; } } - ERR("_ack_id_to_req: failed to find req %p, sector %llus in list\n", + dev_err(DEV, "_ack_id_to_req: failed to find req %p, sector %llus in list\n", (void *)(unsigned long)id, (unsigned long long)sector); return NULL; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 57cc537b43d1..685dc71b8a8b 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -110,7 +110,7 @@ void drbd_endio_read_sec(struct bio *bio, int error) __releases(local) /* strange behaviour of some lower level drivers... * fail the request by clearing the uptodate flag, * but do not return any error?! - * do we want to drbd_WARN() on this? */ + * do we want to dev_warn(DEV, ) on this? */ error = -EIO; } @@ -130,7 +130,7 @@ void drbd_endio_read_sec(struct bio *bio, int error) __releases(local) dec_local(mdev); MTRACE(TraceTypeEE, TraceLvlAll, - INFO("Moved EE (READ) to worker sec=%llus size=%u ee=%p\n", + dev_info(DEV, "Moved EE (READ) to worker sec=%llus size=%u ee=%p\n", (unsigned long long)e->sector, e->size, e); ); } @@ -156,7 +156,7 @@ void drbd_endio_write_sec(struct bio *bio, int error) __releases(local) /* strange behaviour of some lower level drivers... * fail the request by clearing the uptodate flag, * but do not return any error?! - * do we want to drbd_WARN() on this? */ + * do we want to dev_warn(DEV, ) on this? */ error = -EIO; } @@ -192,7 +192,7 @@ void drbd_endio_write_sec(struct bio *bio, int error) __releases(local) list_add_tail(&e->w.list, &mdev->done_ee); MTRACE(TraceTypeEE, TraceLvlAll, - INFO("Moved EE (WRITE) to done_ee sec=%llus size=%u ee=%p\n", + dev_info(DEV, "Moved EE (WRITE) to done_ee sec=%llus size=%u ee=%p\n", (unsigned long long)e->sector, e->size, e); ); @@ -237,7 +237,7 @@ void drbd_endio_pri(struct bio *bio, int error) /* strange behaviour of some lower level drivers... * fail the request by clearing the uptodate flag, * but do not return any error?! 
- * do we want to drbd_WARN() on this? */ + * do we want to dev_warn(DEV, ) on this? */ error = -EIO; } @@ -268,7 +268,7 @@ int w_io_error(struct drbd_conf *mdev, struct drbd_work *w, int cancel) ok = drbd_io_error(mdev, FALSE); if (unlikely(!ok)) - ERR("Sending in w_io_error() failed\n"); + dev_err(DEV, "Sending in w_io_error() failed\n"); return ok; } @@ -287,7 +287,7 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) mdev->state.pdsk <= Inconsistent) { _req_mod(req, send_canceled, 0); spin_unlock_irq(&mdev->req_lock); - ALERT("WE ARE LOST. Local IO failure, no peer.\n"); + dev_alert(DEV, "WE ARE LOST. Local IO failure, no peer.\n"); return 1; } spin_unlock_irq(&mdev->req_lock); @@ -298,7 +298,7 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int w_resync_inactive(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { ERR_IF(cancel) return 1; - ERR("resync inactive, but callback triggered??\n"); + dev_err(DEV, "resync inactive, but callback triggered??\n"); return 1; /* Simply ignore this! */ } @@ -351,7 +351,7 @@ STATIC int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel CsumRSRequest); kfree(digest); } else { - ERR("kmalloc() of digest failed.\n"); + dev_err(DEV, "kmalloc() of digest failed.\n"); ok = 0; } } else { @@ -362,7 +362,7 @@ STATIC int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel drbd_free_ee(mdev, e); if (unlikely(!ok)) - ERR("drbd_send_drequest(..., csum) failed\n"); + dev_err(DEV, "drbd_send_drequest(..., csum) failed\n"); return ok; } @@ -438,12 +438,12 @@ int w_make_resync_request(struct drbd_conf *mdev, return 1; if (unlikely(mdev->state.conn < Connected)) { - ERR("Confused in w_make_resync_request()! cstate < Connected"); + dev_err(DEV, "Confused in w_make_resync_request()! cstate < Connected"); return 0; } if (mdev->state.conn != SyncTarget) - ERR("%s in w_make_resync_request\n", + dev_err(DEV, "%s in w_make_resync_request\n", conns_to_name(mdev->state.conn)); if (!inc_local(mdev)) { @@ -451,7 +451,7 @@ int w_make_resync_request(struct drbd_conf *mdev, inc_local_if_state(mdev,Failed) would be sufficient, but to continue resync with a broken disk makes no sense at all */ - ERR("Disk broke down during resync!\n"); + dev_err(DEV, "Disk broke down during resync!\n"); mdev->resync_work.cb = w_resync_inactive; return 1; } @@ -550,7 +550,7 @@ next_sector: inc_rs_pending(mdev); if (!drbd_send_drequest(mdev, RSDataRequest, sector, size, ID_SYNCER)) { - ERR("drbd_send_drequest() failed, aborting...\n"); + dev_err(DEV, "drbd_send_drequest() failed, aborting...\n"); dec_rs_pending(mdev); dec_local(mdev); return 0; @@ -586,7 +586,7 @@ int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return 1; if (unlikely(mdev->state.conn < Connected)) { - ERR("Confused in w_make_ov_request()! cstate < Connected"); + dev_err(DEV, "Confused in w_make_ov_request()! 
cstate < Connected"); return 0; } @@ -672,7 +672,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) drbd_queue_work(&mdev->data.work, w); return 1; } - ERR("Warn failed to drbd_rs_del_all() and to kmalloc(w).\n"); + dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n"); } dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; @@ -696,7 +696,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) ns = os; ns.conn = Connected; - INFO("%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", + dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", (os.conn == VerifyS || os.conn == VerifyT) ? "Online verify " : "Resync", dt + mdev->rs_paused, mdev->rs_paused, dbdt); @@ -705,7 +705,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) if (os.conn == VerifyS || os.conn == VerifyT) { if (n_oos) { - ALERT("Online verify found %lu %dk block out of sync!\n", + dev_alert(DEV, "Online verify found %lu %dk block out of sync!\n", n_oos, Bit2KB(1)); khelper_cmd = "out-of-sync"; } @@ -721,7 +721,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) const int ratio = (t == 0) ? 0 : (t < 100000) ? ((s*100)/t) : (s/(t/100)); - INFO("%u %% had equal check sums, eliminated: %luK; " + dev_info(DEV, "%u %% had equal check sums, eliminated: %luK; " "transferred %luK total %luK\n", ratio, Bit2KB(mdev->rs_same_csum), @@ -731,7 +731,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) } if (mdev->rs_failed) { - INFO(" %lu failed blocks\n", mdev->rs_failed); + dev_info(DEV, " %lu failed blocks\n", mdev->rs_failed); if (os.conn == SyncTarget || os.conn == PausedSyncT) { ns.disk = Inconsistent; @@ -752,7 +752,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) drbd_uuid_set(mdev, Bitmap, mdev->bc->md.uuid[Current]); _drbd_uuid_set(mdev, Current, mdev->p_uuid[Current]); } else { - ERR("mdev->p_uuid is NULL! BUG\n"); + dev_err(DEV, "mdev->p_uuid is NULL! BUG\n"); } } @@ -777,7 +777,7 @@ out: mdev->rs_paused = 0; if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) { - drbd_WARN("Writing the whole bitmap, due to failed kmalloc\n"); + dev_warn(DEV, "Writing the whole bitmap, due to failed kmalloc\n"); drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished"); } @@ -807,7 +807,7 @@ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) ok = drbd_send_block(mdev, DataReply, e); } else { if (__ratelimit(&drbd_ratelimit_state)) - ERR("Sending NegDReply. sector=%llus.\n", + dev_err(DEV, "Sending NegDReply. sector=%llus.\n", (unsigned long long)e->sector); ok = drbd_send_ack(mdev, NegDReply, e); @@ -827,7 +827,7 @@ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) spin_unlock_irq(&mdev->req_lock); if (unlikely(!ok)) - ERR("drbd_send_block() failed\n"); + dev_err(DEV, "drbd_send_block() failed\n"); return ok; } @@ -856,13 +856,13 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) ok = drbd_send_block(mdev, RSDataReply, e); } else { if (__ratelimit(&drbd_ratelimit_state)) - ERR("Not sending RSDataReply, " + dev_err(DEV, "Not sending RSDataReply, " "partner DISKLESS!\n"); ok = 1; } } else { if (__ratelimit(&drbd_ratelimit_state)) - ERR("Sending NegRSDReply. sector %llus.\n", + dev_err(DEV, "Sending NegRSDReply. 
sector %llus.\n", (unsigned long long)e->sector); ok = drbd_send_ack(mdev, NegRSDReply, e); @@ -885,7 +885,7 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) spin_unlock_irq(&mdev->req_lock); if (unlikely(!ok)) - ERR("drbd_send_block() failed\n"); + dev_err(DEV, "drbd_send_block() failed\n"); return ok; } @@ -934,7 +934,7 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } else { ok = drbd_send_ack(mdev, NegRSDReply, e); if (__ratelimit(&drbd_ratelimit_state)) - ERR("Sending NegDReply. I guess it gets messy.\n"); + dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n"); drbd_io_error(mdev, FALSE); } @@ -952,7 +952,7 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) spin_unlock_irq(&mdev->req_lock); if (unlikely(!ok)) - ERR("drbd_send_block/ack() failed\n"); + dev_err(DEV, "drbd_send_block/ack() failed\n"); return ok; } @@ -963,31 +963,30 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) void *digest; int ok = 1; - if (unlikely(cancel)) { - drbd_free_ee(mdev, e); - dec_unacked(mdev); - return 1; - } + if (unlikely(cancel)) + goto out; - if (likely(drbd_bio_uptodate(e->private_bio))) { - digest_size = crypto_hash_digestsize(mdev->verify_tfm); - digest = kmalloc(digest_size, GFP_KERNEL); - if (digest) { - drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest); - ok = drbd_send_drequest_csum(mdev, e->sector, e->size, - digest, digest_size, OVReply); - if (ok) - inc_rs_pending(mdev); - kfree(digest); - } - } + if (unlikely(!drbd_bio_uptodate(e->private_bio))) + goto out; - dec_unacked(mdev); + digest_size = crypto_hash_digestsize(mdev->verify_tfm); + digest = kmalloc(digest_size, GFP_KERNEL); + if (digest) { + drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest); + ok = drbd_send_drequest_csum(mdev, e->sector, e->size, + digest, digest_size, OVReply); + if (ok) + inc_rs_pending(mdev); + kfree(digest); + } +out: spin_lock_irq(&mdev->req_lock); drbd_free_ee(mdev, e); spin_unlock_irq(&mdev->req_lock); + dec_unacked(mdev); + return ok; } @@ -1036,7 +1035,7 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } else { ok = drbd_send_ack(mdev, NegRSDReply, e); if (__ratelimit(&drbd_ratelimit_state)) - ERR("Sending NegDReply. I guess it gets messy.\n"); + dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n"); drbd_io_error(mdev, FALSE); } @@ -1267,7 +1266,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) int r; MTRACE(TraceTypeResync, TraceLvlSummary, - INFO("Resync starting: side=%s\n", + dev_info(DEV, "Resync starting: side=%s\n", side == SyncTarget ? 
"SyncTarget" : "SyncSource"); ); @@ -1283,7 +1282,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) r = drbd_khelper(mdev, "before-resync-target"); r = (r >> 8) & 0xff; if (r > 0) { - INFO("before-resync-target handler returned %d, " + dev_info(DEV, "before-resync-target handler returned %d, " "dropping connection.\n", r); drbd_force_state(mdev, NS(conn, Disconnecting)); return; @@ -1342,7 +1341,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) dec_local(mdev); if (r == SS_Success) { - INFO("Began resync as %s (will sync %lu KB [%lu bits set]).\n", + dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", conns_to_name(ns.conn), (unsigned long) mdev->rs_total << (BM_BLOCK_SIZE_B-10), (unsigned long) mdev->rs_total); @@ -1423,7 +1422,7 @@ int drbd_worker(struct Drbd_thread *thi) spin_unlock_irq(&mdev->data.work.q_lock); if (!w->cb(mdev, w, mdev->state.conn < Connected)) { - /* drbd_WARN("worker: a callback failed! \n"); */ + /* dev_warn(DEV, "worker: a callback failed! \n"); */ if (mdev->state.conn >= Connected) drbd_force_state(mdev, NS(conn, NetworkFailure)); @@ -1459,7 +1458,7 @@ int drbd_worker(struct Drbd_thread *thi) drbd_thread_stop(&mdev->receiver); drbd_mdev_cleanup(mdev); - INFO("worker terminated\n"); + dev_info(DEV, "worker terminated\n"); return 0; } diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h index 3ebacbc7a39f..501ca2ed31f7 100644 --- a/drivers/block/drbd/drbd_wrappers.h +++ b/drivers/block/drbd/drbd_wrappers.h @@ -80,6 +80,12 @@ static inline void drbd_plug_device(struct drbd_conf *mdev) spin_unlock_irq(q->queue_lock); } +static inline int drbd_crypto_is_hash(struct crypto_tfm *tfm) +{ + return (crypto_tfm_alg_type(tfm) & CRYPTO_ALG_TYPE_HASH_MASK) + == CRYPTO_ALG_TYPE_HASH; +} + #ifndef __CHECKER__ # undef __cond_lock # define __cond_lock(x,c) (c) diff --git a/drivers/block/drbd/lru_cache.c b/drivers/block/drbd/lru_cache.c index 98b89c1d4188..71858ff5b02c 100644 --- a/drivers/block/drbd/lru_cache.c +++ b/drivers/block/drbd/lru_cache.c @@ -48,6 +48,8 @@ static inline void lc_init(struct lru_cache *lc, struct lc_element *e; unsigned int i; + BUG_ON(!e_count); + memset(lc, 0, bytes); INIT_LIST_HEAD(&lc->in_use); INIT_LIST_HEAD(&lc->lru); @@ -138,7 +140,6 @@ struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) struct lc_element *e; BUG_ON(!lc); - BUG_ON(!lc->nr_elements); hlist_for_each_entry(e, n, lc->slot + lc_hash_fn(lc, enr), colision) { if (e->lc_number == enr) return e; -- cgit v1.2.3 From 40d2a397e602df60b38b8d4e6d0d23eb1b5b2734 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 16 Apr 2009 17:29:55 +0200 Subject: Adding some dev documentation bits, removing outdated comments. 
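
The state diagrams added under Documentation/blockdev/drbd/ can be rendered with the graphviz tools, for example (assuming the dot and ImageMagick display utilities are available; this is the rendering hint that formerly lived in drbd_req.h):

 dot -Tps2 Documentation/blockdev/drbd/conn-states-8.dot | display -
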
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg | 2 ++ Documentation/blockdev/drbd/DRBD-data-packets.svg | 2 ++ Documentation/blockdev/drbd/README.txt | 16 ++++++++++++++++ Documentation/blockdev/drbd/conn-states-8.dot | 18 ++++++++++++++++++ Documentation/blockdev/drbd/disk-states-8.dot | 16 ++++++++++++++++ Documentation/blockdev/drbd/node-states-8.dot | 14 ++++++++++++++ drivers/block/drbd/drbd_req.h | 3 --- 7 files changed, 68 insertions(+), 3 deletions(-) create mode 100644 Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg create mode 100644 Documentation/blockdev/drbd/DRBD-data-packets.svg create mode 100644 Documentation/blockdev/drbd/README.txt create mode 100644 Documentation/blockdev/drbd/conn-states-8.dot create mode 100644 Documentation/blockdev/drbd/disk-states-8.dot create mode 100644 Documentation/blockdev/drbd/node-states-8.dot diff --git a/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg b/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg new file mode 100644 index 000000000000..cbc68b144b5b --- /dev/null +++ b/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg @@ -0,0 +1,2 @@ + +Master slideSlideDrawingDrawingDrawingDrawingRSDataReplyDrawingDrawingCsumRSRequestDrawingw_make_resync_request()Drawingreceive_DataRequest()Drawingdrbd_endio_read_sec()Drawingw_e_end_csum_rs_req()Drawingreceive_RSDataReply()Drawingdrbd_endio_write_sec()Drawinge_end_resync_block()DrawingDrawingWriteAckDrawinggot_BlockAck()DrawingChecksum based Resync, case not in syncDrawingDRBD-8.3 data flowDrawingw_e_send_csum()DrawingDrawingDrawingRSIsInSyncDrawingDrawingCsumRSRequestDrawingreceive_DataRequest()Drawingdrbd_endio_read_sec()Drawingw_e_end_csum_rs_req()Drawinggot_IsInSync()DrawingChecksum based Resync, case in syncDrawingDrawingDrawingDrawingOVReplyDrawingDrawingOVRequestDrawingreceive_OVRequest()Drawingdrbd_endio_read_sec()Drawingw_e_end_ov_req()Drawingreceive_OVReply()Drawingdrbd_endio_read_sec()Drawingw_e_end_ov_reply()DrawingDrawingOVResultDrawinggot_OVResult()DrawingOnline verifyDrawingw_make_ov_request()DrawingDrawingDrawingDrawingDrawingdrbd_endio_read_sec()Drawingw_make_resync_request()Drawingw_e_send_csum()DrawingDrawingdrbd_endio_read_sec()DrawingDrawingDrawingDrawingrs_begin_io()Drawingrs_begin_io()Drawingrs_begin_io()Drawingrs_complete_io()Drawingrs_complete_io()Drawingrs_complete_io()Drawingrs_begin_io()Drawingrs_begin_io()Drawingrs_begin_io()Drawingrs_complete_io()Drawingrs_complete_io()Drawingrs_complete_io() \ No newline at end of file diff --git a/Documentation/blockdev/drbd/DRBD-data-packets.svg b/Documentation/blockdev/drbd/DRBD-data-packets.svg new file mode 100644 index 000000000000..e8ba30e85a95 --- /dev/null +++ b/Documentation/blockdev/drbd/DRBD-data-packets.svg @@ -0,0 +1,2 @@ + +Master slideSlideDrawingDrawingDrawingDrawingRSDataReplyDrawingDrawingRSDataRequestDrawingw_make_resync_request()Drawingreceive_DataRequest()Drawingdrbd_endio_read_sec()Drawingw_e_end_rsdata_req()Drawingreceive_RSDataReply()Drawingdrbd_endio_write_sec()Drawinge_end_resync_block()DrawingDrawingWriteAckDrawinggot_BlockAck()DrawingResync blocks, 4-32KDrawingDrawingDrawingWriteAckDrawingDrawingDataDrawingdrbd_make_request()Drawingreceive_Data()Drawingdrbd_endio_write_sec()Drawinge_end_block()Drawinggot_BlockAck()DrawingRegular mirrored write, 
512-32KDrawingw_send_dblock()DrawingDrawingdrbd_endio_write_pri()DrawingDrawingDrawingDataReplyDrawingDrawingDataRequestDrawingdrbd_make_request()Drawingreceive_DataRequest()Drawingdrbd_endio_read_sec()Drawingw_e_end_data_req()Drawingreceive_DataReply()DrawingDiskless read, 512-32KDrawingw_send_read_req()DrawingDRBD 8 data flowDrawingDrawingDrawingDrawingal_begin_io()Drawingal_complete_io()Drawingrs_begin_io()Drawingrs_complete_io()Drawingrs_begin_io()Drawingrs_complete_io() \ No newline at end of file diff --git a/Documentation/blockdev/drbd/README.txt b/Documentation/blockdev/drbd/README.txt new file mode 100644 index 000000000000..627b0a1bf35e --- /dev/null +++ b/Documentation/blockdev/drbd/README.txt @@ -0,0 +1,16 @@ +Description + + DRBD is a shared-nothing, synchronously replicated block device. It + is designed to serve as a building block for high availability + clusters and in this context, is a "drop-in" replacement for shared + storage. Simplistically, you could see it as a network RAID 1. + + Please visit http://www.drbd.org to find out more. + +The here included files are intended to help understand the implementation + +DRBD-8.3-data-packets.svg, DRBD-data-packets.svg + relates some functions, and write packets. + +conn-states-8.dot, disk-states-8.dot, node-states-8.dot + The sub graphs of DRBD's state transitions diff --git a/Documentation/blockdev/drbd/conn-states-8.dot b/Documentation/blockdev/drbd/conn-states-8.dot new file mode 100644 index 000000000000..025e8cf5e64a --- /dev/null +++ b/Documentation/blockdev/drbd/conn-states-8.dot @@ -0,0 +1,18 @@ +digraph conn_states { + StandAllone -> WFConnection [ label = "ioctl_set_net()" ] + WFConnection -> Unconnected [ label = "unable to bind()" ] + WFConnection -> WFReportParams [ label = "in connect() after accept" ] + WFReportParams -> StandAllone [ label = "checks in receive_param()" ] + WFReportParams -> Connected [ label = "in receive_param()" ] + WFReportParams -> WFBitMapS [ label = "sync_handshake()" ] + WFReportParams -> WFBitMapT [ label = "sync_handshake()" ] + WFBitMapS -> SyncSource [ label = "receive_bitmap()" ] + WFBitMapT -> SyncTarget [ label = "receive_bitmap()" ] + SyncSource -> Connected + SyncTarget -> Connected + SyncSource -> PausedSyncS + SyncTarget -> PausedSyncT + PausedSyncS -> SyncSource + PausedSyncT -> SyncTarget + Connected -> WFConnection [ label = "* on network error" ] +} diff --git a/Documentation/blockdev/drbd/disk-states-8.dot b/Documentation/blockdev/drbd/disk-states-8.dot new file mode 100644 index 000000000000..d06cfb46fb98 --- /dev/null +++ b/Documentation/blockdev/drbd/disk-states-8.dot @@ -0,0 +1,16 @@ +digraph disk_states { + Diskless -> Inconsistent [ label = "ioctl_set_disk()" ] + Diskless -> Consistent [ label = "ioctl_set_disk()" ] + Diskless -> Outdated [ label = "ioctl_set_disk()" ] + Consistent -> Outdated [ label = "receive_param()" ] + Consistent -> UpToDate [ label = "receive_param()" ] + Consistent -> Inconsistent [ label = "start resync" ] + Outdated -> Inconsistent [ label = "start resync" ] + UpToDate -> Inconsistent [ label = "ioctl_replicate" ] + Inconsistent -> UpToDate [ label = "resync completed" ] + Consistent -> Failed [ label = "io completion error" ] + Outdated -> Failed [ label = "io completion error" ] + UpToDate -> Failed [ label = "io completion error" ] + Inconsistent -> Failed [ label = "io completion error" ] + Failed -> Diskless [ label = "sending notify to peer" ] +} diff --git a/Documentation/blockdev/drbd/node-states-8.dot 
b/Documentation/blockdev/drbd/node-states-8.dot new file mode 100644 index 000000000000..4a2b00c23547 --- /dev/null +++ b/Documentation/blockdev/drbd/node-states-8.dot @@ -0,0 +1,14 @@ +digraph node_states { + Secondary -> Primary [ label = "ioctl_set_state()" ] + Primary -> Secondary [ label = "ioctl_set_state()" ] +} + +digraph peer_states { + Secondary -> Primary [ label = "recv state packet" ] + Primary -> Secondary [ label = "recv state packet" ] + Primary -> Unknown [ label = "connection lost" ] + Secondary -> Unknown [ label = "connection lost" ] + Unknown -> Primary [ label = "connected" ] + Unknown -> Secondary [ label = "connected" ] +} + diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 742db2c9725c..6c7c9635da30 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -75,9 +75,6 @@ * it will be destroyed, * and completion will be signalled to the originator, * with or without "success". - * - * See also documentation/drbd-request-state-overview.dot - * (dot -Tps2 documentation/drbd-request-state-overview.dot | display -) */ enum drbd_req_event { -- cgit v1.2.3 From 67ebb6d6d27fdc9462d6719c8066899da3db51d7 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 17 Apr 2009 15:22:29 +0200 Subject: Cleanups in drbd.h, and drbd-connection-state-overview.dot Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- .../drbd/drbd-connection-state-overview.dot | 85 ++++++++++++++++++++++ drivers/block/drbd/drbd_buildtag.c | 4 +- drivers/block/drbd/drbd_nl.c | 1 + drivers/block/drbd/drbd_proc.c | 1 + include/linux/drbd.h | 21 +----- 5 files changed, 93 insertions(+), 19 deletions(-) create mode 100644 Documentation/blockdev/drbd/drbd-connection-state-overview.dot diff --git a/Documentation/blockdev/drbd/drbd-connection-state-overview.dot b/Documentation/blockdev/drbd/drbd-connection-state-overview.dot new file mode 100644 index 000000000000..6d9cf0a7b11d --- /dev/null +++ b/Documentation/blockdev/drbd/drbd-connection-state-overview.dot @@ -0,0 +1,85 @@ +// vim: set sw=2 sts=2 : +digraph { + rankdir=BT + bgcolor=white + + node [shape=plaintext] + node [fontcolor=black] + + StandAlone [ style=filled,fillcolor=gray,label=StandAlone ] + + node [fontcolor=lightgray] + + Unconnected [ label=Unconnected ] + + CommTrouble [ shape=record, + label="{communication loss|{Timeout|BrokenPipe|NetworkFailure}}" ] + + node [fontcolor=gray] + + subgraph cluster_try_connect { + label="try to connect, handshake" + rank=max + WFConnection [ label=WFConnection ] + WFReportParams [ label=WFReportParams ] + } + + TearDown [ label=TearDown ] + + Connected [ label=Connected,style=filled,fillcolor=green,fontcolor=black ] + + node [fontcolor=lightblue] + + StartingSyncS [ label=StartingSyncS ] + StartingSyncT [ label=StartingSyncT ] + + subgraph cluster_bitmap_exchange { + node [fontcolor=red] + fontcolor=red + label="new application (WRITE?) 
requests blocked\lwhile bitmap is exchanged" + + WFBitMapT [ label=WFBitMapT ] + WFSyncUUID [ label=WFSyncUUID ] + WFBitMapS [ label=WFBitMapS ] + } + + node [fontcolor=blue] + + cluster_resync [ shape=record,label="{resynchronisation process running\l'concurrent' application requests allowed|{{PausedSyncT\nSyncTarget}|{PausedSyncS\nSyncSource}}}" ] + + node [shape=box,fontcolor=black] + + // drbdadm [label="drbdadm connect"] + // handshake [label="drbd_connect()\ndrbd_do_handshake\ndrbd_sync_handshake() etc."] + // comm_error [label="communication trouble"] + + // + // edges + // -------------------------------------- + + StandAlone -> Unconnected [ label="drbdadm connect" ] + Unconnected -> StandAlone [ label="drbdadm disconnect\lor serious communication trouble" ] + Unconnected -> WFConnection [ label="receiver thread is started" ] + WFConnection -> WFReportParams [ headlabel="accept()\land/or \lconnect()\l" ] + + WFReportParams -> StandAlone [ label="during handshake\lpeers do not agree\labout something essential" ] + WFReportParams -> Connected [ label="data identical\lno sync needed",color=green,fontcolor=green ] + + WFReportParams -> WFBitMapS + WFReportParams -> WFBitMapT + WFBitMapT -> WFSyncUUID [minlen=0.1,constraint=false] + + WFBitMapS -> cluster_resync:S + WFSyncUUID -> cluster_resync:T + + edge [color=green] + cluster_resync:any -> Connected [ label="resnyc done",fontcolor=green ] + + edge [color=red] + WFReportParams -> CommTrouble + Connected -> CommTrouble + cluster_resync:any -> CommTrouble + edge [color=black] + CommTrouble -> Unconnected [label="receiver thread is stopped" ] + +} diff --git a/drivers/block/drbd/drbd_buildtag.c b/drivers/block/drbd/drbd_buildtag.c index 368298ce9f68..a057f0a3d098 100644 --- a/drivers/block/drbd/drbd_buildtag.c +++ b/drivers/block/drbd/drbd_buildtag.c @@ -2,6 +2,6 @@ #include const char *drbd_buildtag(void) { - return "GIT-hash: 2f1a0b8bfe96b74872f056b4d0fc14faa6c87b3b drbd/drbd_actlog.c drbd/drbd_bitmap.c drbd/drbd_int.h drbd/drbd_main.c drbd/drbd_nl.c drbd/drbd_receiver.c drbd/drbd_req.c drbd/drbd_req.h drbd/drbd_worker.c" - " build by phil@fat-tyre, 2009-04-16 15:21:58"; + return "GIT-hash: ae6080852f8359c8c175f90c3f3daa01409e1d1c drbd/linux/drbd.h" + " build by phil@fat-tyre, 2009-04-17 15:14:48"; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index b3ddc467386f..020c66741bd1 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 0e271975c0bf..b209da0ff4ea 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include "drbd_int.h" #include "lru_cache.h" /* for lc_sprintf_stats */ diff --git a/include/linux/drbd.h b/include/linux/drbd.h index a494853e35ce..748d1cb20d42 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -25,7 +25,6 @@ */ #ifndef DRBD_H #define DRBD_H -#include #include #include @@ -39,9 +38,9 @@ #include /* Altough the Linux source code makes a difference between - generic endiness and the bitfields' endianess, there is no - architecture as of Linux-2.6.24-rc4 where the bitfileds' endianess - does not match the generic endianess. */ + generic endianness and the bitfields' endianness, there is no + architecture as of Linux-2.6.24-rc4 where the bitfileds' endianness + does not match the generic endianness. 
*/ #if __BYTE_ORDER == __LITTLE_ENDIAN #define __LITTLE_ENDIAN_BITFIELD @@ -86,8 +85,7 @@ enum after_sb_handler { Violently }; -/* KEEP the order, do not delete or insert! - * Or change the API_VERSION, too. */ +/* KEEP the order, do not delete or insert. Only append. */ enum ret_codes { RetCodeBase = 100, NoError, /* 101 ... */ @@ -245,13 +243,6 @@ union drbd_state_t { unsigned role:2 ; /* 3/4 primary/secondary/unknown */ #else # error "this endianess is not supported" -#endif -#ifndef DRBD_DEBUG_STATE_CHANGES -#define DRBD_DEBUG_STATE_CHANGES 0 -#endif -#if DRBD_DEBUG_STATE_CHANGES - unsigned int line; - const char *func; #endif }; unsigned int i; @@ -290,10 +281,6 @@ extern const char *roles_to_name(enum drbd_role); extern const char *disks_to_name(enum drbd_disk_state); extern const char *set_st_err_name(enum set_st_err); -#ifndef BDEVNAME_SIZE -# define BDEVNAME_SIZE 32 -#endif - #define SHARED_SECRET_MAX 64 enum MetaDataFlags { -- cgit v1.2.3 From 1ff8d0aa6fb4011b8f49dcb79919946c1bd32196 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 22 Apr 2009 12:23:38 +0200 Subject: Removing CamelCase, minor cleanups 9b8b170585e2959d552b0ba592b649d758b933b8 Merge branch 'drbd-8.3' into drbd-8.3-cleanups 1922274c822b34cf4b66843819ccfed97e1da4f1 Improvements to drbd_nl_net_conf() * Removing macros * Fixed the code for 1a59b007715215697968cfaed3f2f159d262c030 Removing a workaround for bugs in blk_queue_stack_limits() in older kernels 56d788e4fa7a4809fc41f8c17a02032fb8793080 Merge branch 'drbd-8.3' into drbd-8.3-cleanups a95b4cfb595ced59ca6d2b26d450fd7e5e08fec8 The coding style guide says: Constants are ALL_CAPS 84cc8259557954b39fa8b0c57931b08cdc3df3f6 Merge branch 'drbd-8.2' into drbd-8.3 555b9f742cce70430a5769a35128ee3fa06fa9f4 Merge branch 'drbd-8.0' into drbd-8.2 f96e2776b2e49ef992d43a624d7c6be7d0610bfa Removing CamelCase from structs, enums and unions 727aca1fcc001381ab6f0a7229fa7db54b9431ea Removing '#if 0' code 546cbdec6779e997a37eac0d3762f4e0a3a390f2 Changing the remaining constants to UPPER_CASE only 57e478a0d328405ed97c3bada9d617e5d21a3868 Merge branch 'drbd-8.3' into drbd-8.3-cleanups 23b99bfb4ceca5a00c180620ac3f6db91edf7f85 Andi does not want to get it CC, he prefers to pick it off LKML d362ab639c966107670f4b375cbff757314f5cba Changing the remaining constants to UPPER_CASE only 6fe98c4f0302a28174dd3a852e72fbc6a0caa45e Merge branch 'drbd-8.2' into drbd-8.3 ff327744475ffc66795fc49dcdc232b21589185e Changing the remaining constants to UPPER_CASE only 185392bf5ce63936a5c424b97e38512ab4bcfdb7 Merge branch 'drbd-8.0' into drbd-8.2 5f87618f7272e9ef92b17b2b7c36dcc1c3d59031 Making all constants UPPER_CASE only 2ebf5224634bf5fda709fb54b43a5de6e3c10bee Adding explicit numbers to the return codes to user space Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 58 +- drivers/block/drbd/drbd_bitmap.c | 4 +- drivers/block/drbd/drbd_buildtag.c | 4 +- drivers/block/drbd/drbd_int.h | 676 +++++++++++------------ drivers/block/drbd/drbd_main.c | 1040 ++++++++++++++++++------------------ drivers/block/drbd/drbd_nl.c | 576 ++++++++++---------- drivers/block/drbd/drbd_proc.c | 16 +- drivers/block/drbd/drbd_receiver.c | 916 ++++++++++++++++--------------- drivers/block/drbd/drbd_req.c | 94 ++-- drivers/block/drbd/drbd_req.h | 4 +- drivers/block/drbd/drbd_strings.c | 118 ++-- drivers/block/drbd/drbd_worker.c | 210 ++++---- include/linux/drbd.h | 346 ++++++------ include/linux/drbd_limits.h | 14 +- include/linux/drbd_nl.h | 2 +- 15 files changed, 2012 
insertions(+), 2066 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 90ad8cbeafee..fbbddd3d7f31 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -223,7 +223,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) D_ASSERT(atomic_read(&mdev->local_cnt) > 0); - MTRACE(TraceTypeALExts, TraceLvlMetrics, + MTRACE(TRACE_TYPE_AL_EXTS, TRACE_LVL_METRICS, dev_info(DEV, "al_begin_io( sec=%llus (al_enr=%u) (rs_enr=%d) )\n", (unsigned long long) sector, enr, (int)BM_SECT_TO_EXT(sector)); @@ -260,7 +260,7 @@ void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector) struct lc_element *extent; unsigned long flags; - MTRACE(TraceTypeALExts, TraceLvlMetrics, + MTRACE(TRACE_TYPE_AL_EXTS, TRACE_LVL_METRICS, dev_info(DEV, "al_complete_io( sec=%llus (al_enr=%u) (rs_enr=%d) )\n", (unsigned long long) sector, enr, (int)BM_SECT_TO_EXT(sector)); @@ -305,7 +305,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) * TODO reduce maximum latency: * submit both bios, then wait for both, * instead of doing two synchronous sector writes. */ - if (mdev->state.conn < Connected && evicted != LC_FREE) + if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE) drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT); mutex_lock(&mdev->md_io_mutex); /* protects md_io_page, al_tr_cycle, ... */ @@ -607,7 +607,7 @@ STATIC int atodb_prepare_unless_covered(struct drbd_conf *mdev, atomic_inc(&wc->count); /* we already know that we may do this... - * inc_local_if_state(mdev,Attaching); + * inc_local_if_state(mdev,D_ATTACHING); * just get the extra reference, so that the local_cnt reflects * the number of pending IO requests DRBD at its backing device. */ @@ -629,7 +629,7 @@ out_bio_put: * drbd_al_to_on_disk_bm: * Writes the areas of the bitmap which are covered by the AL. * called when we detach (unconfigure) local storage, - * or when we go from Primary to Secondary state. + * or when we go from R_PRIMARY to R_SECONDARY state. */ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev) { @@ -638,7 +638,7 @@ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev) struct bio **bios; struct drbd_atodb_wait wc; - ERR_IF (!inc_local_if_state(mdev, Attaching)) + ERR_IF (!inc_local_if_state(mdev, D_ATTACHING)) return; /* sorry, I don't have any act_log etc... 
*/ wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); @@ -763,7 +763,7 @@ static inline int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext) lc_del(mdev->act_log, al_ext); spin_unlock_irq(&mdev->al_lock); - MTRACE(TraceTypeALExts, TraceLvlMetrics, + MTRACE(TRACE_TYPE_AL_EXTS, TRACE_LVL_METRICS, if (unlikely(!rv)) dev_info(DEV, "Waiting for extent in drbd_al_shrink()\n"); ); @@ -810,8 +810,8 @@ STATIC int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) { switch (mdev->state.conn) { - case SyncSource: case SyncTarget: - case PausedSyncS: case PausedSyncT: + case C_SYNC_SOURCE: case C_SYNC_TARGET: + case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T: drbd_resync_finished(mdev); default: /* nothing to do */ @@ -860,7 +860,7 @@ STATIC void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, dump_stack(); lc_put(mdev->resync, &ext->lce); - drbd_force_state(mdev, NS(conn, Disconnecting)); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); return; } } else { @@ -916,7 +916,7 @@ STATIC void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, * size byte of data starting from sector. Only clear a bits of the affected * one ore more _aligned_ BM_BLOCK_SIZE blocks. * - * called by worker on SyncTarget and receiver on SyncSource. + * called by worker on C_SYNC_TARGET and receiver on SyncSource. * */ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, @@ -953,7 +953,7 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); - MTRACE(TraceTypeResync, TraceLvlMetrics, + MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_METRICS, dev_info(DEV, "drbd_set_in_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n", (unsigned long long)sector, size, sbnr, ebnr); ); @@ -973,8 +973,8 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, /* should be roling marks, * but we estimate only anyways. */ if (mdev->rs_mark_left != drbd_bm_total_weight(mdev) && - mdev->state.conn != PausedSyncT && - mdev->state.conn != PausedSyncS) { + mdev->state.conn != C_PAUSED_SYNC_T && + mdev->state.conn != C_PAUSED_SYNC_S) { mdev->rs_mark_time = jiffies; mdev->rs_mark_left = drbd_bm_total_weight(mdev); } @@ -1032,7 +1032,7 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, sbnr = BM_SECT_TO_BIT(sector); ebnr = BM_SECT_TO_BIT(esector); - MTRACE(TraceTypeResync, TraceLvlMetrics, + MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_METRICS, dev_info(DEV, "drbd_set_out_of_sync: sector=%llus size=%u " "sbnr=%lu ebnr=%lu\n", (unsigned long long)sector, size, sbnr, ebnr); @@ -1133,7 +1133,7 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector) struct bm_extent *bm_ext; int i, sig; - MTRACE(TraceTypeResync, TraceLvlAll, + MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_ALL, dev_info(DEV, "drbd_rs_begin_io: sector=%llus (rs_end=%d)\n", (unsigned long long)sector, enr); ); @@ -1183,7 +1183,7 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) struct bm_extent *bm_ext; int i; - MTRACE(TraceTypeResync, TraceLvlAll, + MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_ALL, dev_info(DEV, "drbd_try_rs_begin_io: sector=%llus\n", (unsigned long long)sector); ); @@ -1203,7 +1203,7 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) * the lc_put here... 
* we also have to wake_up */ - MTRACE(TraceTypeResync, TraceLvlAll, + MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_ALL, dev_info(DEV, "dropping %u, aparently got 'synced' " "by application io\n", mdev->resync_wenr); ); @@ -1232,7 +1232,7 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) * but then could not set BME_LOCKED, * so we tried again. * drop the extra reference. */ - MTRACE(TraceTypeResync, TraceLvlAll, + MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_ALL, dev_info(DEV, "dropping extra reference on %u\n", enr); ); bm_ext->lce.refcnt--; @@ -1241,7 +1241,7 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) goto check_al; } else { if (mdev->resync_locked > mdev->resync->nr_elements-3) { - MTRACE(TraceTypeResync, TraceLvlAll, + MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_ALL, dev_info(DEV, "resync_locked = %u!\n", mdev->resync_locked); ); goto try_again; @@ -1268,7 +1268,7 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) goto check_al; } check_al: - MTRACE(TraceTypeResync, TraceLvlAll, + MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_ALL, dev_info(DEV, "checking al for %u\n", enr); ); for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { @@ -1284,7 +1284,7 @@ proceed: return 0; try_again: - MTRACE(TraceTypeResync, TraceLvlAll, + MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_ALL, dev_info(DEV, "need to try again for %u\n", enr); ); if (bm_ext) @@ -1299,7 +1299,7 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector) struct bm_extent *bm_ext; unsigned long flags; - MTRACE(TraceTypeResync, TraceLvlAll, + MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_ALL, dev_info(DEV, "drbd_rs_complete_io: sector=%llus (rs_enr=%d)\n", (long long)sector, enr); ); @@ -1336,13 +1336,13 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector) */ void drbd_rs_cancel_all(struct drbd_conf *mdev) { - MTRACE(TraceTypeResync, TraceLvlMetrics, + MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_METRICS, dev_info(DEV, "drbd_rs_cancel_all\n"); ); spin_lock_irq(&mdev->al_lock); - if (inc_local_if_state(mdev, Failed)) { /* Makes sure ->resync is there. */ + if (inc_local_if_state(mdev, D_FAILED)) { /* Makes sure ->resync is there. */ lc_reset(mdev->resync); dec_local(mdev); } @@ -1363,13 +1363,13 @@ int drbd_rs_del_all(struct drbd_conf *mdev) struct bm_extent *bm_ext; int i; - MTRACE(TraceTypeResync, TraceLvlMetrics, + MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_METRICS, dev_info(DEV, "drbd_rs_del_all\n"); ); spin_lock_irq(&mdev->al_lock); - if (inc_local_if_state(mdev, Failed)) { + if (inc_local_if_state(mdev, D_FAILED)) { /* ok, ->resync is there. 
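All of the MTRACE() conversions in this file are mechanical: only the TRACE_TYPE_*/TRACE_LVL_* constant names change, not the gating logic. The macro body itself is not part of this patch; a plausible sketch of the gate, assuming the trace_type and trace_level module parameters shown later in drbd_main.c:

	/* hypothetical expansion, for illustration only */
	#define MTRACE(type, lvl, code...)					\
		do {								\
			if ((trace_type & (type)) && trace_level >= (lvl)) {	\
				code;						\
			}							\
		} while (0)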
*/ for (i = 0; i < mdev->resync->nr_elements; i++) { bm_ext = (struct bm_extent *) lc_entry(mdev->resync, i); @@ -1406,7 +1406,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev) /* Record information on a failure to resync the specified blocks * - * called on SyncTarget when resync write fails or NegRSDReply received + * called on C_SYNC_TARGET when resync write fails or P_NEG_RS_DREPLY received * */ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size) @@ -1417,7 +1417,7 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size) sector_t esector, nr_sectors; int wake_up = 0; - MTRACE(TraceTypeResync, TraceLvlSummary, + MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_SUMMARY, dev_info(DEV, "drbd_rs_failed_io: sector=%llus, size=%u\n", (unsigned long long)sector, size); ); diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index d8e6d493012e..e602c778e712 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -580,7 +580,7 @@ unsigned long drbd_bm_total_weight(struct drbd_conf *mdev) unsigned long flags; /* if I don't have a disk, I don't know about out-of-sync status */ - if (!inc_local_if_state(mdev, Negotiating)) + if (!inc_local_if_state(mdev, D_NEGOTIATING)) return 0; ERR_IF(!b) return 0; @@ -842,7 +842,7 @@ STATIC int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) drbd_blk_run_queue(bdev_get_queue(mdev->bc->md_bdev)); wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0); - MTRACE(TraceTypeMDIO, TraceLvlSummary, + MTRACE(TRACE_TYPE_MD_IO, TRACE_LVL_SUMMARY, dev_info(DEV, "%s of bitmap took %lu jiffies\n", rw == READ ? "reading" : "writing", jiffies - now); ); diff --git a/drivers/block/drbd/drbd_buildtag.c b/drivers/block/drbd/drbd_buildtag.c index a057f0a3d098..617078b3dc33 100644 --- a/drivers/block/drbd/drbd_buildtag.c +++ b/drivers/block/drbd/drbd_buildtag.c @@ -2,6 +2,6 @@ #include const char *drbd_buildtag(void) { - return "GIT-hash: ae6080852f8359c8c175f90c3f3daa01409e1d1c drbd/linux/drbd.h" - " build by phil@fat-tyre, 2009-04-17 15:14:48"; + return "GIT-hash: 1a59b007715215697968cfaed3f2f159d262c030 drbd/drbd_nl.c" + " build by phil@fat-tyre, 2009-04-22 11:36:29"; } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 1b51051e2fcf..94138cc08943 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -181,116 +181,113 @@ extern struct ratelimit_state drbd_ratelimit_state; * on the wire *********************************************************************/ -enum Drbd_Packet_Cmd { +enum drbd_packets { /* receiver (data socket) */ - Data = 0x00, - DataReply = 0x01, /* Response to DataRequest */ - RSDataReply = 0x02, /* Response to RSDataRequest */ - Barrier = 0x03, - ReportBitMap = 0x04, - BecomeSyncTarget = 0x05, - BecomeSyncSource = 0x06, - UnplugRemote = 0x07, /* Used at various times to hint the peer */ - DataRequest = 0x08, /* Used to ask for a data block */ - RSDataRequest = 0x09, /* Used to ask for a data block for resync */ - SyncParam = 0x0a, - ReportProtocol = 0x0b, - ReportUUIDs = 0x0c, - ReportSizes = 0x0d, - ReportState = 0x0e, - ReportSyncUUID = 0x0f, - AuthChallenge = 0x10, - AuthResponse = 0x11, - StateChgRequest = 0x12, + P_DATA = 0x00, + P_DATA_REPLY = 0x01, /* Response to P_DATA_REQUEST */ + P_RS_DATA_REPLY = 0x02, /* Response to P_RS_DATA_REQUEST */ + P_BARRIER = 0x03, + P_BITMAP = 0x04, + P_BECOME_SYNC_TARGET = 0x05, + P_BECOME_SYNC_SOURCE = 0x06, + P_UNPLUG_REMOTE = 0x07, /* Used at various times to hint 
the peer */ + P_DATA_REQUEST = 0x08, /* Used to ask for a data block */ + P_RS_DATA_REQUEST = 0x09, /* Used to ask for a data block for resync */ + P_SYNC_PARAM = 0x0a, + P_PROTOCOL = 0x0b, + P_UUIDS = 0x0c, + P_SIZES = 0x0d, + P_STATE = 0x0e, + P_SYNC_UUID = 0x0f, + P_AUTH_CHALLENGE = 0x10, + P_AUTH_RESPONSE = 0x11, + P_STATE_CHG_REQ = 0x12, /* asender (meta socket */ - Ping = 0x13, - PingAck = 0x14, - RecvAck = 0x15, /* Used in protocol B */ - WriteAck = 0x16, /* Used in protocol C */ - RSWriteAck = 0x17, /* Is a WriteAck, additionally call set_in_sync(). */ - DiscardAck = 0x18, /* Used in proto C, two-primaries conflict detection */ - NegAck = 0x19, /* Sent if local disk is unusable */ - NegDReply = 0x1a, /* Local disk is broken... */ - NegRSDReply = 0x1b, /* Local disk is broken... */ - BarrierAck = 0x1c, - StateChgReply = 0x1d, + P_PING = 0x13, + P_PING_ACK = 0x14, + P_RECV_ACK = 0x15, /* Used in protocol B */ + P_WRITE_ACK = 0x16, /* Used in protocol C */ + P_RS_WRITE_ACK = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */ + P_DISCARD_ACK = 0x18, /* Used in proto C, two-primaries conflict detection */ + P_NEG_ACK = 0x19, /* Sent if local disk is unusable */ + P_NEG_DREPLY = 0x1a, /* Local disk is broken... */ + P_NEG_RS_DREPLY = 0x1b, /* Local disk is broken... */ + P_BARRIER_ACK = 0x1c, + P_STATE_CHG_REPLY = 0x1d, /* "new" commands, no longer fitting into the ordering scheme above */ - OVRequest = 0x1e, /* data socket */ - OVReply = 0x1f, - OVResult = 0x20, /* meta socket */ - CsumRSRequest = 0x21, /* data socket */ - RSIsInSync = 0x22, /* meta socket */ - SyncParam89 = 0x23, /* data socket, protocol version 89 replacement for SyncParam */ - ReportCBitMap = 0x24, /* compressed or otherwise encoded bitmap transfer */ + P_OV_REQUEST = 0x1e, /* data socket */ + P_OV_REPLY = 0x1f, + P_OV_RESULT = 0x20, /* meta socket */ + P_CSUM_RS_REQUEST = 0x21, /* data socket */ + P_RS_IS_IN_SYNC = 0x22, /* meta socket */ + P_SYNC_PARAM89 = 0x23, /* data socket, protocol version 89 replacement for P_SYNC_PARAM */ + P_COMPRESSED_BITMAP = 0x24, /* compressed or otherwise encoded bitmap transfer */ - MAX_CMD = 0x25, - MayIgnore = 0x100, /* Flag to test if (cmd > MayIgnore) ... */ - MAX_OPT_CMD = 0x101, + P_MAX_CMD = 0x25, + P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ + P_MAX_OPT_CMD = 0x101, /* special command ids for handshake */ - HandShakeM = 0xfff1, /* First Packet on the MetaSock */ - HandShakeS = 0xfff2, /* First Packet on the Socket */ + P_HAND_SHAKE_M = 0xfff1, /* First Packet on the MetaSock */ + P_HAND_SHAKE_S = 0xfff2, /* First Packet on the Socket */ - HandShake = 0xfffe /* FIXED for the next century! */ + P_HAND_SHAKE = 0xfffe /* FIXED for the next century! 
*/ }; -static inline const char *cmdname(enum Drbd_Packet_Cmd cmd) +static inline const char *cmdname(enum drbd_packets cmd) { /* THINK may need to become several global tables * when we want to support more than * one PRO_VERSION */ static const char *cmdnames[] = { - [Data] = "Data", - [DataReply] = "DataReply", - [RSDataReply] = "RSDataReply", - [Barrier] = "Barrier", - [ReportBitMap] = "ReportBitMap", - [BecomeSyncTarget] = "BecomeSyncTarget", - [BecomeSyncSource] = "BecomeSyncSource", - [UnplugRemote] = "UnplugRemote", - [DataRequest] = "DataRequest", - [RSDataRequest] = "RSDataRequest", - [SyncParam] = "SyncParam", - [SyncParam89] = "SyncParam89", - [ReportProtocol] = "ReportProtocol", - [ReportUUIDs] = "ReportUUIDs", - [ReportSizes] = "ReportSizes", - [ReportState] = "ReportState", - [ReportSyncUUID] = "ReportSyncUUID", - [AuthChallenge] = "AuthChallenge", - [AuthResponse] = "AuthResponse", - [Ping] = "Ping", - [PingAck] = "PingAck", - [RecvAck] = "RecvAck", - [WriteAck] = "WriteAck", - [RSWriteAck] = "RSWriteAck", - [DiscardAck] = "DiscardAck", - [NegAck] = "NegAck", - [NegDReply] = "NegDReply", - [NegRSDReply] = "NegRSDReply", - [BarrierAck] = "BarrierAck", - [StateChgRequest] = "StateChgRequest", - [StateChgReply] = "StateChgReply", - [OVRequest] = "OVRequest", - [OVReply] = "OVReply", - [OVResult] = "OVResult", - [CsumRSRequest] = "CsumRSRequest", - [RSIsInSync] = "RSIsInSync", - [ReportCBitMap] = "ReportCBitMap", - [MAX_CMD] = NULL, + [P_DATA] = "Data", + [P_DATA_REPLY] = "DataReply", + [P_RS_DATA_REPLY] = "RSDataReply", + [P_BARRIER] = "Barrier", + [P_BITMAP] = "ReportBitMap", + [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget", + [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource", + [P_UNPLUG_REMOTE] = "UnplugRemote", + [P_DATA_REQUEST] = "DataRequest", + [P_RS_DATA_REQUEST] = "RSDataRequest", + [P_SYNC_PARAM] = "SyncParam", + [P_SYNC_PARAM89] = "SyncParam89", + [P_PROTOCOL] = "ReportProtocol", + [P_UUIDS] = "ReportUUIDs", + [P_SIZES] = "ReportSizes", + [P_STATE] = "ReportState", + [P_SYNC_UUID] = "ReportSyncUUID", + [P_AUTH_CHALLENGE] = "AuthChallenge", + [P_AUTH_RESPONSE] = "AuthResponse", + [P_PING] = "Ping", + [P_PING_ACK] = "PingAck", + [P_RECV_ACK] = "RecvAck", + [P_WRITE_ACK] = "WriteAck", + [P_RS_WRITE_ACK] = "RSWriteAck", + [P_DISCARD_ACK] = "DiscardAck", + [P_NEG_ACK] = "NegAck", + [P_NEG_DREPLY] = "NegDReply", + [P_NEG_RS_DREPLY] = "NegRSDReply", + [P_BARRIER_ACK] = "BarrierAck", + [P_STATE_CHG_REQ] = "StateChgRequest", + [P_STATE_CHG_REPLY] = "StateChgReply", + [P_OV_REQUEST] = "OVRequest", + [P_OV_REPLY] = "OVReply", + [P_OV_RESULT] = "OVResult", + [P_MAX_CMD] = NULL, }; - if (cmd == HandShakeM) + if (cmd == P_HAND_SHAKE_M) return "HandShakeM"; - if (cmd == HandShakeS) + if (cmd == P_HAND_SHAKE_S) return "HandShakeS"; - if (cmd == HandShake) + if (cmd == P_HAND_SHAKE) return "HandShake"; - if (cmd >= MAX_CMD) + if (cmd >= P_MAX_CMD) return "Unknown"; return cmdnames[cmd]; } @@ -308,7 +305,7 @@ struct bm_xfer_ctx { unsigned long bit_offset; unsigned long word_offset; - /* statistics; index: (h->command == ReportBitMap) */ + /* statistics; index: (h->command == P_BITMAP) */ unsigned packets[2]; unsigned bytes[2]; }; @@ -345,7 +342,7 @@ static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c) * NOTE that the payload starts at a long aligned offset, * regardless of 32 or 64 bit arch! */ -struct Drbd_Header { +struct p_header { u32 magic; u16 command; u16 length; /* bytes of data after this header */ @@ -354,19 +351,19 @@ struct Drbd_Header { /* 8 bytes. 
packet FIXED for the next century! */ /* - * short commands, packets without payload, plain Drbd_Header: - * Ping - * PingAck - * BecomeSyncTarget - * BecomeSyncSource - * UnplugRemote + * short commands, packets without payload, plain p_header: + * P_PING + * P_PING_ACK + * P_BECOME_SYNC_TARGET + * P_BECOME_SYNC_SOURCE + * P_UNPLUG_REMOTE */ /* * commands with out-of-struct payload: - * ReportBitMap (no additional fields) - * Data, DataReply (see Drbd_Data_Packet) - * ReportCBitMap (see receive_compressed_bitmap) + * P_BITMAP (no additional fields) + * P_DATA, P_DATA_REPLY (see p_data) + * P_COMPRESSED_BITMAP (see receive_compressed_bitmap) */ /* these defines must not be changed without changing the protocol version */ @@ -374,8 +371,8 @@ struct Drbd_Header { #define DP_RW_SYNC 2 #define DP_MAY_SET_IN_SYNC 4 -struct Drbd_Data_Packet { - struct Drbd_Header head; +struct p_data { + struct p_header head; u64 sector; /* 64 bits sector number */ u64 block_id; /* to identify the request in protocol B&C */ u32 seq_num; @@ -384,14 +381,14 @@ struct Drbd_Data_Packet { /* * commands which share a struct: - * Drbd_BlockAck_Packet: - * RecvAck (proto B), WriteAck (proto C), - * DiscardAck (proto C, two-primaries conflict detection) - * Drbd_BlockRequest_Packet: - * DataRequest, RSDataRequest + * p_block_ack: + * P_RECV_ACK (proto B), P_WRITE_ACK (proto C), + * P_DISCARD_ACK (proto C, two-primaries conflict detection) + * p_block_req: + * P_DATA_REQUEST, P_RS_DATA_REQUEST */ -struct Drbd_BlockAck_Packet { - struct Drbd_Header head; +struct p_block_ack { + struct p_header head; u64 sector; u64 block_id; u32 blksize; @@ -399,8 +396,8 @@ struct Drbd_BlockAck_Packet { } __attribute((packed)); -struct Drbd_BlockRequest_Packet { - struct Drbd_Header head; +struct p_block_req { + struct p_header head; u64 sector; u64 block_id; u32 blksize; @@ -409,15 +406,15 @@ struct Drbd_BlockRequest_Packet { /* * commands with their own struct for additional fields: - * HandShake - * Barrier - * BarrierAck - * SyncParam + * P_HAND_SHAKE + * P_BARRIER + * P_BARRIER_ACK + * P_SYNC_PARAM * ReportParams */ -struct Drbd_HandShake_Packet { - struct Drbd_Header head; /* 8 bytes */ +struct p_handshake { + struct p_header head; /* 8 bytes */ u32 protocol_min; u32 feature_flags; u32 protocol_max; @@ -431,36 +428,36 @@ struct Drbd_HandShake_Packet { } __attribute((packed)); /* 80 bytes, FIXED for the next century */ -struct Drbd_Barrier_Packet { - struct Drbd_Header head; +struct p_barrier { + struct p_header head; u32 barrier; /* barrier number _handle_ only */ u32 pad; /* to multiple of 8 Byte */ } __attribute((packed)); -struct Drbd_BarrierAck_Packet { - struct Drbd_Header head; +struct p_barrier_ack { + struct p_header head; u32 barrier; u32 set_size; } __attribute((packed)); -struct Drbd_SyncParam_Packet { - struct Drbd_Header head; +struct p_rs_param { + struct p_header head; u32 rate; /* Since protocol version 88 and higher. 
*/ char verify_alg[0]; } __attribute((packed)); -struct Drbd_SyncParam89_Packet { - struct Drbd_Header head; +struct p_rs_param_89 { + struct p_header head; u32 rate; /* protocol version 89: */ char verify_alg[SHARED_SECRET_MAX]; char csums_alg[SHARED_SECRET_MAX]; } __attribute((packed)); -struct Drbd_Protocol_Packet { - struct Drbd_Header head; +struct p_protocol { + struct p_header head; u32 protocol; u32 after_sb_0p; u32 after_sb_1p; @@ -473,18 +470,18 @@ struct Drbd_Protocol_Packet { } __attribute((packed)); -struct Drbd_GenCnt_Packet { - struct Drbd_Header head; - u64 uuid[EXT_UUID_SIZE]; +struct p_uuids { + struct p_header head; + u64 uuid[UI_EXTENDED_SIZE]; } __attribute((packed)); -struct Drbd_SyncUUID_Packet { - struct Drbd_Header head; +struct p_rs_uuid { + struct p_header head; u64 uuid; } __attribute((packed)); -struct Drbd_Sizes_Packet { - struct Drbd_Header head; +struct p_sizes { + struct p_header head; u64 d_size; /* size of disk */ u64 u_size; /* user requested size */ u64 c_size; /* current exported size */ @@ -492,23 +489,23 @@ struct Drbd_Sizes_Packet { u32 queue_order_type; } __attribute((packed)); -struct Drbd_State_Packet { - struct Drbd_Header head; +struct p_state { + struct p_header head; u32 state; } __attribute((packed)); -struct Drbd_Req_State_Packet { - struct Drbd_Header head; +struct p_req_state { + struct p_header head; u32 mask; u32 val; } __attribute((packed)); -struct Drbd_RqS_Reply_Packet { - struct Drbd_Header head; +struct p_req_state_reply { + struct p_header head; u32 retcode; } __attribute((packed)); -struct Drbd06_Parameter_P { +struct p_drbd06_param { u64 size; u32 state; u32 blksize; @@ -518,8 +515,8 @@ struct Drbd06_Parameter_P { u32 bit_map_gen[5]; } __attribute((packed)); -struct Drbd_Discard_Packet { - struct Drbd_Header head; +struct p_discard { + struct p_header head; u64 block_id; u32 seq_num; u32 pad; @@ -527,7 +524,7 @@ struct Drbd_Discard_Packet { /* Valid values for the encoding field. * Bump proto version when changing this. 
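All of the renamed p_* structs embed the fixed 8-byte p_header and are declared __attribute((packed)), so the on-wire layout cannot drift with compiler padding. A self-contained sketch of that invariant, using user-space fixed-width types for illustration:

	#include <stdint.h>

	struct p_header_sketch {
		uint32_t magic;
		uint16_t command;
		uint16_t length;	/* bytes of data after this header */
	} __attribute__((packed));

	/* compile-time check: the wire header stays at exactly 8 bytes */
	typedef char p_header_is_8_bytes[sizeof(struct p_header_sketch) == 8 ? 1 : -1];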
*/ -enum Drbd_bitmap_code { +enum drbd_bitmap_code { RLE_VLI_Bytes = 0, RLE_VLI_BitsFibD_0_1 = 1, RLE_VLI_BitsFibD_1_1 = 2, @@ -536,9 +533,9 @@ enum Drbd_bitmap_code { RLE_VLI_BitsFibD_3_5 = 5, }; -struct Drbd_Compressed_Bitmap_Packet { - struct Drbd_Header head; - /* (encoding & 0x0f): actual encoding, see enum Drbd_bitmap_code +struct p_compressed_bm { + struct p_header head; + /* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code * (encoding & 0x80): polarity (set/unset) of first runlength * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits * used to pad up to head.length bytes @@ -548,93 +545,93 @@ struct Drbd_Compressed_Bitmap_Packet { u8 code[0]; } __attribute((packed)); -static inline enum Drbd_bitmap_code -DCBP_get_code(struct Drbd_Compressed_Bitmap_Packet *p) +static inline enum drbd_bitmap_code +DCBP_get_code(struct p_compressed_bm *p) { - return (enum Drbd_bitmap_code)(p->encoding & 0x0f); + return (enum drbd_bitmap_code)(p->encoding & 0x0f); } static inline void -DCBP_set_code(struct Drbd_Compressed_Bitmap_Packet *p, enum Drbd_bitmap_code code) +DCBP_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code) { BUG_ON(code & ~0xf); p->encoding = (p->encoding & ~0xf) | code; } static inline int -DCBP_get_start(struct Drbd_Compressed_Bitmap_Packet *p) +DCBP_get_start(struct p_compressed_bm *p) { return (p->encoding & 0x80) != 0; } static inline void -DCBP_set_start(struct Drbd_Compressed_Bitmap_Packet *p, int set) +DCBP_set_start(struct p_compressed_bm *p, int set) { p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0); } static inline int -DCBP_get_pad_bits(struct Drbd_Compressed_Bitmap_Packet *p) +DCBP_get_pad_bits(struct p_compressed_bm *p) { return (p->encoding >> 4) & 0x7; } static inline void -DCBP_set_pad_bits(struct Drbd_Compressed_Bitmap_Packet *p, int n) +DCBP_set_pad_bits(struct p_compressed_bm *p, int n) { BUG_ON(n & ~0x7); p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4); } -/* one bitmap packet, including the Drbd_Header, +/* one bitmap packet, including the p_header, * should fit within one _architecture independend_ page. * so we need to use the fixed size 4KiB page size * most architechtures have used for a long time. 
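The DCBP_* accessors above pack three values into the single encoding byte of p_compressed_bm: bits 0-3 hold the drbd_bitmap_code, bits 4-6 the number of trailing pad bits, and bit 7 the polarity of the first run length. A hypothetical one-shot packer, equivalent to chaining DCBP_set_code(), DCBP_set_pad_bits() and DCBP_set_start() on a zeroed byte:

	/* illustration only; the driver uses the individual DCBP_set_* helpers */
	static inline unsigned char dcbp_pack(int code, int pad_bits, int start)
	{
		return (code & 0x0f)
			| ((pad_bits & 0x07) << 4)
			| (start ? 0x80 : 0x00);
	}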
*/ -#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct Drbd_Header)) +#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header)) #define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long)) -#define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct Drbd_Compressed_Bitmap_Packet)) +#define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm)) #if (PAGE_SIZE < 4096) /* drbd_send_bitmap / receive_bitmap would break horribly */ #error "PAGE_SIZE too small" #endif -union Drbd_Polymorph_Packet { - struct Drbd_Header head; - struct Drbd_HandShake_Packet HandShake; - struct Drbd_Data_Packet Data; - struct Drbd_BlockAck_Packet BlockAck; - struct Drbd_Barrier_Packet Barrier; - struct Drbd_BarrierAck_Packet BarrierAck; - struct Drbd_SyncParam89_Packet SyncParam89; - struct Drbd_Protocol_Packet Protocol; - struct Drbd_Sizes_Packet Sizes; - struct Drbd_GenCnt_Packet GenCnt; - struct Drbd_State_Packet State; - struct Drbd_Req_State_Packet ReqState; - struct Drbd_RqS_Reply_Packet RqSReply; - struct Drbd_BlockRequest_Packet BlockRequest; +union p_polymorph { + struct p_header header; + struct p_handshake handshake; + struct p_data data; + struct p_block_ack block_ack; + struct p_barrier barrier; + struct p_barrier_ack barrier_ack; + struct p_rs_param_89 rs_param_89; + struct p_protocol protocol; + struct p_sizes sizes; + struct p_uuids uuids; + struct p_state state; + struct p_req_state req_state; + struct p_req_state_reply req_state_reply; + struct p_block_req block_req; } __attribute((packed)); /**********************************************************************/ -enum Drbd_thread_state { +enum drbd_thread_state { None, Running, Exiting, Restarting }; -struct Drbd_thread { +struct drbd_thread { spinlock_t t_lock; struct task_struct *task; struct completion stop; - enum Drbd_thread_state t_state; - int (*function) (struct Drbd_thread *); + enum drbd_thread_state t_state; + int (*function) (struct drbd_thread *); struct drbd_conf *mdev; int reset_cpu_mask; }; -static inline enum Drbd_thread_state get_t_state(struct Drbd_thread *thi) +static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi) { /* THINK testing the t_state seems to be uncritical in all cases * (but thread_{start,stop}), so we can read it *without* the lock. @@ -649,7 +646,7 @@ static inline enum Drbd_thread_state get_t_state(struct Drbd_thread *thi) * Having this as the first member of a struct provides sort of "inheritance". * "derived" structs can be "drbd_queue_work()"ed. * The callback should know and cast back to the descendant struct. - * drbd_request and Tl_epoch_entry are descendants of drbd_work. + * drbd_request and drbd_epoch_entry are descendants of drbd_work. */ struct drbd_work; typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel); @@ -658,7 +655,7 @@ struct drbd_work { drbd_work_cb cb; }; -struct drbd_barrier; +struct drbd_tl_epoch; struct drbd_request { struct drbd_work w; struct drbd_conf *mdev; @@ -673,7 +670,7 @@ struct drbd_request { * starting a new epoch... */ - /* up to here, the struct layout is identical to Tl_epoch_entry; + /* up to here, the struct layout is identical to drbd_epoch_entry; * we might be able to use that to our advantage... 
*/ struct list_head tl_requests; /* ring list in the transfer log */ @@ -683,10 +680,10 @@ struct drbd_request { unsigned long start_time; }; -struct drbd_barrier { +struct drbd_tl_epoch { struct drbd_work w; struct list_head requests; /* requests before */ - struct drbd_barrier *next; /* pointer to the next barrier */ + struct drbd_tl_epoch *next; /* pointer to the next barrier */ unsigned int br_number; /* the barriers identifier. */ int n_req; /* number of requests attached before this barrier */ }; @@ -696,8 +693,8 @@ struct drbd_request; /* These Tl_epoch_entries may be in one of 6 lists: active_ee .. data packet being written sync_ee .. syncer block being written - done_ee .. block written, need to send WriteAck - read_ee .. [RS]DataRequest being read + done_ee .. block written, need to send P_WRITE_ACK + read_ee .. [RS]P_DATA_REQUEST being read */ struct drbd_epoch { @@ -717,7 +714,7 @@ enum { DE_IS_FINISHING, }; -struct Tl_epoch_entry { +struct drbd_epoch_entry { struct drbd_work w; struct drbd_conf *mdev; struct bio *private_bio; @@ -752,7 +749,7 @@ enum { /* global flag bits */ enum { - CREATE_BARRIER, /* next Data is preceeded by a Barrier */ + CREATE_BARRIER, /* next P_DATA is preceeded by a P_BARRIER */ SIGNAL_ASENDER, /* whether asender wants to be interrupted */ SEND_PING, /* whether asender should send a ping asap */ WORK_PENDING, /* completion flag for drbd_disconnect */ @@ -767,7 +764,7 @@ enum { CL_ST_CHG_FAIL, CRASHED_PRIMARY, /* This node was a crashed primary. * Gets cleared when the state.conn - * goes into Connected state. */ + * goes into C_CONNECTED state. */ WRITE_BM_AFTER_RESYNC, /* A kmalloc() during resync failed */ NO_BARRIER_SUPP, /* underlying block device doesn't implement barriers */ CONSIDER_RESYNC, @@ -806,15 +803,15 @@ struct drbd_socket { struct socket *socket; /* this way we get our * send/receive buffers off the stack */ - union Drbd_Polymorph_Packet sbuf; - union Drbd_Polymorph_Packet rbuf; + union p_polymorph sbuf; + union p_polymorph rbuf; }; struct drbd_md { u64 md_offset; /* sector offset to 'super' block */ u64 la_size_sect; /* last agreed size, unit sectors */ - u64 uuid[UUID_SIZE]; + u64 uuid[UI_SIZE]; u64 device_uuid; u32 flags; u32 md_size_sect; @@ -892,9 +889,9 @@ struct drbd_conf { struct timer_list md_sync_timer; /* Used after attach while negotiating new disk state. */ - union drbd_state_t new_state_tmp; + union drbd_state new_state_tmp; - union drbd_state_t state; + union drbd_state state; wait_queue_head_t misc_wait; wait_queue_head_t state_wait; /* upon each state change. 
*/ unsigned int send_cnt; @@ -910,9 +907,9 @@ struct drbd_conf { atomic_t local_cnt; /* Waiting for local completion */ atomic_t net_cnt; /* Users of net_conf */ spinlock_t req_lock; - struct drbd_barrier *unused_spare_barrier; /* for pre-allocation */ - struct drbd_barrier *newest_barrier; - struct drbd_barrier *oldest_barrier; + struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */ + struct drbd_tl_epoch *newest_tle; + struct drbd_tl_epoch *oldest_tle; struct list_head out_of_sequence_requests; struct hlist_head *tl_hash; unsigned int tl_hash_s; @@ -940,9 +937,9 @@ struct drbd_conf { struct crypto_hash *csums_tfm; struct crypto_hash *verify_tfm; - struct Drbd_thread receiver; - struct Drbd_thread worker; - struct Drbd_thread asender; + struct drbd_thread receiver; + struct drbd_thread worker; + struct drbd_thread asender; struct drbd_bitmap *bitmap; unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */ @@ -968,7 +965,7 @@ struct drbd_conf { unsigned int ee_hash_s; /* this one is protected by ee_lock, single thread */ - struct Tl_epoch_entry *last_write_w_barrier; + struct drbd_epoch_entry *last_write_w_barrier; int next_barrier_nr; struct hlist_head *app_reads_hash; /* is proteced by req_lock */ @@ -1049,26 +1046,26 @@ static inline void drbd_put_data_sock(struct drbd_conf *mdev) /* drbd_main.c */ enum chg_state_flags { - ChgStateHard = 1, - ChgStateVerbose = 2, - ChgWaitComplete = 4, - ChgSerialize = 8, - ChgOrdered = ChgWaitComplete + ChgSerialize, + CS_HARD = 1, + CS_VERBOSE = 2, + CS_WAIT_COMPLETE = 4, + CS_SERIALIZE = 8, + CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, }; extern void drbd_init_set_defaults(struct drbd_conf *mdev); extern int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, - union drbd_state_t mask, union drbd_state_t val); -extern void drbd_force_state(struct drbd_conf *, union drbd_state_t, - union drbd_state_t); -extern int _drbd_request_state(struct drbd_conf *, union drbd_state_t, - union drbd_state_t, enum chg_state_flags); -extern int __drbd_set_state(struct drbd_conf *, union drbd_state_t, + union drbd_state mask, union drbd_state val); +extern void drbd_force_state(struct drbd_conf *, union drbd_state, + union drbd_state); +extern int _drbd_request_state(struct drbd_conf *, union drbd_state, + union drbd_state, enum chg_state_flags); +extern int __drbd_set_state(struct drbd_conf *, union drbd_state, enum chg_state_flags, struct completion *done); -extern void print_st_err(struct drbd_conf *, union drbd_state_t, - union drbd_state_t, int); -extern int drbd_thread_start(struct Drbd_thread *thi); -extern void _drbd_thread_stop(struct Drbd_thread *thi, int restart, int wait); +extern void print_st_err(struct drbd_conf *, union drbd_state, + union drbd_state, int); +extern int drbd_thread_start(struct drbd_thread *thi); +extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); #ifdef CONFIG_SMP extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev); extern cpumask_t drbd_calc_cpu_mask(struct drbd_conf *mdev); @@ -1080,7 +1077,7 @@ extern void drbd_free_resources(struct drbd_conf *mdev); extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, unsigned int set_size); extern void tl_clear(struct drbd_conf *mdev); -extern void _tl_add_barrier(struct drbd_conf *, struct drbd_barrier *); +extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); extern void drbd_free_sock(struct drbd_conf *mdev); extern int drbd_send(struct drbd_conf *mdev, struct socket *sock, void 
*buf, size_t size, unsigned msg_flags); @@ -1092,39 +1089,39 @@ extern int drbd_send_sizes(struct drbd_conf *mdev); extern int _drbd_send_state(struct drbd_conf *mdev); extern int drbd_send_state(struct drbd_conf *mdev); extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum Drbd_Packet_Cmd cmd, struct Drbd_Header *h, + enum drbd_packets cmd, struct p_header *h, size_t size, unsigned msg_flags); #define USE_DATA_SOCKET 1 #define USE_META_SOCKET 0 extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, - enum Drbd_Packet_Cmd cmd, struct Drbd_Header *h, + enum drbd_packets cmd, struct p_header *h, size_t size); -extern int drbd_send_cmd2(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, +extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, size_t size); extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc); extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size); -extern int drbd_send_ack(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, - struct Tl_epoch_entry *e); -extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, - struct Drbd_BlockRequest_Packet *rp); -extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, - struct Drbd_Data_Packet *dp); -extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, +extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, + struct drbd_epoch_entry *e); +extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd, + struct p_block_req *rp); +extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, + struct p_data *dp); +extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, sector_t sector, int blksize, u64 block_id); extern int _drbd_send_page(struct drbd_conf *mdev, struct page *page, int offset, size_t size); -extern int drbd_send_block(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, - struct Tl_epoch_entry *e); +extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, + struct drbd_epoch_entry *e); extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); extern int _drbd_send_barrier(struct drbd_conf *mdev, - struct drbd_barrier *barrier); + struct drbd_tl_epoch *barrier); extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd, sector_t sector, int size, u64 block_id); extern int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector,int size, void *digest, int digest_size, - enum Drbd_Packet_Cmd cmd); + enum drbd_packets cmd); extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size); extern int drbd_send_bitmap(struct drbd_conf *mdev); @@ -1356,25 +1353,25 @@ extern int trace_devs; extern int trace_level; enum { - TraceLvlAlways = 0, - TraceLvlSummary, - TraceLvlMetrics, - TraceLvlAll, - TraceLvlMax + TRACE_LVL_ALWAYS = 0, + TRACE_LVL_SUMMARY, + TRACE_LVL_METRICS, + TRACE_LVL_ALL, + TRACE_LVL_MAX }; enum { - TraceTypePacket = 0x00000001, - TraceTypeRq = 0x00000002, - TraceTypeUuid = 0x00000004, - TraceTypeResync = 0x00000008, - TraceTypeEE = 0x00000010, - TraceTypeUnplug = 0x00000020, - TraceTypeNl = 0x00000040, - TraceTypeALExts = 0x00000080, - TraceTypeIntRq = 0x00000100, - TraceTypeMDIO = 0x00000200, - TraceTypeEpochs = 0x00000400, + TRACE_TYPE_PACKET = 0x00000001, + TRACE_TYPE_RQ = 0x00000002, + TRACE_TYPE_UUID = 0x00000004, + TRACE_TYPE_RESYNC = 0x00000008, + TRACE_TYPE_EE = 0x00000010, + TRACE_TYPE_UNPLUG = 
0x00000020, + TRACE_TYPE_NL = 0x00000040, + TRACE_TYPE_AL_EXTS = 0x00000080, + TRACE_TYPE_INT_RQ = 0x00000100, + TRACE_TYPE_MD_IO = 0x00000200, + TRACE_TYPE_EPOCHS = 0x00000400, }; static inline int @@ -1423,28 +1420,28 @@ extern void _dump_bio(const char *pfx, struct drbd_conf *mdev, struct bio *bio, static inline void dump_bio(struct drbd_conf *mdev, struct bio *bio, int complete, struct drbd_request *r) { - MTRACE(TraceTypeRq, TraceLvlSummary, + MTRACE(TRACE_TYPE_RQ, TRACE_LVL_SUMMARY, _dump_bio("Rq", mdev, bio, complete, r); ); } static inline void dump_internal_bio(const char *pfx, struct drbd_conf *mdev, struct bio *bio, int complete) { - MTRACE(TraceTypeIntRq, TraceLvlSummary, + MTRACE(TRACE_TYPE_INT_RQ, TRACE_LVL_SUMMARY, _dump_bio(pfx, mdev, bio, complete, NULL); ); } /* Packet dumping support */ extern void _dump_packet(struct drbd_conf *mdev, struct socket *sock, - int recv, union Drbd_Polymorph_Packet *p, + int recv, union p_polymorph *p, char *file, int line); static inline void dump_packet(struct drbd_conf *mdev, struct socket *sock, - int recv, union Drbd_Polymorph_Packet *p, char *file, int line) + int recv, union p_polymorph *p, char *file, int line) { - MTRACE(TraceTypePacket, TraceLvlSummary, + MTRACE(TRACE_TYPE_PACKET, TRACE_LVL_SUMMARY, _dump_packet(mdev, sock, recv, p, file, line); ); } @@ -1472,8 +1469,8 @@ extern void drbd_resume_io(struct drbd_conf *mdev); extern char *ppsize(char *buf, unsigned long long size); extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *); -enum determin_dev_size_enum { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; -extern enum determin_dev_size_enum drbd_determin_dev_size(struct drbd_conf *) __must_hold(local); +enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; +extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *) __must_hold(local); extern void resync_after_online_grow(struct drbd_conf *); extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local); extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, @@ -1482,7 +1479,7 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev); extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); /* drbd_worker.c */ -extern int drbd_worker(struct Drbd_thread *thi); +extern int drbd_worker(struct drbd_thread *thi); extern void drbd_alter_sa(struct drbd_conf *mdev, int na); extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side); extern void resume_next_sg(struct drbd_conf *mdev); @@ -1529,12 +1526,12 @@ extern void resync_timer_fn(unsigned long data); /* drbd_receiver.c */ extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); -extern struct Tl_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, +extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector, unsigned int data_size, gfp_t gfp_mask) __must_hold(local); -extern void drbd_free_ee(struct drbd_conf *mdev, struct Tl_epoch_entry *e); +extern void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e); extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head); extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, @@ -1620,12 +1617,12 @@ extern void drbd_al_shrink(struct drbd_conf *mdev); void drbd_nl_cleanup(void); int __init drbd_nl_init(void); -void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state_t); +void drbd_bcast_state(struct drbd_conf *mdev, union 
drbd_state); void drbd_bcast_sync_progress(struct drbd_conf *mdev); void drbd_bcast_ee(struct drbd_conf *mdev, const char *reason, const int dgs, const char* seen_hash, const char* calc_hash, - const struct Tl_epoch_entry* e); + const struct drbd_epoch_entry* e); /** DRBD State macros: @@ -1640,37 +1637,40 @@ void drbd_bcast_ee(struct drbd_conf *mdev, * Besides the basic forms NS() and _NS() additional _?NS[23] are defined * to express state changes that affect more than one aspect of the state. * - * E.g. NS2(conn, Connected, peer, Secondary) + * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY) * Means that the network connection was established and that the peer * is in secondary role. */ -#define peer_mask role_mask -#define pdsk_mask disk_mask -#define susp_mask 1 -#define user_isp_mask 1 -#define aftr_isp_mask 1 +#define role_MASK R_MASK +#define peer_MASK R_MASK +#define disk_MASK D_MASK +#define pdsk_MASK D_MASK +#define conn_MASK C_MASK +#define susp_MASK 1 +#define user_isp_MASK 1 +#define aftr_isp_MASK 1 #define NS(T, S) \ - ({ union drbd_state_t mask; mask.i = 0; mask.T = T##_mask; mask; }), \ - ({ union drbd_state_t val; val.i = 0; val.T = (S); val; }) + ({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \ + ({ union drbd_state val; val.i = 0; val.T = (S); val; }) #define NS2(T1, S1, T2, S2) \ - ({ union drbd_state_t mask; mask.i = 0; mask.T1 = T1##_mask; \ - mask.T2 = T2##_mask; mask; }), \ - ({ union drbd_state_t val; val.i = 0; val.T1 = (S1); \ + ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ + mask.T2 = T2##_MASK; mask; }), \ + ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ val.T2 = (S2); val; }) #define NS3(T1, S1, T2, S2, T3, S3) \ - ({ union drbd_state_t mask; mask.i = 0; mask.T1 = T1##_mask; \ - mask.T2 = T2##_mask; mask.T3 = T3##_mask; mask; }), \ - ({ union drbd_state_t val; val.i = 0; val.T1 = (S1); \ + ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ + mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \ + ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ val.T2 = (S2); val.T3 = (S3); val; }) #define _NS(D, T, S) \ - D, ({ union drbd_state_t __ns; __ns.i = D->state.i; __ns.T = (S); __ns; }) + D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T = (S); __ns; }) #define _NS2(D, T1, S1, T2, S2) \ - D, ({ union drbd_state_t __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ + D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ __ns.T2 = (S2); __ns; }) #define _NS3(D, T1, S1, T2, S2, T3, S3) \ - D, ({ union drbd_state_t __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ + D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ __ns.T2 = (S2); __ns.T3 = (S3); __ns; }) /* @@ -1690,7 +1690,7 @@ static inline void drbd_state_unlock(struct drbd_conf *mdev) } static inline int _drbd_set_state(struct drbd_conf *mdev, - union drbd_state_t ns, enum chg_state_flags flags, + union drbd_state ns, enum chg_state_flags flags, struct completion *done) { int rv; @@ -1703,10 +1703,10 @@ static inline int _drbd_set_state(struct drbd_conf *mdev, } static inline int drbd_request_state(struct drbd_conf *mdev, - union drbd_state_t mask, - union drbd_state_t val) + union drbd_state mask, + union drbd_state val) { - return _drbd_request_state(mdev, mask, val, ChgStateVerbose + ChgOrdered); + return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED); } /** @@ -1716,17 +1716,17 @@ static inline int drbd_request_state(struct drbd_conf *mdev, static inline void __drbd_chk_io_error(struct drbd_conf *mdev, int 
forcedetach) { switch (mdev->bc->dc.on_io_error) { - case PassOn: + case EP_PASS_ON: if (!forcedetach) { if (printk_ratelimit()) dev_err(DEV, "Local IO failed. Passing error on...\n"); break; } /* NOTE fall through to detach case if forcedetach set */ - case Detach: - case CallIOEHelper: - if (mdev->state.disk > Failed) { - _drbd_set_state(_NS(mdev, disk, Failed), ChgStateHard, NULL); + case EP_DETACH: + case EP_CALL_HELPER: + if (mdev->state.disk > D_FAILED) { + _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); dev_err(DEV, "Local IO failed. Detaching...\n"); } break; @@ -1878,35 +1878,35 @@ static inline void request_ping(struct drbd_conf *mdev) } static inline int drbd_send_short_cmd(struct drbd_conf *mdev, - enum Drbd_Packet_Cmd cmd) + enum drbd_packets cmd) { - struct Drbd_Header h; + struct p_header h; return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h)); } static inline int drbd_send_ping(struct drbd_conf *mdev) { - struct Drbd_Header h; - return drbd_send_cmd(mdev, USE_META_SOCKET, Ping, &h, sizeof(h)); + struct p_header h; + return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h)); } static inline int drbd_send_ping_ack(struct drbd_conf *mdev) { - struct Drbd_Header h; - return drbd_send_cmd(mdev, USE_META_SOCKET, PingAck, &h, sizeof(h)); + struct p_header h; + return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); } -static inline void drbd_thread_stop(struct Drbd_thread *thi) +static inline void drbd_thread_stop(struct drbd_thread *thi) { _drbd_thread_stop(thi, FALSE, TRUE); } -static inline void drbd_thread_stop_nowait(struct Drbd_thread *thi) +static inline void drbd_thread_stop_nowait(struct drbd_thread *thi) { _drbd_thread_stop(thi, FALSE, FALSE); } -static inline void drbd_thread_restart_nowait(struct Drbd_thread *thi) +static inline void drbd_thread_restart_nowait(struct drbd_thread *thi) { _drbd_thread_stop(thi, TRUE, FALSE); } @@ -1925,7 +1925,7 @@ static inline void drbd_thread_restart_nowait(struct Drbd_thread *thi) * _req_mod(req, data_received) * [from receive_DataReply] * _req_mod(req, write_acked_by_peer or recv_acked_by_peer or neg_acked) - * [from got_BlockAck (WriteAck, RecvAck)] + * [from got_BlockAck (P_WRITE_ACK, P_RECV_ACK)] * for some reason it is NOT decreased in got_NegAck, * but in the resulting cleanup code from report_params. * we should try to remember the reason for that... @@ -1952,9 +1952,9 @@ static inline void inc_ap_pending(struct drbd_conf *mdev) /* counts how many resync-related answers we still expect from the peer * increase decrease - * SyncTarget sends RSDataRequest (and expects RSDataReply) - * SyncSource sends RSDataReply (and expects WriteAck whith ID_SYNCER) - * (or NegAck with ID_SYNCER) + * C_SYNC_TARGET sends P_RS_DATA_REQUEST (and expects P_RS_DATA_REPLY) + * C_SYNC_SOURCE sends P_RS_DATA_REPLY (and expects P_WRITE_ACK whith ID_SYNCER) + * (or P_NEG_ACK with ID_SYNCER) */ static inline void inc_rs_pending(struct drbd_conf *mdev) { @@ -1969,11 +1969,11 @@ static inline void inc_rs_pending(struct drbd_conf *mdev) /* counts how many answers we still need to send to the peer. 
* increased on * receive_Data unless protocol A; - * we need to send a RecvAck (proto B) - * or WriteAck (proto C) - * receive_RSDataReply (recv_resync_read) we need to send a WriteAck - * receive_DataRequest (receive_RSDataRequest) we need to send back Data - * receive_Barrier_* we need to send a BarrierAck + * we need to send a P_RECV_ACK (proto B) + * or P_WRITE_ACK (proto C) + * receive_RSDataReply (recv_resync_read) we need to send a P_WRITE_ACK + * receive_DataRequest (receive_RSDataRequest) we need to send back P_DATA + * receive_Barrier_* we need to send a P_BARRIER_ACK */ static inline void inc_unacked(struct drbd_conf *mdev) { @@ -2006,7 +2006,7 @@ static inline int inc_net(struct drbd_conf *mdev) int have_net_conf; atomic_inc(&mdev->net_cnt); - have_net_conf = mdev->state.conn >= Unconnected; + have_net_conf = mdev->state.conn >= C_UNCONNECTED; if (!have_net_conf) dec_net(mdev); return have_net_conf; @@ -2017,7 +2017,7 @@ static inline int inc_net(struct drbd_conf *mdev) * TRUE you should call dec_local() after IO is completed. */ #define inc_local_if_state(M,MINS) __cond_lock(local, _inc_local_if_state(M,MINS)) -#define inc_local(M) __cond_lock(local, _inc_local_if_state(M,Inconsistent)) +#define inc_local(M) __cond_lock(local, _inc_local_if_state(M,D_INCONSISTENT)) static inline void dec_local(struct drbd_conf *mdev) { @@ -2093,7 +2093,7 @@ static inline int drbd_get_max_buffers(struct drbd_conf *mdev) return mxb; } -static inline int drbd_state_is_stable(union drbd_state_t s) +static inline int drbd_state_is_stable(union drbd_state s) { /* DO NOT add a default clause, we want the compiler to warn us @@ -2101,54 +2101,54 @@ static inline int drbd_state_is_stable(union drbd_state_t s) switch ((enum drbd_conns)s.conn) { /* new io only accepted when there is no connection, ... */ - case StandAlone: - case WFConnection: + case C_STANDALONE: + case C_WF_CONNECTION: /* ... or there is a well established connection. */ - case Connected: - case SyncSource: - case SyncTarget: - case VerifyS: - case VerifyT: - case PausedSyncS: - case PausedSyncT: + case C_CONNECTED: + case C_SYNC_SOURCE: + case C_SYNC_TARGET: + case C_VERIFY_S: + case C_VERIFY_T: + case C_PAUSED_SYNC_S: + case C_PAUSED_SYNC_T: /* maybe stable, look at the disk state */ break; /* no new io accepted during tansitional states * like handshake or teardown */ - case Disconnecting: - case Unconnected: - case Timeout: - case BrokenPipe: - case NetworkFailure: - case ProtocolError: - case TearDown: - case WFReportParams: - case StartingSyncS: - case StartingSyncT: - case WFBitMapS: - case WFBitMapT: - case WFSyncUUID: - case conn_mask: + case C_DISCONNECTING: + case C_UNCONNECTED: + case C_TIMEOUT: + case C_BROKEN_PIPE: + case C_NETWORK_FAILURE: + case C_PROTOCOL_ERROR: + case C_TEAR_DOWN: + case C_WF_REPORT_PARAMS: + case C_STARTING_SYNC_S: + case C_STARTING_SYNC_T: + case C_WF_BITMAP_S: + case C_WF_BITMAP_T: + case C_WF_SYNC_UUID: + case C_MASK: /* not "stable" */ return 0; } switch ((enum drbd_disk_state)s.disk) { - case Diskless: - case Inconsistent: - case Outdated: - case Consistent: - case UpToDate: + case D_DISKLESS: + case D_INCONSISTENT: + case D_OUTDATED: + case D_CONSISTENT: + case D_UP_TO_DATE: /* disk state is stable as well. 
*/ break; /* no new io accepted during tansitional states */ - case Attaching: - case Failed: - case Negotiating: - case DUnknown: - case disk_mask: + case D_ATTACHING: + case D_FAILED: + case D_NEGOTIATING: + case D_UNKNOWN: + case D_MASK: /* not "stable" */ return 0; } @@ -2188,7 +2188,7 @@ static inline int __inc_ap_bio_cond(struct drbd_conf *mdev) static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two) { /* compare with after_state_ch, - * os.conn != WFBitMapS && ns.conn == WFBitMapS */ + * os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */ DEFINE_WAIT(wait); /* we wait here @@ -2232,7 +2232,7 @@ static inline void drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) { mdev->ed_uuid = val; - MTRACE(TraceTypeUuid, TraceLvlMetrics, + MTRACE(TRACE_TYPE_UUID, TRACE_LVL_METRICS, dev_info(DEV, " exposed data uuid now %016llX\n", (unsigned long long)val); ); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index dfb48e2c1bc5..4c84365aeeef 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -59,22 +59,22 @@ struct after_state_chg_work { struct drbd_work w; - union drbd_state_t os; - union drbd_state_t ns; + union drbd_state os; + union drbd_state ns; enum chg_state_flags flags; struct completion *done; }; -int drbdd_init(struct Drbd_thread *); -int drbd_worker(struct Drbd_thread *); -int drbd_asender(struct Drbd_thread *); +int drbdd_init(struct drbd_thread *); +int drbd_worker(struct drbd_thread *); +int drbd_asender(struct drbd_thread *); int drbd_init(void); static int drbd_open(struct block_device *bdev, fmode_t mode); static int drbd_release(struct gendisk *gd, fmode_t mode); STATIC int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused); -STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state_t os, - union drbd_state_t ns, enum chg_state_flags flags); +STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, enum chg_state_flags flags); STATIC int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); STATIC void md_sync_timer_fn(unsigned long data); STATIC int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); @@ -116,9 +116,9 @@ int allow_oos; unsigned int cn_idx = CN_IDX_DRBD; #ifdef ENABLE_DYNAMIC_TRACE -int trace_type; /* Bitmap of trace types to enable */ -int trace_level; /* Current trace level */ -int trace_devs; /* Bitmap of devices to trace */ +int trace_type; /* UI_BITMAP of trace types to enable */ +int trace_level; /* UI_CURRENT trace level */ +int trace_devs; /* UI_BITMAP of devices to trace */ int proc_details; /* Detail level in proc drbd*/ module_param(trace_level, int, 0644); @@ -186,9 +186,9 @@ int _inc_local_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) /************************* The transfer log start */ STATIC int tl_init(struct drbd_conf *mdev) { - struct drbd_barrier *b; + struct drbd_tl_epoch *b; - b = kmalloc(sizeof(struct drbd_barrier), GFP_KERNEL); + b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL); if (!b) return 0; INIT_LIST_HEAD(&b->requests); @@ -198,8 +198,8 @@ STATIC int tl_init(struct drbd_conf *mdev) b->n_req = 0; b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ - mdev->oldest_barrier = b; - mdev->newest_barrier = b; + mdev->oldest_tle = b; + mdev->newest_tle = b; INIT_LIST_HEAD(&mdev->out_of_sequence_requests); mdev->tl_hash = NULL; @@ -210,12 +210,12 @@ STATIC int tl_init(struct drbd_conf *mdev) STATIC void 
tl_cleanup(struct drbd_conf *mdev) { - D_ASSERT(mdev->oldest_barrier == mdev->newest_barrier); + D_ASSERT(mdev->oldest_tle == mdev->newest_tle); D_ASSERT(list_empty(&mdev->out_of_sequence_requests)); - kfree(mdev->oldest_barrier); - mdev->oldest_barrier = NULL; - kfree(mdev->unused_spare_barrier); - mdev->unused_spare_barrier = NULL; + kfree(mdev->oldest_tle); + mdev->oldest_tle = NULL; + kfree(mdev->unused_spare_tle); + mdev->unused_spare_tle = NULL; kfree(mdev->tl_hash); mdev->tl_hash = NULL; mdev->tl_hash_s = 0; @@ -224,9 +224,9 @@ STATIC void tl_cleanup(struct drbd_conf *mdev) /** * _tl_add_barrier: Adds a barrier to the TL. */ -void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_barrier *new) +void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new) { - struct drbd_barrier *newest_before; + struct drbd_tl_epoch *newest_before; INIT_LIST_HEAD(&new->requests); INIT_LIST_HEAD(&new->w.list); @@ -234,13 +234,13 @@ void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_barrier *new) new->next = NULL; new->n_req = 0; - newest_before = mdev->newest_barrier; + newest_before = mdev->newest_tle; /* never send a barrier number == 0, because that is special-cased * when using TCQ for our write ordering code */ new->br_number = (newest_before->br_number+1) ?: 1; - if (mdev->newest_barrier != new) { - mdev->newest_barrier->next = new; - mdev->newest_barrier = new; + if (mdev->newest_tle != new) { + mdev->newest_tle->next = new; + mdev->newest_tle = new; } } @@ -248,13 +248,13 @@ void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_barrier *new) void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, unsigned int set_size) { - struct drbd_barrier *b, *nob; /* next old barrier */ + struct drbd_tl_epoch *b, *nob; /* next old barrier */ struct list_head *le, *tle; struct drbd_request *r; spin_lock_irq(&mdev->req_lock); - b = mdev->oldest_barrier; + b = mdev->oldest_tle; /* first some paranoia code */ if (b == NULL) { @@ -297,12 +297,12 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { _tl_add_barrier(mdev, b); if (nob) - mdev->oldest_barrier = nob; + mdev->oldest_tle = nob; /* if nob == NULL b was the only barrier, and becomes the new - barrer. Threfore mdev->oldest_barrier points already to b */ + barrer. Threfore mdev->oldest_tle points already to b */ } else { D_ASSERT(nob != NULL); - mdev->oldest_barrier = nob; + mdev->oldest_tle = nob; kfree(b); } @@ -313,7 +313,7 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, bail: spin_unlock_irq(&mdev->req_lock); - drbd_force_state(mdev, NS(conn, ProtocolError)); + drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); } @@ -321,14 +321,14 @@ bail: * or from some after_state_ch */ void tl_clear(struct drbd_conf *mdev) { - struct drbd_barrier *b, *tmp; + struct drbd_tl_epoch *b, *tmp; struct list_head *le, *tle; struct drbd_request *r; int new_initial_bnr = net_random(); spin_lock_irq(&mdev->req_lock); - b = mdev->oldest_barrier; + b = mdev->oldest_tle; while (b) { list_for_each_safe(le, tle, &b->requests) { r = list_entry(le, struct drbd_request, tl_requests); @@ -346,7 +346,7 @@ void tl_clear(struct drbd_conf *mdev) if (b->w.cb != NULL) dec_ap_pending(mdev); - if (b == mdev->newest_barrier) { + if (b == mdev->newest_tle) { /* recycle, but reinit! 
*/ D_ASSERT(tmp == NULL); INIT_LIST_HEAD(&b->requests); @@ -355,7 +355,7 @@ void tl_clear(struct drbd_conf *mdev) b->br_number = new_initial_bnr; b->n_req = 0; - mdev->oldest_barrier = b; + mdev->oldest_tle = b; break; } kfree(b); @@ -382,37 +382,37 @@ void tl_clear(struct drbd_conf *mdev) * unlikely(!drbd_bio_uptodate(e->bio)) case from kernel thread context. * See also drbd_chk_io_error * - * NOTE: we set ourselves FAILED here if on_io_error is Detach or Panic OR + * NOTE: we set ourselves FAILED here if on_io_error is EP_DETACH or Panic OR * if the forcedetach flag is set. This flag is set when failures * occur writing the meta data portion of the disk as they are * not recoverable. */ int drbd_io_error(struct drbd_conf *mdev, int forcedetach) { - enum io_error_handler eh; + enum drbd_io_error_p eh; unsigned long flags; int send; int ok = 1; - eh = PassOn; - if (inc_local_if_state(mdev, Failed)) { + eh = EP_PASS_ON; + if (inc_local_if_state(mdev, D_FAILED)) { eh = mdev->bc->dc.on_io_error; dec_local(mdev); } - if (!forcedetach && eh == PassOn) + if (!forcedetach && eh == EP_PASS_ON) return 1; spin_lock_irqsave(&mdev->req_lock, flags); - send = (mdev->state.disk == Failed); + send = (mdev->state.disk == D_FAILED); if (send) - _drbd_set_state(_NS(mdev, disk, Diskless), ChgStateHard, NULL); + _drbd_set_state(_NS(mdev, disk, D_DISKLESS), CS_HARD, NULL); spin_unlock_irqrestore(&mdev->req_lock, flags); if (!send) return ok; - if (mdev->state.conn >= Connected) { + if (mdev->state.conn >= C_CONNECTED) { ok = drbd_send_state(mdev); if (ok) dev_warn(DEV, "Notified peer that my disk is broken.\n"); @@ -429,7 +429,7 @@ int drbd_io_error(struct drbd_conf *mdev, int forcedetach) /* Releasing the backing device is done in after_state_ch() */ - if (eh == CallIOEHelper) + if (eh == EP_CALL_HELPER) drbd_khelper(mdev, "local-io-error"); return ok; @@ -441,22 +441,22 @@ int drbd_io_error(struct drbd_conf *mdev, int forcedetach) * transaction. Of course it returns 0 as soon as the connection is lost. 
*/ STATIC int cl_wide_st_chg(struct drbd_conf *mdev, - union drbd_state_t os, union drbd_state_t ns) + union drbd_state os, union drbd_state ns) { - return (os.conn >= Connected && ns.conn >= Connected && - ((os.role != Primary && ns.role == Primary) || - (os.conn != StartingSyncT && ns.conn == StartingSyncT) || - (os.conn != StartingSyncS && ns.conn == StartingSyncS) || - (os.disk != Diskless && ns.disk == Diskless))) || - (os.conn >= Connected && ns.conn == Disconnecting) || - (os.conn == Connected && ns.conn == VerifyS); + return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED && + ((os.role != R_PRIMARY && ns.role == R_PRIMARY) || + (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || + (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) || + (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) || + (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) || + (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); } int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, - union drbd_state_t mask, union drbd_state_t val) + union drbd_state mask, union drbd_state val) { unsigned long flags; - union drbd_state_t os, ns; + union drbd_state os, ns; int rv; spin_lock_irqsave(&mdev->req_lock, flags); @@ -470,41 +470,41 @@ int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, } void drbd_force_state(struct drbd_conf *mdev, - union drbd_state_t mask, union drbd_state_t val) + union drbd_state mask, union drbd_state val) { - drbd_change_state(mdev, ChgStateHard, mask, val); + drbd_change_state(mdev, CS_HARD, mask, val); } -int is_valid_state(struct drbd_conf *mdev, union drbd_state_t ns); +int is_valid_state(struct drbd_conf *mdev, union drbd_state ns); int is_valid_state_transition(struct drbd_conf *, - union drbd_state_t, union drbd_state_t); + union drbd_state, union drbd_state); int drbd_send_state_req(struct drbd_conf *, - union drbd_state_t, union drbd_state_t); + union drbd_state, union drbd_state); -STATIC enum set_st_err _req_st_cond(struct drbd_conf *mdev, - union drbd_state_t mask, union drbd_state_t val) +STATIC enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev, + union drbd_state mask, union drbd_state val) { - union drbd_state_t os, ns; + union drbd_state os, ns; unsigned long flags; int rv; if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags)) - return SS_CW_Success; + return SS_CW_SUCCESS; if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags)) - return SS_CW_FailedByPeer; + return SS_CW_FAILED_BY_PEER; rv = 0; spin_lock_irqsave(&mdev->req_lock, flags); os = mdev->state; ns.i = (os.i & ~mask.i) | val.i; if (!cl_wide_st_chg(mdev, os, ns)) - rv = SS_CW_NoNeed; + rv = SS_CW_NO_NEED; if (!rv) { rv = is_valid_state(mdev, ns); - if (rv == SS_Success) { + if (rv == SS_SUCCESS) { rv = is_valid_state_transition(mdev, ns, os); - if (rv == SS_Success) + if (rv == SS_SUCCESS) rv = 0; /* cont waiting, otherwise fail. */ } } @@ -520,17 +520,17 @@ STATIC enum set_st_err _req_st_cond(struct drbd_conf *mdev, * It has a cousin named drbd_request_state(), which is always verbose. 
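The NS(field, value) requests seen throughout this patch reduce to the one-liner in _req_st_cond() above: all state fields live in a single union, so a mask/val pair describes a partial change that is applied in one 32-bit operation. A stand-alone sketch, with the field names mirroring the patch but the widths invented for the demonstration:

#include <stdio.h>

/* toy version of union drbd_state: several bitfields overlaid on one word */
union demo_state {
	struct {
		unsigned role:2;
		unsigned conn:5;
		unsigned disk:4;
	};
	unsigned int i;
};

int main(void)
{
	union demo_state os, mask, val, ns;

	os.i = 0;
	os.role = 1;	/* say, primary */
	os.conn = 10;
	os.disk = 8;

	/* NS(conn, 3) builds exactly such a pair: all ones in the conn
	 * field of mask, the target value in val */
	mask.i = 0;
	mask.conn = -1;
	val.i = 0;
	val.conn = 3;

	/* the formula from _req_st_cond(): untouched fields keep their
	 * old contents, masked fields take the new value */
	ns.i = (os.i & ~mask.i) | val.i;

	printf("role=%u conn=%u disk=%u\n",
	       (unsigned)ns.role, (unsigned)ns.conn, (unsigned)ns.disk);
	return 0;
}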
*/ STATIC int drbd_req_state(struct drbd_conf *mdev, - union drbd_state_t mask, union drbd_state_t val, + union drbd_state mask, union drbd_state val, enum chg_state_flags f) { struct completion done; unsigned long flags; - union drbd_state_t os, ns; + union drbd_state os, ns; int rv; init_completion(&done); - if (f & ChgSerialize) + if (f & CS_SERIALIZE) mutex_lock(&mdev->state_mutex); spin_lock_irqsave(&mdev->req_lock, flags); @@ -539,12 +539,12 @@ STATIC int drbd_req_state(struct drbd_conf *mdev, if (cl_wide_st_chg(mdev, os, ns)) { rv = is_valid_state(mdev, ns); - if (rv == SS_Success) + if (rv == SS_SUCCESS) rv = is_valid_state_transition(mdev, ns, os); spin_unlock_irqrestore(&mdev->req_lock, flags); - if (rv < SS_Success) { - if (f & ChgStateVerbose) + if (rv < SS_SUCCESS) { + if (f & CS_VERBOSE) print_st_err(mdev, os, ns, rv); goto abort; } @@ -552,8 +552,8 @@ STATIC int drbd_req_state(struct drbd_conf *mdev, drbd_state_lock(mdev); if (!drbd_send_state_req(mdev, mask, val)) { drbd_state_unlock(mdev); - rv = SS_CW_FailedByPeer; - if (f & ChgStateVerbose) + rv = SS_CW_FAILED_BY_PEER; + if (f & CS_VERBOSE) print_st_err(mdev, os, ns, rv); goto abort; } @@ -561,10 +561,10 @@ STATIC int drbd_req_state(struct drbd_conf *mdev, wait_event(mdev->state_wait, (rv = _req_st_cond(mdev, mask, val))); - if (rv < SS_Success) { + if (rv < SS_SUCCESS) { /* nearly dead code. */ drbd_state_unlock(mdev); - if (f & ChgStateVerbose) + if (f & CS_VERBOSE) print_st_err(mdev, os, ns, rv); goto abort; } @@ -579,13 +579,13 @@ STATIC int drbd_req_state(struct drbd_conf *mdev, spin_unlock_irqrestore(&mdev->req_lock, flags); - if (f & ChgWaitComplete && rv == SS_Success) { + if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) { D_ASSERT(current != mdev->worker.task); wait_for_completion(&done); } abort: - if (f & ChgSerialize) + if (f & CS_SERIALIZE) mutex_unlock(&mdev->state_mutex); return rv; @@ -597,18 +597,18 @@ abort: * transition this function even does a cluster wide transaction. * It has a cousin named drbd_request_state(), which is always verbose. 
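drbd_req_state() above is steered entirely by its chg_state_flags argument: CS_SERIALIZE takes the state mutex, CS_VERBOSE turns on print_st_err(), CS_WAIT_COMPLETE blocks until the after-state work ran, and CS_HARD (tested further below) skips the validity checks. A minimal sketch of that convention; the bit values are invented and the behavior in the comments is paraphrased from the code above:

#include <stdio.h>

enum demo_chg_flags {
	DEMO_CS_HARD          = 1 << 0,	/* skip validity checks */
	DEMO_CS_VERBOSE       = 1 << 1,	/* log refused transitions */
	DEMO_CS_WAIT_COMPLETE = 1 << 2,	/* block until after-work ran */
	DEMO_CS_SERIALIZE     = 1 << 3,	/* take the state mutex */
};

static void demo_req_state(enum demo_chg_flags f)
{
	if (f & DEMO_CS_SERIALIZE)
		printf("lock state_mutex\n");
	if (f & DEMO_CS_VERBOSE)
		printf("will print errors\n");
	if (f & DEMO_CS_WAIT_COMPLETE)
		printf("will wait for completion\n");
	if (f & DEMO_CS_SERIALIZE)
		printf("unlock state_mutex\n");
}

int main(void)
{
	/* mirrors a verbose, serialized request */
	demo_req_state(DEMO_CS_VERBOSE | DEMO_CS_SERIALIZE);
	return 0;
}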
*/ -int _drbd_request_state(struct drbd_conf *mdev, union drbd_state_t mask, - union drbd_state_t val, enum chg_state_flags f) +int _drbd_request_state(struct drbd_conf *mdev, union drbd_state mask, + union drbd_state val, enum chg_state_flags f) { int rv; wait_event(mdev->state_wait, - (rv = drbd_req_state(mdev, mask, val, f)) != SS_InTransientState); + (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE); return rv; } -STATIC void print_st(struct drbd_conf *mdev, char *name, union drbd_state_t ns) +STATIC void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) { dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n", name, @@ -625,9 +625,9 @@ STATIC void print_st(struct drbd_conf *mdev, char *name, union drbd_state_t ns) } void print_st_err(struct drbd_conf *mdev, - union drbd_state_t os, union drbd_state_t ns, int err) + union drbd_state os, union drbd_state ns, int err) { - if (err == SS_InTransientState) + if (err == SS_IN_TRANSIENT_STATE) return; dev_err(DEV, "State change failed: %s\n", set_st_err_name(err)); print_st(mdev, " state", os); @@ -650,14 +650,14 @@ void print_st_err(struct drbd_conf *mdev, A##s_to_name(ns.A)); \ } }) -int is_valid_state(struct drbd_conf *mdev, union drbd_state_t ns) +int is_valid_state(struct drbd_conf *mdev, union drbd_state ns) { /* See drbd_state_sw_errors in drbd_strings.c */ - enum fencing_policy fp; - int rv = SS_Success; + enum drbd_fencing_p fp; + int rv = SS_SUCCESS; - fp = DontCare; + fp = FP_DONT_CARE; if (inc_local(mdev)) { fp = mdev->bc->dc.fencing; dec_local(mdev); @@ -665,105 +665,105 @@ int is_valid_state(struct drbd_conf *mdev, union drbd_state_t ns) if (inc_net(mdev)) { if (!mdev->net_conf->two_primaries && - ns.role == Primary && ns.peer == Primary) - rv = SS_TwoPrimaries; + ns.role == R_PRIMARY && ns.peer == R_PRIMARY) + rv = SS_TWO_PRIMARIES; dec_net(mdev); } if (rv <= 0) /* already found a reason to abort */; - else if (ns.role == Secondary && mdev->open_cnt) - rv = SS_DeviceInUse; + else if (ns.role == R_SECONDARY && mdev->open_cnt) + rv = SS_DEVICE_IN_USE; - else if (ns.role == Primary && ns.conn < Connected && ns.disk < UpToDate) - rv = SS_NoUpToDateDisk; + else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE) + rv = SS_NO_UP_TO_DATE_DISK; - else if (fp >= Resource && - ns.role == Primary && ns.conn < Connected && ns.pdsk >= DUnknown) - rv = SS_PrimaryNOP; + else if (fp >= FP_RESOURCE && + ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN) + rv = SS_PRIMARY_NOP; - else if (ns.role == Primary && ns.disk <= Inconsistent && ns.pdsk <= Inconsistent) - rv = SS_NoUpToDateDisk; + else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT) + rv = SS_NO_UP_TO_DATE_DISK; - else if (ns.conn > Connected && ns.disk < UpToDate && ns.pdsk < UpToDate) - rv = SS_BothInconsistent; + else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) + rv = SS_BOTH_INCONSISTENT; - else if (ns.conn > Connected && (ns.disk == Diskless || ns.pdsk == Diskless)) - rv = SS_SyncingDiskless; + else if (ns.conn > C_CONNECTED && (ns.disk == D_DISKLESS || ns.pdsk == D_DISKLESS)) + rv = SS_SYNCING_DISKLESS; - else if ((ns.conn == Connected || - ns.conn == WFBitMapS || - ns.conn == SyncSource || - ns.conn == PausedSyncS) && - ns.disk == Outdated) - rv = SS_ConnectedOutdates; + else if ((ns.conn == C_CONNECTED || + ns.conn == C_WF_BITMAP_S || + ns.conn == C_SYNC_SOURCE || + ns.conn == C_PAUSED_SYNC_S) && + ns.disk == D_OUTDATED) + rv = 
SS_CONNECTED_OUTDATES; - else if ((ns.conn == VerifyS || ns.conn == VerifyT) && + else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && (mdev->sync_conf.verify_alg[0] == 0)) - rv = SS_NoVerifyAlg; + rv = SS_NO_VERIFY_ALG; - else if ((ns.conn == VerifyS || ns.conn == VerifyT) && + else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && mdev->agreed_pro_version < 88) - rv = SS_NotSupported; + rv = SS_NOT_SUPPORTED; return rv; } int is_valid_state_transition(struct drbd_conf *mdev, - union drbd_state_t ns, union drbd_state_t os) + union drbd_state ns, union drbd_state os) { - int rv = SS_Success; + int rv = SS_SUCCESS; - if ((ns.conn == StartingSyncT || ns.conn == StartingSyncS) && - os.conn > Connected) - rv = SS_ResyncRunning; + if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) && + os.conn > C_CONNECTED) + rv = SS_RESYNC_RUNNING; - if (ns.conn == Disconnecting && os.conn == StandAlone) - rv = SS_AlreadyStandAlone; + if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE) + rv = SS_ALREADY_STANDALONE; - if (ns.disk > Attaching && os.disk == Diskless) - rv = SS_IsDiskLess; + if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS) + rv = SS_IS_DISKLESS; - if (ns.conn == WFConnection && os.conn < Unconnected) - rv = SS_NoNetConfig; + if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED) + rv = SS_NO_NET_CONFIG; - if (ns.disk == Outdated && os.disk < Outdated && os.disk != Attaching) - rv = SS_LowerThanOutdated; + if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING) + rv = SS_LOWER_THAN_OUTDATED; - if (ns.conn == Disconnecting && os.conn == Unconnected) - rv = SS_InTransientState; + if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED) + rv = SS_IN_TRANSIENT_STATE; - if (ns.conn == os.conn && ns.conn == WFReportParams) - rv = SS_InTransientState; + if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) + rv = SS_IN_TRANSIENT_STATE; - if ((ns.conn == VerifyS || ns.conn == VerifyT) && os.conn < Connected) - rv = SS_NeedConnection; + if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) + rv = SS_NEED_CONNECTION; - if ((ns.conn == VerifyS || ns.conn == VerifyT) && - ns.conn != os.conn && os.conn > Connected) - rv = SS_ResyncRunning; + if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && + ns.conn != os.conn && os.conn > C_CONNECTED) + rv = SS_RESYNC_RUNNING; - if ((ns.conn == StartingSyncS || ns.conn == StartingSyncT) && - os.conn < Connected) - rv = SS_NeedConnection; + if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) && + os.conn < C_CONNECTED) + rv = SS_NEED_CONNECTION; return rv; } int __drbd_set_state(struct drbd_conf *mdev, - union drbd_state_t ns, enum chg_state_flags flags, + union drbd_state ns, enum chg_state_flags flags, struct completion *done) { - union drbd_state_t os; - int rv = SS_Success; + union drbd_state os; + int rv = SS_SUCCESS; int warn_sync_abort = 0; - enum fencing_policy fp; + enum drbd_fencing_p fp; struct after_state_chg_work *ascw; os = mdev->state; - fp = DontCare; + fp = FP_DONT_CARE; if (inc_local(mdev)) { fp = mdev->bc->dc.fencing; dec_local(mdev); @@ -772,125 +772,125 @@ int __drbd_set_state(struct drbd_conf *mdev, /* Early state sanitising. 
*/ /* Dissalow Network errors to configure a device's network part */ - if ((ns.conn >= Timeout && ns.conn <= TearDown) && - os.conn <= Disconnecting) + if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) && + os.conn <= C_DISCONNECTING) ns.conn = os.conn; - /* After a network error (+TearDown) only Unconnected or Disconnecting can follow */ - if (os.conn >= Timeout && os.conn <= TearDown && - ns.conn != Unconnected && ns.conn != Disconnecting) + /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow */ + if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && + ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING) ns.conn = os.conn; - /* After Disconnecting only StandAlone may follow */ - if (os.conn == Disconnecting && ns.conn != StandAlone) + /* After C_DISCONNECTING only C_STANDALONE may follow */ + if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) ns.conn = os.conn; - if (ns.conn < Connected) { + if (ns.conn < C_CONNECTED) { ns.peer_isp = 0; - ns.peer = Unknown; - if (ns.pdsk > DUnknown || ns.pdsk < Inconsistent) - ns.pdsk = DUnknown; + ns.peer = R_UNKNOWN; + if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT) + ns.pdsk = D_UNKNOWN; } /* Clear the aftr_isp when becomming Unconfigured */ - if (ns.conn == StandAlone && ns.disk == Diskless && ns.role == Secondary) + if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY) ns.aftr_isp = 0; - if (ns.conn <= Disconnecting && ns.disk == Diskless) - ns.pdsk = DUnknown; + if (ns.conn <= C_DISCONNECTING && ns.disk == D_DISKLESS) + ns.pdsk = D_UNKNOWN; - if (os.conn > Connected && ns.conn > Connected && - (ns.disk <= Failed || ns.pdsk <= Failed)) { + if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED && + (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { warn_sync_abort = 1; - ns.conn = Connected; + ns.conn = C_CONNECTED; } - if (ns.conn >= Connected && - ((ns.disk == Consistent || ns.disk == Outdated) || - (ns.disk == Negotiating && ns.conn == WFBitMapT))) { + if (ns.conn >= C_CONNECTED && + ((ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) || + (ns.disk == D_NEGOTIATING && ns.conn == C_WF_BITMAP_T))) { switch (ns.conn) { - case WFBitMapT: - case PausedSyncT: - ns.disk = Outdated; + case C_WF_BITMAP_T: + case C_PAUSED_SYNC_T: + ns.disk = D_OUTDATED; break; - case Connected: - case WFBitMapS: - case SyncSource: - case PausedSyncS: - ns.disk = UpToDate; + case C_CONNECTED: + case C_WF_BITMAP_S: + case C_SYNC_SOURCE: + case C_PAUSED_SYNC_S: + ns.disk = D_UP_TO_DATE; break; - case SyncTarget: - ns.disk = Inconsistent; + case C_SYNC_TARGET: + ns.disk = D_INCONSISTENT; dev_warn(DEV, "Implicitly set disk state Inconsistent!\n"); break; } - if (os.disk == Outdated && ns.disk == UpToDate) + if (os.disk == D_OUTDATED && ns.disk == D_UP_TO_DATE) dev_warn(DEV, "Implicitly set disk from Outdated to UpToDate\n"); } - if (ns.conn >= Connected && - (ns.pdsk == Consistent || ns.pdsk == Outdated)) { + if (ns.conn >= C_CONNECTED && + (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)) { switch (ns.conn) { - case Connected: - case WFBitMapT: - case PausedSyncT: - case SyncTarget: - ns.pdsk = UpToDate; + case C_CONNECTED: + case C_WF_BITMAP_T: + case C_PAUSED_SYNC_T: + case C_SYNC_TARGET: + ns.pdsk = D_UP_TO_DATE; break; - case WFBitMapS: - case PausedSyncS: - ns.pdsk = Outdated; + case C_WF_BITMAP_S: + case C_PAUSED_SYNC_S: + ns.pdsk = D_OUTDATED; break; - case SyncSource: - ns.pdsk = Inconsistent; + case C_SYNC_SOURCE: + ns.pdsk = D_INCONSISTENT; dev_warn(DEV, "Implicitly set pdsk 
Inconsistent!\n"); break; } - if (os.pdsk == Outdated && ns.pdsk == UpToDate) + if (os.pdsk == D_OUTDATED && ns.pdsk == D_UP_TO_DATE) dev_warn(DEV, "Implicitly set pdsk from Outdated to UpToDate\n"); } /* Connection breaks down before we finished "Negotiating" */ - if (ns.conn < Connected && ns.disk == Negotiating && - inc_local_if_state(mdev, Negotiating)) { - if (mdev->ed_uuid == mdev->bc->md.uuid[Current]) { + if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING && + inc_local_if_state(mdev, D_NEGOTIATING)) { + if (mdev->ed_uuid == mdev->bc->md.uuid[UI_CURRENT]) { ns.disk = mdev->new_state_tmp.disk; ns.pdsk = mdev->new_state_tmp.pdsk; } else { dev_alert(DEV, "Connection lost while negotiating, no data!\n"); - ns.disk = Diskless; - ns.pdsk = DUnknown; + ns.disk = D_DISKLESS; + ns.pdsk = D_UNKNOWN; } dec_local(mdev); } - if (fp == Stonith && - (ns.role == Primary && - ns.conn < Connected && - ns.pdsk > Outdated)) + if (fp == FP_STONITH && + (ns.role == R_PRIMARY && + ns.conn < C_CONNECTED && + ns.pdsk > D_OUTDATED)) ns.susp = 1; if (ns.aftr_isp || ns.peer_isp || ns.user_isp) { - if (ns.conn == SyncSource) - ns.conn = PausedSyncS; - if (ns.conn == SyncTarget) - ns.conn = PausedSyncT; + if (ns.conn == C_SYNC_SOURCE) + ns.conn = C_PAUSED_SYNC_S; + if (ns.conn == C_SYNC_TARGET) + ns.conn = C_PAUSED_SYNC_T; } else { - if (ns.conn == PausedSyncS) - ns.conn = SyncSource; - if (ns.conn == PausedSyncT) - ns.conn = SyncTarget; + if (ns.conn == C_PAUSED_SYNC_S) + ns.conn = C_SYNC_SOURCE; + if (ns.conn == C_PAUSED_SYNC_T) + ns.conn = C_SYNC_TARGET; } if (ns.i == os.i) - return SS_NothingToDo; + return SS_NOTHING_TO_DO; - if (!(flags & ChgStateHard)) { + if (!(flags & CS_HARD)) { /* pre-state-change checks ; only look at ns */ /* See drbd_state_sw_errors in drbd_strings.c */ rv = is_valid_state(mdev, ns); - if (rv < SS_Success) { + if (rv < SS_SUCCESS) { /* If the old state was illegal as well, then let this happen...*/ @@ -906,8 +906,8 @@ int __drbd_set_state(struct drbd_conf *mdev, rv = is_valid_state_transition(mdev, ns, os); } - if (rv < SS_Success) { - if (flags & ChgStateVerbose) + if (rv < SS_SUCCESS) { + if (flags & CS_VERBOSE) print_st_err(mdev, os, ns, rv); return rv; } @@ -936,16 +936,16 @@ int __drbd_set_state(struct drbd_conf *mdev, wake_up(&mdev->state_wait); /** post-state-change actions **/ - if (os.conn >= SyncSource && ns.conn <= Connected) { + if (os.conn >= C_SYNC_SOURCE && ns.conn <= C_CONNECTED) { set_bit(STOP_SYNC_TIMER, &mdev->flags); mod_timer(&mdev->resync_timer, jiffies); } - if ((os.conn == PausedSyncT || os.conn == PausedSyncS) && - (ns.conn == SyncTarget || ns.conn == SyncSource)) { + if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && + (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) { dev_info(DEV, "Syncer continues.\n"); mdev->rs_paused += (long)jiffies-(long)mdev->rs_mark_time; - if (ns.conn == SyncTarget) { + if (ns.conn == C_SYNC_TARGET) { if (!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags)) mod_timer(&mdev->resync_timer, jiffies); /* This if (!test_bit) is only needed for the case @@ -955,16 +955,16 @@ int __drbd_set_state(struct drbd_conf *mdev, } } - if ((os.conn == SyncTarget || os.conn == SyncSource) && - (ns.conn == PausedSyncT || ns.conn == PausedSyncS)) { + if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) && + (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) { dev_info(DEV, "Resync suspended\n"); mdev->rs_mark_time = jiffies; - if (ns.conn == PausedSyncT) + if (ns.conn == C_PAUSED_SYNC_T) 
set_bit(STOP_SYNC_TIMER, &mdev->flags); } - if (os.conn == Connected && - (ns.conn == VerifyS || ns.conn == VerifyT)) { + if (os.conn == C_CONNECTED && + (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) { mdev->ov_position = 0; mdev->ov_left = mdev->rs_total = @@ -974,53 +974,53 @@ int __drbd_set_state(struct drbd_conf *mdev, mdev->ov_last_oos_size = 0; mdev->ov_last_oos_start = 0; - if (ns.conn == VerifyS) + if (ns.conn == C_VERIFY_S) mod_timer(&mdev->resync_timer, jiffies); } if (inc_local(mdev)) { - u32 mdf = mdev->bc->md.flags & ~(MDF_Consistent|MDF_PrimaryInd| - MDF_ConnectedInd|MDF_WasUpToDate| - MDF_PeerOutDated|MDF_CrashedPrimary); + u32 mdf = mdev->bc->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND| + MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE| + MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY); if (test_bit(CRASHED_PRIMARY, &mdev->flags)) - mdf |= MDF_CrashedPrimary; - if (mdev->state.role == Primary || - (mdev->state.pdsk < Inconsistent && mdev->state.peer == Primary)) - mdf |= MDF_PrimaryInd; - if (mdev->state.conn > WFReportParams) - mdf |= MDF_ConnectedInd; - if (mdev->state.disk > Inconsistent) - mdf |= MDF_Consistent; - if (mdev->state.disk > Outdated) - mdf |= MDF_WasUpToDate; - if (mdev->state.pdsk <= Outdated && mdev->state.pdsk >= Inconsistent) - mdf |= MDF_PeerOutDated; + mdf |= MDF_CRASHED_PRIMARY; + if (mdev->state.role == R_PRIMARY || + (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY)) + mdf |= MDF_PRIMARY_IND; + if (mdev->state.conn > C_WF_REPORT_PARAMS) + mdf |= MDF_CONNECTED_IND; + if (mdev->state.disk > D_INCONSISTENT) + mdf |= MDF_CONSISTENT; + if (mdev->state.disk > D_OUTDATED) + mdf |= MDF_WAS_UP_TO_DATE; + if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT) + mdf |= MDF_PEER_OUT_DATED; if (mdf != mdev->bc->md.flags) { mdev->bc->md.flags = mdf; drbd_md_mark_dirty(mdev); } - if (os.disk < Consistent && ns.disk >= Consistent) - drbd_set_ed_uuid(mdev, mdev->bc->md.uuid[Current]); + if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT) + drbd_set_ed_uuid(mdev, mdev->bc->md.uuid[UI_CURRENT]); dec_local(mdev); } - /* Peer was forced UpToDate & Primary, consider to resync */ - if (os.disk == Inconsistent && os.pdsk == Inconsistent && - os.peer == Secondary && ns.peer == Primary) + /* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */ + if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT && + os.peer == R_SECONDARY && ns.peer == R_PRIMARY) set_bit(CONSIDER_RESYNC, &mdev->flags); /* Receiver should clean up itself */ - if (os.conn != Disconnecting && ns.conn == Disconnecting) + if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING) drbd_thread_stop_nowait(&mdev->receiver); /* Now the receiver finished cleaning up itself, it should die */ - if (os.conn != StandAlone && ns.conn == StandAlone) + if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE) drbd_thread_stop_nowait(&mdev->receiver); /* Upon network failure, we need to restart the receiver. 
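The metadata-flag block above follows one pattern: strip all derivable MDF_* bits, recompute each from the current state using the ordered state enums, and write the word back only when it actually changed. A userspace sketch of two of those derivations; the bit positions and the enum are simplified stand-ins (the real ordering also contains D_UNKNOWN):

#include <stdio.h>

enum { DEMO_D_DISKLESS, DEMO_D_ATTACHING, DEMO_D_FAILED, DEMO_D_NEGOTIATING,
       DEMO_D_INCONSISTENT, DEMO_D_OUTDATED, DEMO_D_CONSISTENT, DEMO_D_UP_TO_DATE };

#define DEMO_MDF_CONSISTENT	(1 << 0)
#define DEMO_MDF_WAS_UP_TO_DATE	(1 << 1)

static unsigned demo_mdf(unsigned old_mdf, int disk)
{
	/* clear the derivable bits, then recompute them */
	unsigned mdf = old_mdf & ~(DEMO_MDF_CONSISTENT | DEMO_MDF_WAS_UP_TO_DATE);

	if (disk > DEMO_D_INCONSISTENT)		/* at least consistent */
		mdf |= DEMO_MDF_CONSISTENT;
	if (disk > DEMO_D_OUTDATED)		/* was up to date at some point */
		mdf |= DEMO_MDF_WAS_UP_TO_DATE;
	return mdf;
}

int main(void)
{
	unsigned mdf = demo_mdf(0, DEMO_D_UP_TO_DATE);

	printf("mdf=%#x\n", mdf);	/* 0x3: consistent and was up to date */
	return 0;
}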
*/ - if (os.conn > TearDown && - ns.conn <= TearDown && ns.conn >= Timeout) + if (os.conn > C_TEAR_DOWN && + ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) drbd_thread_restart_nowait(&mdev->receiver); ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC); @@ -1044,7 +1044,7 @@ STATIC int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unu ascw = (struct after_state_chg_work *) w; after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags); - if (ascw->flags & ChgWaitComplete) { + if (ascw->flags & CS_WAIT_COMPLETE) { D_ASSERT(ascw->done != NULL); complete(ascw->done); } @@ -1057,32 +1057,32 @@ static void abw_start_sync(struct drbd_conf *mdev, int rv) { if (rv) { dev_err(DEV, "Writing the bitmap failed not starting resync.\n"); - _drbd_request_state(mdev, NS(conn, Connected), ChgStateVerbose); + _drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE); return; } switch (mdev->state.conn) { - case StartingSyncT: - _drbd_request_state(mdev, NS(conn, WFSyncUUID), ChgStateVerbose); + case C_STARTING_SYNC_T: + _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); break; - case StartingSyncS: - drbd_start_resync(mdev, SyncSource); + case C_STARTING_SYNC_S: + drbd_start_resync(mdev, C_SYNC_SOURCE); break; } } -STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state_t os, - union drbd_state_t ns, enum chg_state_flags flags) +STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, enum chg_state_flags flags) { - enum fencing_policy fp; + enum drbd_fencing_p fp; - if (os.conn != Connected && ns.conn == Connected) { + if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { clear_bit(CRASHED_PRIMARY, &mdev->flags); if (mdev->p_uuid) - mdev->p_uuid[UUID_FLAGS] &= ~((u64)2); + mdev->p_uuid[UI_FLAGS] &= ~((u64)2); } - fp = DontCare; + fp = FP_DONT_CARE; if (inc_local(mdev)) { fp = mdev->bc->dc.fencing; dec_local(mdev); @@ -1091,44 +1091,44 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state_t os, /* Inform userspace about the change... */ drbd_bcast_state(mdev, ns); - if (!(os.role == Primary && os.disk < UpToDate && os.pdsk < UpToDate) && - (ns.role == Primary && ns.disk < UpToDate && ns.pdsk < UpToDate)) + if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) && + (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) drbd_khelper(mdev, "pri-on-incon-degr"); /* Here we have the actions that are performed after a state change. This function might sleep */ - if (fp == Stonith && ns.susp) { + if (fp == FP_STONITH && ns.susp) { /* case1: The outdate peer handler is successfull: * case2: The connection was established again: */ - if ((os.pdsk > Outdated && ns.pdsk <= Outdated) || - (os.conn < Connected && ns.conn >= Connected)) { + if ((os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) || + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)) { tl_clear(mdev); spin_lock_irq(&mdev->req_lock); - _drbd_set_state(_NS(mdev, susp, 0), ChgStateVerbose, NULL); + _drbd_set_state(_NS(mdev, susp, 0), CS_VERBOSE, NULL); spin_unlock_irq(&mdev->req_lock); } } /* Do not change the order of the if above and the two below... 
*/ - if (os.pdsk == Diskless && ns.pdsk > Diskless) { /* attach on the peer */ + if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ drbd_send_uuids(mdev); drbd_send_state(mdev); } - if (os.conn != WFBitMapS && ns.conn == WFBitMapS) + if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S) drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)"); /* Lost contact to peer's copy of the data */ - if ((os.pdsk >= Inconsistent && - os.pdsk != DUnknown && - os.pdsk != Outdated) - && (ns.pdsk < Inconsistent || - ns.pdsk == DUnknown || - ns.pdsk == Outdated)) { + if ((os.pdsk >= D_INCONSISTENT && + os.pdsk != D_UNKNOWN && + os.pdsk != D_OUTDATED) + && (ns.pdsk < D_INCONSISTENT || + ns.pdsk == D_UNKNOWN || + ns.pdsk == D_OUTDATED)) { kfree(mdev->p_uuid); mdev->p_uuid = NULL; if (inc_local(mdev)) { - if ((ns.role == Primary || ns.peer == Primary) && - mdev->bc->md.uuid[Bitmap] == 0 && ns.disk >= UpToDate) { + if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && + mdev->bc->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { drbd_uuid_new_current(mdev); drbd_send_uuids(mdev); } @@ -1136,19 +1136,19 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state_t os, } } - if (ns.pdsk < Inconsistent && inc_local(mdev)) { - if (ns.peer == Primary && mdev->bc->md.uuid[Bitmap] == 0) + if (ns.pdsk < D_INCONSISTENT && inc_local(mdev)) { + if (ns.peer == R_PRIMARY && mdev->bc->md.uuid[UI_BITMAP] == 0) drbd_uuid_new_current(mdev); - /* Diskless Peer becomes secondary */ - if (os.peer == Primary && ns.peer == Secondary) + /* D_DISKLESS Peer becomes secondary */ + if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) drbd_al_to_on_disk_bm(mdev); dec_local(mdev); } /* Last part of the attaching process ... */ - if (ns.conn >= Connected && - os.disk == Attaching && ns.disk == Negotiating) { + if (ns.conn >= C_CONNECTED && + os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { kfree(mdev->p_uuid); /* We expect to receive up-to-date UUIDs soon. */ mdev->p_uuid = NULL; /* ...to not use the old ones in the mean time */ drbd_send_sizes(mdev); /* to start sync... */ @@ -1157,7 +1157,7 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state_t os, } /* We want to pause/continue resync, tell peer. */ - if (ns.conn >= Connected && + if (ns.conn >= C_CONNECTED && ((os.aftr_isp != ns.aftr_isp) || (os.user_isp != ns.user_isp))) drbd_send_state(mdev); @@ -1169,22 +1169,22 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state_t os, /* Make sure the peer gets informed about eventual state changes (ISP bits) while we were in WFReportParams. */ - if (os.conn == WFReportParams && ns.conn >= Connected) + if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) drbd_send_state(mdev); /* We are in the progress to start a full sync... */ - if ((os.conn != StartingSyncT && ns.conn == StartingSyncT) || - (os.conn != StartingSyncS && ns.conn == StartingSyncS)) + if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || + (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S)) drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, &abw_start_sync, "set_n_write from StartingSync"); /* We are invalidating our self... 
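Losing contact with the peer's copy of the data, as handled above, may force a new data generation: if we or the peer are primary with an up-to-date disk and an empty bitmap slot, drbd_uuid_new_current() (shown later in this patch) parks the current UUID in the bitmap slot and rolls a fresh one. A stand-alone sketch of that rotation; the random source is for illustration only:

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

enum { UI_CURRENT, UI_BITMAP, UI_HISTORY_START, UI_HISTORY_END, UI_SIZE };

static void demo_uuid_new_current(uint64_t *uuid)
{
	/* precondition in the patch: the bitmap slot is empty */
	uuid[UI_BITMAP] = uuid[UI_CURRENT];	/* remember what the peer last knew */
	uuid[UI_CURRENT] = ((uint64_t)rand() << 32) ^ rand();
}

int main(void)
{
	uint64_t uuid[UI_SIZE] = { 0xdeadbeefULL, 0, 0, 0 };

	demo_uuid_new_current(uuid);
	printf("current=%llx bitmap=%llx\n",
	       (unsigned long long)uuid[UI_CURRENT],
	       (unsigned long long)uuid[UI_BITMAP]);
	return 0;
}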
*/ - if (os.conn < Connected && ns.conn < Connected && - os.disk > Inconsistent && ns.disk == Inconsistent) + if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED && + os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate"); - if (os.disk > Diskless && ns.disk == Diskless) { - /* since inc_local() only works as long as disk>=Inconsistent, - and it is Diskless here, local_cnt can only go down, it can + if (os.disk > D_DISKLESS && ns.disk == D_DISKLESS) { + /* since inc_local() only works as long as disk>=D_INCONSISTENT, + and it is D_DISKLESS here, local_cnt can only go down, it can not increase... It will reach zero */ wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); @@ -1201,25 +1201,25 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state_t os, } /* Disks got bigger while they were detached */ - if (ns.disk > Negotiating && ns.pdsk > Negotiating && + if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) { - if (ns.conn == Connected) + if (ns.conn == C_CONNECTED) resync_after_online_grow(mdev); } /* A resync finished or aborted, wake paused devices... */ - if ((os.conn > Connected && ns.conn <= Connected) || + if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) || (os.peer_isp && !ns.peer_isp) || (os.user_isp && !ns.user_isp)) resume_next_sg(mdev); /* Upon network connection, we need to start the received */ - if (os.conn == StandAlone && ns.conn == Unconnected) + if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED) drbd_thread_start(&mdev->receiver); /* Terminate worker thread if we are unconfigured - it will be restarted as needed... */ - if (ns.disk == Diskless && ns.conn == StandAlone && ns.role == Secondary) { + if (ns.disk == D_DISKLESS && ns.conn == C_STANDALONE && ns.role == R_SECONDARY) { if (os.aftr_isp != ns.aftr_isp) resume_next_sg(mdev); drbd_thread_stop_nowait(&mdev->worker); @@ -1231,7 +1231,7 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state_t os, STATIC int drbd_thread_setup(void *arg) { - struct Drbd_thread *thi = (struct Drbd_thread *) arg; + struct drbd_thread *thi = (struct drbd_thread *) arg; struct drbd_conf *mdev = thi->mdev; int retval; @@ -1242,7 +1242,7 @@ restart: /* if the receiver has been "Exiting", the last thing it did * was set the conn state to "StandAlone", - * if now a re-connect request comes in, conn state goes Unconnected, + * if now a re-connect request comes in, conn state goes C_UNCONNECTED, * and receiver thread will be "started". * drbd_thread_start needs to set "Restarting" in that case. 
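The same-spinlock rule the comment above insists on is the heart of the thread shutdown protocol: the stopping side picks Exiting or Restarting and writes it under t_lock, so the thread can never observe a half-made decision. A sketch with a pthread mutex standing in for the kernel spinlock:

#include <pthread.h>
#include <stdio.h>

enum demo_thread_state { DEMO_NONE, DEMO_RUNNING, DEMO_EXITING, DEMO_RESTARTING };

struct demo_thread {
	pthread_mutex_t t_lock;
	enum demo_thread_state t_state;
};

static void demo_thread_stop(struct demo_thread *thi, int restart)
{
	enum demo_thread_state ns = restart ? DEMO_RESTARTING : DEMO_EXITING;

	pthread_mutex_lock(&thi->t_lock);
	if (thi->t_state == DEMO_RUNNING)
		thi->t_state = ns;	/* the thread acts on this at its next check */
	pthread_mutex_unlock(&thi->t_lock);
}

int main(void)
{
	struct demo_thread thi = { PTHREAD_MUTEX_INITIALIZER, DEMO_RUNNING };

	demo_thread_stop(&thi, 1);
	printf("t_state=%d (restarting)\n", thi.t_state);
	return 0;
}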
* t_state check and assignement needs to be within the same spinlock, @@ -1270,8 +1270,8 @@ restart: return retval; } -STATIC void drbd_thread_init(struct drbd_conf *mdev, struct Drbd_thread *thi, - int (*func) (struct Drbd_thread *)) +STATIC void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi, + int (*func) (struct drbd_thread *)) { spin_lock_init(&thi->t_lock); thi->task = NULL; @@ -1280,7 +1280,7 @@ STATIC void drbd_thread_init(struct drbd_conf *mdev, struct Drbd_thread *thi, thi->mdev = mdev; } -int drbd_thread_start(struct Drbd_thread *thi) +int drbd_thread_start(struct drbd_thread *thi) { struct drbd_conf *mdev = thi->mdev; struct task_struct *nt; @@ -1338,9 +1338,9 @@ int drbd_thread_start(struct Drbd_thread *thi) } -void _drbd_thread_stop(struct Drbd_thread *thi, int restart, int wait) +void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) { - enum Drbd_thread_state ns = restart ? Restarting : Exiting; + enum drbd_thread_state ns = restart ? Restarting : Exiting; spin_lock(&thi->t_lock); @@ -1405,7 +1405,7 @@ cpumask_t drbd_calc_cpu_mask(struct drbd_conf *mdev) void drbd_thread_current_set_cpu(struct drbd_conf *mdev) { struct task_struct *p = current; - struct Drbd_thread *thi = + struct drbd_thread *thi = p == mdev->asender.task ? &mdev->asender : p == mdev->receiver.task ? &mdev->receiver : p == mdev->worker.task ? &mdev->worker : @@ -1425,7 +1425,7 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) /* the appropriate socket mutex must be held already */ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum Drbd_Packet_Cmd cmd, struct Drbd_Header *h, + enum drbd_packets cmd, struct p_header *h, size_t size, unsigned msg_flags) { int sent, ok; @@ -1435,7 +1435,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, h->magic = BE_DRBD_MAGIC; h->command = cpu_to_be16(cmd); - h->length = cpu_to_be16(size-sizeof(struct Drbd_Header)); + h->length = cpu_to_be16(size-sizeof(struct p_header)); dump_packet(mdev, sock, 0, (void *)h, __FILE__, __LINE__); sent = drbd_send(mdev, sock, h, size, msg_flags); @@ -1451,7 +1451,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, * when we hold the appropriate socket mutex. */ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, - enum Drbd_Packet_Cmd cmd, struct Drbd_Header *h, size_t size) + enum drbd_packets cmd, struct p_header *h, size_t size) { int ok = 0; struct socket *sock; @@ -1476,10 +1476,10 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, return ok; } -int drbd_send_cmd2(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, char *data, +int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, size_t size) { - struct Drbd_Header h; + struct p_header h; int ok; h.magic = BE_DRBD_MAGIC; @@ -1503,15 +1503,15 @@ int drbd_send_cmd2(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, char *data, int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) { - struct Drbd_SyncParam89_Packet *p; + struct p_rs_param_89 *p; struct socket *sock; int size, rv; const int apv = mdev->agreed_pro_version; - size = apv <= 87 ? sizeof(struct Drbd_SyncParam_Packet) - : apv == 88 ? sizeof(struct Drbd_SyncParam_Packet) + size = apv <= 87 ? sizeof(struct p_rs_param) + : apv == 88 ? sizeof(struct p_rs_param) + strlen(mdev->sync_conf.verify_alg) + 1 - : /* 89 */ sizeof(struct Drbd_SyncParam89_Packet); + : /* 89 */ sizeof(struct p_rs_param_89); /* used from admin command context and receiver/worker context. 
* to avoid kmalloc, grab the socket right here, @@ -1520,9 +1520,9 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) sock = mdev->data.socket; if (likely(sock != NULL)) { - enum Drbd_Packet_Cmd cmd = apv >= 89 ? SyncParam89 : SyncParam; + enum drbd_packets cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM; - p = &mdev->data.sbuf.SyncParam89; + p = &mdev->data.sbuf.rs_param_89; /* initialize verify_alg and csums_alg */ memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); @@ -1545,10 +1545,10 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) int drbd_send_protocol(struct drbd_conf *mdev) { - struct Drbd_Protocol_Packet *p; + struct p_protocol *p; int size, rv; - size = sizeof(struct Drbd_Protocol_Packet); + size = sizeof(struct p_protocol); if (mdev->agreed_pro_version >= 87) size += strlen(mdev->net_conf->integrity_alg) + 1; @@ -1567,34 +1567,34 @@ int drbd_send_protocol(struct drbd_conf *mdev) if (mdev->agreed_pro_version >= 87) strcpy(p->integrity_alg, mdev->net_conf->integrity_alg); - rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, ReportProtocol, - (struct Drbd_Header *)p, size); + rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL, + (struct p_header *)p, size); kfree(p); return rv; } int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) { - struct Drbd_GenCnt_Packet p; + struct p_uuids p; int i; - if (!inc_local_if_state(mdev, Negotiating)) + if (!inc_local_if_state(mdev, D_NEGOTIATING)) return 1; - for (i = Current; i < UUID_SIZE; i++) + for (i = UI_CURRENT; i < UI_SIZE; i++) p.uuid[i] = mdev->bc ? cpu_to_be64(mdev->bc->md.uuid[i]) : 0; mdev->comm_bm_set = drbd_bm_total_weight(mdev); - p.uuid[UUID_SIZE] = cpu_to_be64(mdev->comm_bm_set); + p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set); uuid_flags |= mdev->net_conf->want_lose ? 1 : 0; uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0; - uuid_flags |= mdev->new_state_tmp.disk == Inconsistent ? 4 : 0; - p.uuid[UUID_FLAGS] = cpu_to_be64(uuid_flags); + uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 
4 : 0; + p.uuid[UI_FLAGS] = cpu_to_be64(uuid_flags); dec_local(mdev); - return drbd_send_cmd(mdev, USE_DATA_SOCKET, ReportUUIDs, - (struct Drbd_Header *)&p, sizeof(p)); + return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS, + (struct p_header *)&p, sizeof(p)); } int drbd_send_uuids(struct drbd_conf *mdev) @@ -1610,22 +1610,22 @@ int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev) int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val) { - struct Drbd_SyncUUID_Packet p; + struct p_rs_uuid p; p.uuid = cpu_to_be64(val); - return drbd_send_cmd(mdev, USE_DATA_SOCKET, ReportSyncUUID, - (struct Drbd_Header *)&p, sizeof(p)); + return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID, + (struct p_header *)&p, sizeof(p)); } int drbd_send_sizes(struct drbd_conf *mdev) { - struct Drbd_Sizes_Packet p; + struct p_sizes p; sector_t d_size, u_size; int q_order_type; int ok; - if (inc_local_if_state(mdev, Negotiating)) { + if (inc_local_if_state(mdev, D_NEGOTIATING)) { D_ASSERT(mdev->bc->backing_bdev); d_size = drbd_get_max_capacity(mdev->bc); u_size = mdev->bc->dc.disk_size; @@ -1644,22 +1644,22 @@ int drbd_send_sizes(struct drbd_conf *mdev) p.max_segment_size = cpu_to_be32(mdev->rq_queue->max_segment_size); p.queue_order_type = cpu_to_be32(q_order_type); - ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, ReportSizes, - (struct Drbd_Header *)&p, sizeof(p)); + ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES, + (struct p_header *)&p, sizeof(p)); return ok; } /** * drbd_send_state: * Informs the peer about our state. Only call it when - * mdev->state.conn >= Connected (I.e. you may not call it while in + * mdev->state.conn >= C_CONNECTED (I.e. you may not call it while in * WFReportParams. Though there is one valid and necessary exception, * drbd_connect() calls drbd_send_state() while in it WFReportParams. */ int drbd_send_state(struct drbd_conf *mdev) { struct socket *sock; - struct Drbd_State_Packet p; + struct p_state p; int ok = 0; /* Grab state lock so we wont send state if we're in the middle @@ -1672,8 +1672,8 @@ int drbd_send_state(struct drbd_conf *mdev) sock = mdev->data.socket; if (likely(sock != NULL)) { - ok = _drbd_send_cmd(mdev, sock, ReportState, - (struct Drbd_Header *)&p, sizeof(p), 0); + ok = _drbd_send_cmd(mdev, sock, P_STATE, + (struct p_header *)&p, sizeof(p), 0); } mutex_unlock(&mdev->data.mutex); @@ -1683,32 +1683,32 @@ int drbd_send_state(struct drbd_conf *mdev) } int drbd_send_state_req(struct drbd_conf *mdev, - union drbd_state_t mask, union drbd_state_t val) + union drbd_state mask, union drbd_state val) { - struct Drbd_Req_State_Packet p; + struct p_req_state p; p.mask = cpu_to_be32(mask.i); p.val = cpu_to_be32(val.i); - return drbd_send_cmd(mdev, USE_DATA_SOCKET, StateChgRequest, - (struct Drbd_Header *)&p, sizeof(p)); + return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ, + (struct p_header *)&p, sizeof(p)); } int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode) { - struct Drbd_RqS_Reply_Packet p; + struct p_req_state_reply p; p.retcode = cpu_to_be32(retcode); - return drbd_send_cmd(mdev, USE_META_SOCKET, StateChgReply, - (struct Drbd_Header *)&p, sizeof(p)); + return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY, + (struct p_header *)&p, sizeof(p)); } /* returns * positive: number of payload bytes needed in this packet. * zero: incompressible. 
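That positive/zero contract is easiest to see with a toy encoder. The sketch below describes a bitmap as run lengths of alternating 0- and 1-bits, starting with a possibly empty 0-run, and returns 0 when the runs do not fit the output buffer, i.e. the input is treated as incompressible (the patch packs the counts with variable-length codes instead of the plain uint32 used here):

#include <stdint.h>
#include <stdio.h>

static size_t demo_rle(const uint8_t *bits, size_t nbits,
		       uint32_t *runs, size_t max_runs)
{
	size_t n = 0, i;
	uint32_t run = 0;
	int cur = 0;	/* encoding starts with a (possibly empty) 0-run */

	for (i = 0; i < nbits; i++) {
		int b = (bits[i / 8] >> (i % 8)) & 1;
		if (b == cur) {
			run++;
			continue;
		}
		if (n == max_runs)
			return 0;	/* does not fit: incompressible */
		runs[n++] = run;	/* close the current run */
		cur = b;
		run = 1;
	}
	if (n == max_runs)
		return 0;
	runs[n++] = run;
	return n;			/* number of runs produced */
}

int main(void)
{
	uint8_t bm[2] = { 0xF0, 0x0F };	/* bit sequence 0000 1111 1111 0000 */
	uint32_t runs[16];
	size_t n = demo_rle(bm, 16, runs, 16), i;

	for (i = 0; i < n; i++)
		printf("%u ", runs[i]);
	printf("\n");	/* prints: 4 8 4 */
	return 0;
}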
*/ int fill_bitmap_rle_bytes(struct drbd_conf *mdev, - struct Drbd_Compressed_Bitmap_Packet *p, + struct p_compressed_bm *p, struct bm_xfer_ctx *c) { unsigned long plain_bits; @@ -1801,7 +1801,7 @@ int fill_bitmap_rle_bytes(struct drbd_conf *mdev, } int fill_bitmap_rle_bits(struct drbd_conf *mdev, - struct Drbd_Compressed_Bitmap_Packet *p, + struct p_compressed_bm *p, struct bm_xfer_ctx *c) { struct bitstream bs; @@ -1897,9 +1897,9 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, enum { OK, FAILED, DONE } send_bitmap_rle_or_plain(struct drbd_conf *mdev, - struct Drbd_Header *h, struct bm_xfer_ctx *c) + struct p_header *h, struct bm_xfer_ctx *c) { - struct Drbd_Compressed_Bitmap_Packet *p = (void*)h; + struct p_compressed_bm *p = (void*)h; unsigned long num_words; int len; int ok; @@ -1913,7 +1913,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, return FAILED; if (len) { DCBP_set_code(p, 0 ? RLE_VLI_Bytes : RLE_VLI_BitsFibD_3_5); - ok = _drbd_send_cmd(mdev, mdev->data.socket, ReportCBitMap, h, + ok = _drbd_send_cmd(mdev, mdev->data.socket, P_COMPRESSED_BITMAP, h, sizeof(*p) + len, 0); c->packets[0]++; @@ -1928,13 +1928,13 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, len = num_words * sizeof(long); if (len) drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload); - ok = _drbd_send_cmd(mdev, mdev->data.socket, ReportBitMap, - h, sizeof(struct Drbd_Header) + len, 0); + ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BITMAP, + h, sizeof(struct p_header) + len, 0); c->word_offset += num_words; c->bit_offset = c->word_offset * BITS_PER_LONG; c->packets[1]++; - c->bytes[1] += sizeof(struct Drbd_Header) + len; + c->bytes[1] += sizeof(struct p_header) + len; if (c->bit_offset > c->bm_bits) c->bit_offset = c->bm_bits; @@ -1950,30 +1950,30 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, int _drbd_send_bitmap(struct drbd_conf *mdev) { struct bm_xfer_ctx c; - struct Drbd_Header *p; + struct p_header *p; int ret; ERR_IF(!mdev->bitmap) return FALSE; /* maybe we should use some per thread scratch page, * and allocate that during initial device creation? */ - p = (struct Drbd_Header *) __get_free_page(GFP_NOIO); + p = (struct p_header *) __get_free_page(GFP_NOIO); if (!p) { dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); return FALSE; } if (inc_local(mdev)) { - if (drbd_md_test_flag(mdev->bc, MDF_FullSync)) { + if (drbd_md_test_flag(mdev->bc, MDF_FULL_SYNC)) { dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n"); drbd_bm_set_all(mdev); if (drbd_bm_write(mdev)) { - /* write_bm did fail! Leave full sync flag set in Meta Data + /* write_bm did fail! Leave full sync flag set in Meta Data * but otherwise process as per normal - need to tell other * side that a full resync is required! 
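All of the _drbd_send_cmd() calls above rely on one framing rule: a fixed big-endian header whose length field counts only the bytes that follow it, hence the recurring sizeof(struct p_header) arithmetic. A userspace sketch with an invented layout and a stand-in magic constant:

#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct demo_header {
	uint32_t magic;
	uint16_t command;
	uint16_t length;	/* payload bytes after this header */
};

static size_t demo_frame(uint8_t *buf, uint16_t cmd,
			 const void *payload, uint16_t plen)
{
	struct demo_header h;

	h.magic = htonl(0x83740267);	/* stand-in magic constant */
	h.command = htons(cmd);
	h.length = htons(plen);		/* i.e. size - sizeof(header) */
	memcpy(buf, &h, sizeof(h));
	memcpy(buf + sizeof(h), payload, plen);
	return sizeof(h) + plen;
}

int main(void)
{
	uint8_t buf[64];
	size_t n = demo_frame(buf, 21, "hello", 5);

	printf("framed %zu bytes (8 header + 5 payload)\n", n);
	return 0;
}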
*/ dev_err(DEV, "Failed to write bitmap to disk!\n"); } else { - drbd_md_clear_flag(mdev, MDF_FullSync); + drbd_md_clear_flag(mdev, MDF_FULL_SYNC); drbd_md_sync(mdev); } } @@ -2007,15 +2007,15 @@ int drbd_send_bitmap(struct drbd_conf *mdev) int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) { int ok; - struct Drbd_BarrierAck_Packet p; + struct p_barrier_ack p; p.barrier = barrier_nr; p.set_size = cpu_to_be32(set_size); - if (mdev->state.conn < Connected) + if (mdev->state.conn < C_CONNECTED) return FALSE; - ok = drbd_send_cmd(mdev, USE_META_SOCKET, BarrierAck, - (struct Drbd_Header *)&p, sizeof(p)); + ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK, + (struct p_header *)&p, sizeof(p)); return ok; } @@ -2024,45 +2024,45 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) * This helper function expects the sector and block_id parameter already * in big endian! */ -STATIC int _drbd_send_ack(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, +STATIC int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, u64 sector, u32 blksize, u64 block_id) { int ok; - struct Drbd_BlockAck_Packet p; + struct p_block_ack p; p.sector = sector; p.block_id = block_id; p.blksize = blksize; p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq)); - if (!mdev->meta.socket || mdev->state.conn < Connected) + if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED) return FALSE; ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, - (struct Drbd_Header *)&p, sizeof(p)); + (struct p_header *)&p, sizeof(p)); return ok; } -int drbd_send_ack_dp(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, - struct Drbd_Data_Packet *dp) +int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, + struct p_data *dp) { - const int header_size = sizeof(struct Drbd_Data_Packet) - - sizeof(struct Drbd_Header); - int data_size = ((struct Drbd_Header *)dp)->length - header_size; + const int header_size = sizeof(struct p_data) + - sizeof(struct p_header); + int data_size = ((struct p_header *)dp)->length - header_size; return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), dp->block_id); } -int drbd_send_ack_rp(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, - struct Drbd_BlockRequest_Packet *rp) +int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd, + struct p_block_req *rp) { return _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id); } int drbd_send_ack(struct drbd_conf *mdev, - enum Drbd_Packet_Cmd cmd, struct Tl_epoch_entry *e) + enum drbd_packets cmd, struct drbd_epoch_entry *e) { return _drbd_send_ack(mdev, cmd, cpu_to_be64(e->sector), @@ -2072,7 +2072,7 @@ int drbd_send_ack(struct drbd_conf *mdev, /* This function misuses the block_id field to signal if the blocks * are is sync or not. 
*/ -int drbd_send_ack_ex(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, +int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, sector_t sector, int blksize, u64 block_id) { return _drbd_send_ack(mdev, cmd, @@ -2085,24 +2085,24 @@ int drbd_send_drequest(struct drbd_conf *mdev, int cmd, sector_t sector, int size, u64 block_id) { int ok; - struct Drbd_BlockRequest_Packet p; + struct p_block_req p; p.sector = cpu_to_be64(sector); p.block_id = block_id; p.blksize = cpu_to_be32(size); ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, - (struct Drbd_Header *)&p, sizeof(p)); + (struct p_header *)&p, sizeof(p)); return ok; } int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size, void *digest, int digest_size, - enum Drbd_Packet_Cmd cmd) + enum drbd_packets cmd) { int ok; - struct Drbd_BlockRequest_Packet p; + struct p_block_req p; p.sector = cpu_to_be64(sector); p.block_id = BE_DRBD_MAGIC + 0xbeef; @@ -2110,7 +2110,7 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, p.head.magic = BE_DRBD_MAGIC; p.head.command = cpu_to_be16(cmd); - p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct Drbd_Header) + digest_size); + p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header) + digest_size); mutex_lock(&mdev->data.mutex); @@ -2125,14 +2125,14 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) { int ok; - struct Drbd_BlockRequest_Packet p; + struct p_block_req p; p.sector = cpu_to_be64(sector); p.block_id = BE_DRBD_MAGIC + 0xbabe; p.blksize = cpu_to_be32(size); - ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, OVRequest, - (struct Drbd_Header *)&p, sizeof(p)); + ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST, + (struct p_header *)&p, sizeof(p)); return ok; } @@ -2148,7 +2148,7 @@ STATIC int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * drop_it = mdev->meta.socket == sock || !mdev->asender.task || get_t_state(&mdev->asender) != Running - || mdev->state.conn < Connected; + || mdev->state.conn < C_CONNECTED; if (drop_it) return TRUE; @@ -2160,7 +2160,7 @@ STATIC int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * request_ping(mdev); } - return drop_it; /* && (mdev->state == Primary) */; + return drop_it; /* && (mdev->state == R_PRIMARY) */; } /* The idea of sendpage seems to be to put some kind of reference @@ -2237,7 +2237,7 @@ int _drbd_send_page(struct drbd_conf *mdev, struct page *page, } len -= sent; offset += sent; - } while (len > 0 /* THINK && mdev->cstate >= Connected*/); + } while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/); set_fs(oldfs); clear_bit(NET_CONGESTED, &mdev->flags); @@ -2274,12 +2274,12 @@ static inline int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) } /* Used to send write requests - * Primary -> Peer (Data) + * R_PRIMARY -> Peer (P_DATA) */ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) { int ok = 1; - struct Drbd_Data_Packet p; + struct p_data p; unsigned int dp_flags = 0; void *dgb; int dgs; @@ -2291,9 +2291,9 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; p.head.magic = BE_DRBD_MAGIC; - p.head.command = cpu_to_be16(Data); + p.head.command = cpu_to_be16(P_DATA); p.head.length = - cpu_to_be16(sizeof(p) - sizeof(struct Drbd_Header) + dgs + req->size); + cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + req->size); p.sector = cpu_to_be64(req->sector); p.block_id = (unsigned 
long)req; @@ -2308,8 +2308,8 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) dp_flags |= DP_HARDBARRIER; if (bio_sync(req->master_bio)) dp_flags |= DP_RW_SYNC; - if (mdev->state.conn >= SyncSource && - mdev->state.conn <= PausedSyncT) + if (mdev->state.conn >= C_SYNC_SOURCE && + mdev->state.conn <= C_PAUSED_SYNC_T) dp_flags |= DP_MAY_SET_IN_SYNC; p.dp_flags = cpu_to_be32(dp_flags); @@ -2334,14 +2334,14 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) } /* answer packet, used to send data back for read requests: - * Peer -> (diskless) Primary (DataReply) - * SyncSource -> SyncTarget (RSDataReply) + * Peer -> (diskless) R_PRIMARY (P_DATA_REPLY) + * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY) */ -int drbd_send_block(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, - struct Tl_epoch_entry *e) +int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, + struct drbd_epoch_entry *e) { int ok; - struct Drbd_Data_Packet p; + struct p_data p; void *dgb; int dgs; @@ -2351,7 +2351,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum Drbd_Packet_Cmd cmd, p.head.magic = BE_DRBD_MAGIC; p.head.command = cpu_to_be16(cmd); p.head.length = - cpu_to_be16(sizeof(p) - sizeof(struct Drbd_Header) + dgs + e->size); + cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + e->size); p.sector = cpu_to_be64(e->sector); p.block_id = e->block_id; @@ -2457,9 +2457,9 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, dev_err(DEV, "%s_sendmsg returned %d\n", sock == mdev->meta.socket ? "msock" : "sock", rv); - drbd_force_state(mdev, NS(conn, BrokenPipe)); + drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE)); } else - drbd_force_state(mdev, NS(conn, Timeout)); + drbd_force_state(mdev, NS(conn, C_TIMEOUT)); } return sent; @@ -2475,7 +2475,7 @@ static int drbd_open(struct block_device *bdev, fmode_t mode) /* to have a stable mdev->state.role * and no race with updating open_cnt */ - if (mdev->state.role != Primary) { + if (mdev->state.role != R_PRIMARY) { if (mode & FMODE_WRITE) rv = -EROFS; else if (!allow_oos) @@ -2500,7 +2500,7 @@ STATIC void drbd_unplug_fn(struct request_queue *q) { struct drbd_conf *mdev = q->queuedata; - MTRACE(TraceTypeUnplug, TraceLvlSummary, + MTRACE(TRACE_TYPE_UNPLUG, TRACE_LVL_SUMMARY, dev_info(DEV, "got unplugged ap_bio_count=%d\n", atomic_read(&mdev->ap_bio_cnt)); ); @@ -2512,8 +2512,8 @@ STATIC void drbd_unplug_fn(struct request_queue *q) /* only if connected */ spin_lock_irq(&mdev->req_lock); - if (mdev->state.pdsk >= Inconsistent && mdev->state.conn >= Connected) { - D_ASSERT(mdev->state.role == Primary); + if (mdev->state.pdsk >= D_INCONSISTENT && mdev->state.conn >= C_CONNECTED) { + D_ASSERT(mdev->state.role == R_PRIMARY); if (test_and_clear_bit(UNPLUG_REMOTE, &mdev->flags)) { /* add to the data.work queue, * unless already queued. 
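The length computation in drbd_send_dblock() and drbd_send_block() above follows from the same framing: the header's length field must cover the remaining p_data fields, the optional integrity digest, and the block payload. A small worked example; the field layout and the digest size are assumptions:

#include <stdint.h>
#include <stdio.h>

struct demo_header { uint32_t magic; uint16_t command; uint16_t length; };
struct demo_data {
	struct demo_header head;
	uint64_t sector;
	uint64_t block_id;
	uint32_t seq_num;
	uint32_t dp_flags;
};

int main(void)
{
	size_t dgs = 16;	/* digest bytes; 0 when no integrity alg is set */
	size_t req_size = 4096;	/* the block being replicated */

	/* same shape as: sizeof(p) - sizeof(struct p_header) + dgs + req->size */
	size_t len = sizeof(struct demo_data) - sizeof(struct demo_header)
		   + dgs + req_size;

	printf("fixed=%zu digest=%zu data=%zu -> length field=%zu\n",
	       sizeof(struct demo_data) - sizeof(struct demo_header),
	       dgs, req_size, len);
	return 0;
}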
@@ -2526,7 +2526,7 @@ STATIC void drbd_unplug_fn(struct request_queue *q) } spin_unlock_irq(&mdev->req_lock); - if (mdev->state.disk >= Inconsistent) + if (mdev->state.disk >= D_INCONSISTENT) drbd_kick_lo(mdev); } @@ -2535,12 +2535,12 @@ STATIC void drbd_set_defaults(struct drbd_conf *mdev) mdev->sync_conf.after = DRBD_AFTER_DEF; mdev->sync_conf.rate = DRBD_RATE_DEF; mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_DEF; - mdev->state = (union drbd_state_t) { - { .role = Secondary, - .peer = Unknown, - .conn = StandAlone, - .disk = Diskless, - .pdsk = DUnknown, + mdev->state = (union drbd_state) { + { .role = R_SECONDARY, + .peer = R_UNKNOWN, + .conn = C_STANDALONE, + .disk = D_DISKLESS, + .pdsk = D_UNKNOWN, .susp = 0 } }; } @@ -2715,7 +2715,7 @@ STATIC int drbd_create_mempools(void) goto Enomem; drbd_ee_cache = kmem_cache_create( - "drbd_ee_cache", sizeof(struct Tl_epoch_entry), 0, 0, NULL); + "drbd_ee_cache", sizeof(struct drbd_epoch_entry), 0, 0, NULL); if (drbd_ee_cache == NULL) goto Enomem; @@ -3019,7 +3019,7 @@ int __init drbd_init(void) { int err; - if (sizeof(struct Drbd_HandShake_Packet) != 80) { + if (sizeof(struct p_handshake) != 80) { printk(KERN_ERR "drbd: never change the size or layout " "of the HandShake packet.\n"); @@ -3147,7 +3147,7 @@ void drbd_free_resources(struct drbd_conf *mdev) struct meta_data_on_disk { u64 la_size; /* last agreed size. */ - u64 uuid[UUID_SIZE]; /* UUIDs. */ + u64 uuid[UI_SIZE]; /* UUIDs. */ u64 device_uuid; u64 reserved_u64_1; u32 flags; /* MDF */ @@ -3176,12 +3176,12 @@ void drbd_md_sync(struct drbd_conf *mdev) return; del_timer(&mdev->md_sync_timer); - /* We use here Failed and not Attaching because we try to write + /* We use here D_FAILED and not D_ATTACHING because we try to write * metadata even if we detach due to a disk failure! */ - if (!inc_local_if_state(mdev, Failed)) + if (!inc_local_if_state(mdev, D_FAILED)) return; - MTRACE(TraceTypeMDIO, TraceLvlSummary, + MTRACE(TRACE_TYPE_MD_IO, TRACE_LVL_SUMMARY, dev_info(DEV, "Writing meta data super block now.\n"); ); @@ -3190,7 +3190,7 @@ void drbd_md_sync(struct drbd_conf *mdev) memset(buffer, 0, 512); buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); - for (i = Current; i < UUID_SIZE; i++) + for (i = UI_CURRENT; i < UI_SIZE; i++) buffer->uuid[i] = cpu_to_be64(mdev->bc->md.uuid[i]); buffer->flags = cpu_to_be32(mdev->bc->md.flags); buffer->magic = cpu_to_be32(DRBD_MD_MAGIC); @@ -3227,17 +3227,17 @@ void drbd_md_sync(struct drbd_conf *mdev) /** * drbd_md_read: * @bdev: describes the backing storage and the meta-data storage - * Reads the meta data from bdev. Return 0 (NoError) on success, and an - * enum ret_codes in case something goes wrong. - * Currently only: MDIOError, MDInvalid. + * Reads the meta data from bdev. Return 0 (NO_ERROR) on success, and an + * enum drbd_ret_codes in case something goes wrong. + * Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID. 
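The validation that follows is a single repeated pattern: byte-swap each on-disk field, compare it with what the attach path expects, and collapse every mismatch into one invalid-metadata error. A minimal sketch with simplified stand-ins for the struct and the error codes:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

enum { DEMO_NO_ERROR = 0, DEMO_ERR_MD_INVALID = -1 };

struct demo_md {
	uint32_t magic;
	uint32_t al_offset;
	uint32_t bm_offset;
};

static int demo_md_check(const struct demo_md *b, uint32_t want_magic,
			 uint32_t want_al, uint32_t want_bm)
{
	if (ntohl(b->magic) != want_magic)
		return DEMO_ERR_MD_INVALID;	/* wrong or missing superblock */
	if (ntohl(b->al_offset) != want_al)
		return DEMO_ERR_MD_INVALID;	/* layout does not match config */
	if (ntohl(b->bm_offset) != want_bm)
		return DEMO_ERR_MD_INVALID;
	return DEMO_NO_ERROR;
}

int main(void)
{
	struct demo_md b = { htonl(0x83740267), htonl(8), htonl(72) };

	printf("check: %d\n", demo_md_check(&b, 0x83740267, 8, 72));
	return 0;
}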
*/ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) { struct meta_data_on_disk *buffer; - int i, rv = NoError; + int i, rv = NO_ERROR; - if (!inc_local_if_state(mdev, Attaching)) - return MDIOError; + if (!inc_local_if_state(mdev, D_ATTACHING)) + return ERR_IO_MD_DISK; mutex_lock(&mdev->md_io_mutex); buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); @@ -3246,43 +3246,43 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) /* NOTE: cant do normal error processing here as this is called BEFORE disk is attached */ dev_err(DEV, "Error while reading metadata.\n"); - rv = MDIOError; + rv = ERR_IO_MD_DISK; goto err; } if (be32_to_cpu(buffer->magic) != DRBD_MD_MAGIC) { dev_err(DEV, "Error while reading metadata, magic not found.\n"); - rv = MDInvalid; + rv = ERR_MD_INVALID; goto err; } if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) { dev_err(DEV, "unexpected al_offset: %d (expected %d)\n", be32_to_cpu(buffer->al_offset), bdev->md.al_offset); - rv = MDInvalid; + rv = ERR_MD_INVALID; goto err; } if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) { dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n", be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset); - rv = MDInvalid; + rv = ERR_MD_INVALID; goto err; } if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) { dev_err(DEV, "unexpected md_size: %u (expected %u)\n", be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect); - rv = MDInvalid; + rv = ERR_MD_INVALID; goto err; } if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) { dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n", be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE); - rv = MDInvalid; + rv = ERR_MD_INVALID; goto err; } bdev->md.la_size_sect = be64_to_cpu(buffer->la_size); - for (i = Current; i < UUID_SIZE; i++) + for (i = UI_CURRENT; i < UI_SIZE; i++) bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]); bdev->md.flags = be32_to_cpu(buffer->flags); mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents); @@ -3315,10 +3315,10 @@ STATIC void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) { int i; - for (i = History_start; i < History_end; i++) { + for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) { mdev->bc->md.uuid[i+1] = mdev->bc->md.uuid[i]; - MTRACE(TraceTypeUuid, TraceLvlAll, + MTRACE(TRACE_TYPE_UUID, TRACE_LVL_ALL, drbd_print_uuid(mdev, i+1); ); } @@ -3326,8 +3326,8 @@ STATIC void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) { - if (idx == Current) { - if (mdev->state.role == Primary) + if (idx == UI_CURRENT) { + if (mdev->state.role == R_PRIMARY) val |= 1; else val &= ~((u64)1); @@ -3337,7 +3337,7 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) mdev->bc->md.uuid[idx] = val; - MTRACE(TraceTypeUuid, TraceLvlSummary, + MTRACE(TRACE_TYPE_UUID, TRACE_LVL_SUMMARY, drbd_print_uuid(mdev, idx); ); @@ -3349,9 +3349,9 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) { if (mdev->bc->md.uuid[idx]) { drbd_uuid_move_history(mdev); - mdev->bc->md.uuid[History_start] = mdev->bc->md.uuid[idx]; - MTRACE(TraceTypeUuid, TraceLvlMetrics, - drbd_print_uuid(mdev, History_start); + mdev->bc->md.uuid[UI_HISTORY_START] = mdev->bc->md.uuid[idx]; + MTRACE(TRACE_TYPE_UUID, TRACE_LVL_METRICS, + drbd_print_uuid(mdev, UI_HISTORY_START); ); } _drbd_uuid_set(mdev, idx, val); @@ -3367,39 +3367,39 @@ void 
drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) u64 val; dev_info(DEV, "Creating new current UUID\n"); - D_ASSERT(mdev->bc->md.uuid[Bitmap] == 0); - mdev->bc->md.uuid[Bitmap] = mdev->bc->md.uuid[Current]; - MTRACE(TraceTypeUuid, TraceLvlMetrics, - drbd_print_uuid(mdev, Bitmap); + D_ASSERT(mdev->bc->md.uuid[UI_BITMAP] == 0); + mdev->bc->md.uuid[UI_BITMAP] = mdev->bc->md.uuid[UI_CURRENT]; + MTRACE(TRACE_TYPE_UUID, TRACE_LVL_METRICS, + drbd_print_uuid(mdev, UI_BITMAP); ); get_random_bytes(&val, sizeof(u64)); - _drbd_uuid_set(mdev, Current, val); + _drbd_uuid_set(mdev, UI_CURRENT, val); } void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) { - if (mdev->bc->md.uuid[Bitmap] == 0 && val == 0) + if (mdev->bc->md.uuid[UI_BITMAP] == 0 && val == 0) return; if (val == 0) { drbd_uuid_move_history(mdev); - mdev->bc->md.uuid[History_start] = mdev->bc->md.uuid[Bitmap]; - mdev->bc->md.uuid[Bitmap] = 0; + mdev->bc->md.uuid[UI_HISTORY_START] = mdev->bc->md.uuid[UI_BITMAP]; + mdev->bc->md.uuid[UI_BITMAP] = 0; - MTRACE(TraceTypeUuid, TraceLvlMetrics, - drbd_print_uuid(mdev, History_start); - drbd_print_uuid(mdev, Bitmap); + MTRACE(TRACE_TYPE_UUID, TRACE_LVL_METRICS, + drbd_print_uuid(mdev, UI_HISTORY_START); + drbd_print_uuid(mdev, UI_BITMAP); ); } else { - if (mdev->bc->md.uuid[Bitmap]) + if (mdev->bc->md.uuid[UI_BITMAP]) dev_warn(DEV, "bm UUID already set"); - mdev->bc->md.uuid[Bitmap] = val; - mdev->bc->md.uuid[Bitmap] &= ~((u64)1); + mdev->bc->md.uuid[UI_BITMAP] = val; + mdev->bc->md.uuid[UI_BITMAP] &= ~((u64)1); - MTRACE(TraceTypeUuid, TraceLvlMetrics, - drbd_print_uuid(mdev, Bitmap); + MTRACE(TRACE_TYPE_UUID, TRACE_LVL_METRICS, + drbd_print_uuid(mdev, UI_BITMAP); ); } drbd_md_mark_dirty(mdev); @@ -3414,15 +3414,15 @@ int drbd_bmio_set_n_write(struct drbd_conf *mdev) { int rv = -EIO; - if (inc_local_if_state(mdev, Attaching)) { - drbd_md_set_flag(mdev, MDF_FullSync); + if (inc_local_if_state(mdev, D_ATTACHING)) { + drbd_md_set_flag(mdev, MDF_FULL_SYNC); drbd_md_sync(mdev); drbd_bm_set_all(mdev); rv = drbd_bm_write(mdev); if (!rv) { - drbd_md_clear_flag(mdev, MDF_FullSync); + drbd_md_clear_flag(mdev, MDF_FULL_SYNC); drbd_md_sync(mdev); } @@ -3441,7 +3441,7 @@ int drbd_bmio_clear_n_write(struct drbd_conf *mdev) { int rv = -EIO; - if (inc_local_if_state(mdev, Attaching)) { + if (inc_local_if_state(mdev, D_ATTACHING)) { drbd_bm_clear_all(mdev); rv = drbd_bm_write(mdev); dec_local(mdev); @@ -3649,7 +3649,7 @@ STATIC char *_drbd_uuid_str(unsigned int idx) "UUID_FLAGS", }; - return (idx < EXT_UUID_SIZE) ? uuid_str[idx] : "*Unknown UUID index*"; + return (idx < UI_EXTENDED_SIZE) ? uuid_str[idx] : "*Unknown UUID index*"; } /* Pretty print a UUID value */ @@ -3814,7 +3814,7 @@ do { \ } \ } while (0) -STATIC char *dump_st(char *p, int len, union drbd_state_t mask, union drbd_state_t val) +STATIC char *dump_st(char *p, int len, union drbd_state mask, union drbd_state val) { char *op = p; *p = '\0'; @@ -3829,7 +3829,7 @@ STATIC char *dump_st(char *p, int len, union drbd_state_t mask, union drbd_state #define INFOP(fmt, args...) \ do { \ - if (trace_level >= TraceLvlAll) { \ + if (trace_level >= TRACE_LVL_ALL) { \ dev_info(DEV, "%s:%d: %s [%d] %s %s " fmt , \ file, line, current->comm, current->pid, \ sockname, recv ? 
"<<<" : ">>>" , \ @@ -3853,123 +3853,123 @@ STATIC char *_dump_block_id(u64 block_id, char *buff) void _dump_packet(struct drbd_conf *mdev, struct socket *sock, - int recv, union Drbd_Polymorph_Packet *p, char *file, int line) + int recv, union p_polymorph *p, char *file, int line) { char *sockname = sock == mdev->meta.socket ? "meta" : "data"; - int cmd = (recv == 2) ? p->head.command : be16_to_cpu(p->head.command); + int cmd = (recv == 2) ? p->header.command : be16_to_cpu(p->header.command); char tmp[300]; - union drbd_state_t m, v; + union drbd_state m, v; switch (cmd) { - case HandShake: + case P_HAND_SHAKE: INFOP("%s (protocol %u-%u)\n", cmdname(cmd), - be32_to_cpu(p->HandShake.protocol_min), - be32_to_cpu(p->HandShake.protocol_max)); + be32_to_cpu(p->handshake.protocol_min), + be32_to_cpu(p->handshake.protocol_max)); break; - case ReportBitMap: /* don't report this */ - case ReportCBitMap: /* don't report this */ + case P_BITMAP: /* don't report this */ + case P_COMPRESSED_BITMAP: /* don't report this */ break; - case Data: + case P_DATA: INFOP("%s (sector %llus, id %s, seq %u, f %x)\n", cmdname(cmd), - (unsigned long long)be64_to_cpu(p->Data.sector), - _dump_block_id(p->Data.block_id, tmp), - be32_to_cpu(p->Data.seq_num), - be32_to_cpu(p->Data.dp_flags) + (unsigned long long)be64_to_cpu(p->data.sector), + _dump_block_id(p->data.block_id, tmp), + be32_to_cpu(p->data.seq_num), + be32_to_cpu(p->data.dp_flags) ); break; - case DataReply: - case RSDataReply: + case P_DATA_REPLY: + case P_RS_DATA_REPLY: INFOP("%s (sector %llus, id %s)\n", cmdname(cmd), - (unsigned long long)be64_to_cpu(p->Data.sector), - _dump_block_id(p->Data.block_id, tmp) + (unsigned long long)be64_to_cpu(p->data.sector), + _dump_block_id(p->data.block_id, tmp) ); break; - case RecvAck: - case WriteAck: - case RSWriteAck: - case DiscardAck: - case NegAck: - case NegRSDReply: + case P_RECV_ACK: + case P_WRITE_ACK: + case P_RS_WRITE_ACK: + case P_DISCARD_ACK: + case P_NEG_ACK: + case P_NEG_RS_DREPLY: INFOP("%s (sector %llus, size %u, id %s, seq %u)\n", cmdname(cmd), - (long long)be64_to_cpu(p->BlockAck.sector), - be32_to_cpu(p->BlockAck.blksize), - _dump_block_id(p->BlockAck.block_id, tmp), - be32_to_cpu(p->BlockAck.seq_num) + (long long)be64_to_cpu(p->block_ack.sector), + be32_to_cpu(p->block_ack.blksize), + _dump_block_id(p->block_ack.block_id, tmp), + be32_to_cpu(p->block_ack.seq_num) ); break; - case DataRequest: - case RSDataRequest: + case P_DATA_REQUEST: + case P_RS_DATA_REQUEST: INFOP("%s (sector %llus, size %u, id %s)\n", cmdname(cmd), - (long long)be64_to_cpu(p->BlockRequest.sector), - be32_to_cpu(p->BlockRequest.blksize), - _dump_block_id(p->BlockRequest.block_id, tmp) + (long long)be64_to_cpu(p->block_req.sector), + be32_to_cpu(p->block_req.blksize), + _dump_block_id(p->block_req.block_id, tmp) ); break; - case Barrier: - case BarrierAck: - INFOP("%s (barrier %u)\n", cmdname(cmd), p->Barrier.barrier); + case P_BARRIER: + case P_BARRIER_ACK: + INFOP("%s (barrier %u)\n", cmdname(cmd), p->barrier.barrier); break; - case SyncParam: - case SyncParam89: + case P_SYNC_PARAM: + case P_SYNC_PARAM89: INFOP("%s (rate %u, verify-alg \"%.64s\", csums-alg \"%.64s\")\n", - cmdname(cmd), be32_to_cpu(p->SyncParam89.rate), - p->SyncParam89.verify_alg, p->SyncParam89.csums_alg); + cmdname(cmd), be32_to_cpu(p->rs_param_89.rate), + p->rs_param_89.verify_alg, p->rs_param_89.csums_alg); break; - case ReportUUIDs: + case P_UUIDS: INFOP("%s Curr:%016llX, Bitmap:%016llX, " "HisSt:%016llX, HisEnd:%016llX\n", cmdname(cmd), - (unsigned 
long long)be64_to_cpu(p->GenCnt.uuid[Current]), - (unsigned long long)be64_to_cpu(p->GenCnt.uuid[Bitmap]), - (unsigned long long)be64_to_cpu(p->GenCnt.uuid[History_start]), - (unsigned long long)be64_to_cpu(p->GenCnt.uuid[History_end])); + (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_CURRENT]), + (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_BITMAP]), + (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_HISTORY_START]), + (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_HISTORY_END])); break; - case ReportSizes: + case P_SIZES: INFOP("%s (d %lluMiB, u %lluMiB, c %lldMiB, " "max bio %x, q order %x)\n", cmdname(cmd), - (long long)(be64_to_cpu(p->Sizes.d_size)>>(20-9)), - (long long)(be64_to_cpu(p->Sizes.u_size)>>(20-9)), - (long long)(be64_to_cpu(p->Sizes.c_size)>>(20-9)), - be32_to_cpu(p->Sizes.max_segment_size), - be32_to_cpu(p->Sizes.queue_order_type)); + (long long)(be64_to_cpu(p->sizes.d_size)>>(20-9)), + (long long)(be64_to_cpu(p->sizes.u_size)>>(20-9)), + (long long)(be64_to_cpu(p->sizes.c_size)>>(20-9)), + be32_to_cpu(p->sizes.max_segment_size), + be32_to_cpu(p->sizes.queue_order_type)); break; - case ReportState: - v.i = be32_to_cpu(p->State.state); + case P_STATE: + v.i = be32_to_cpu(p->state.state); m.i = 0xffffffff; dump_st(tmp, sizeof(tmp), m, v); INFOP("%s (s %x {%s})\n", cmdname(cmd), v.i, tmp); break; - case StateChgRequest: - m.i = be32_to_cpu(p->ReqState.mask); - v.i = be32_to_cpu(p->ReqState.val); + case P_STATE_CHG_REQ: + m.i = be32_to_cpu(p->req_state.mask); + v.i = be32_to_cpu(p->req_state.val); dump_st(tmp, sizeof(tmp), m, v); INFOP("%s (m %x v %x {%s})\n", cmdname(cmd), m.i, v.i, tmp); break; - case StateChgReply: + case P_STATE_CHG_REPLY: INFOP("%s (ret %x)\n", cmdname(cmd), - be32_to_cpu(p->RqSReply.retcode)); + be32_to_cpu(p->req_state_reply.retcode)); break; - case Ping: - case PingAck: + case P_PING: + case P_PING_ACK: /* * Dont trace pings at summary level */ - if (trace_level < TraceLvlAll) + if (trace_level < TRACE_LVL_ALL) break; /* fall through... 
*/
 	default:
@@ -4015,14 +4015,14 @@ void _dump_bio(const char *pfx, struct drbd_conf *mdev, struct bio *bio, int com
 		 bio->bi_sector << SECTOR_SHIFT,
 		 bio->bi_size);

-	if (trace_level >= TraceLvlMetrics &&
+	if (trace_level >= TRACE_LVL_METRICS &&
 	    ((biorw == WRITE) ^ complete)) {
 		printk(KERN_DEBUG " ind page offset length\n");
 		__bio_for_each_segment(bvec, bio, segno, 0) {
 			printk(KERN_DEBUG " [%d] %p %8.8x %8.8x\n", segno,
 			       bvec->bv_page, bvec->bv_offset, bvec->bv_len);

-			if (trace_level >= TraceLvlAll) {
+			if (trace_level >= TRACE_LVL_ALL) {
 				char *bvec_buf;
 				unsigned long flags;
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 020c66741bd1..3b46a934c2d6 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -206,11 +206,11 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
 	char *ex_to_string;
 	int r;
 	enum drbd_disk_state nps;
-	enum fencing_policy fp;
+	enum drbd_fencing_p fp;

-	D_ASSERT(mdev->state.pdsk == DUnknown);
+	D_ASSERT(mdev->state.pdsk == D_UNKNOWN);

-	if (inc_local_if_state(mdev, Consistent)) {
+	if (inc_local_if_state(mdev, D_CONSISTENT)) {
 		fp = mdev->bc->dc.fencing;
 		dec_local(mdev);
 	} else {
@@ -218,42 +218,42 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
 		return mdev->state.pdsk;
 	}

-	if (fp == Stonith)
-		_drbd_request_state(mdev, NS(susp, 1), ChgWaitComplete);
+	if (fp == FP_STONITH)
+		_drbd_request_state(mdev, NS(susp, 1), CS_WAIT_COMPLETE);

 	r = drbd_khelper(mdev, "fence-peer");

 	switch ((r>>8) & 0xff) {
 	case 3: /* peer is inconsistent */
 		ex_to_string = "peer is inconsistent or worse";
-		nps = Inconsistent;
+		nps = D_INCONSISTENT;
 		break;
 	case 4:
 		ex_to_string = "peer is outdated";
-		nps = Outdated;
+		nps = D_OUTDATED;
 		break;
 	case 5: /* peer was down, we will(have) create(d) a new UUID anyways... */
-		/* If we would be more strict, we would return DUnknown here. */
+		/* If we would be more strict, we would return D_UNKNOWN here. */
 		ex_to_string = "peer is unreachable, assumed to be dead";
-		nps = Outdated;
+		nps = D_OUTDATED;
 		break;
 	case 6: /* Peer is primary, voluntarily outdate myself.
-		 * This is useful when an unconnected Secondary is asked to
-		 * become Primary, but findes the other peer being active. */
+		 * This is useful when an unconnected R_SECONDARY is asked to
+		 * become R_PRIMARY, but finds the other peer being active. */
 		ex_to_string = "peer is active";
 		dev_warn(DEV, "Peer is primary, outdating myself.\n");
-		nps = DUnknown;
-		_drbd_request_state(mdev, NS(disk, Outdated), ChgWaitComplete);
+		nps = D_UNKNOWN;
+		_drbd_request_state(mdev, NS(disk, D_OUTDATED), CS_WAIT_COMPLETE);
 		break;
 	case 7:
-		if (fp != Stonith)
+		if (fp != FP_STONITH)
 			dev_err(DEV, "fence-peer() = 7 && fencing != Stonith !!!\n");
 		ex_to_string = "peer was stonithed";
-		nps = Outdated;
+		nps = D_OUTDATED;
 		break;
 	default:
 		/* The script is broken ... */
-		nps = DUnknown;
+		nps = D_UNKNOWN;
 		dev_err(DEV, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
 		return nps;
 	}
@@ -270,69 +270,69 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 	int r = 0;
 	int try = 0;
 	int forced = 0;
-	union drbd_state_t mask, val;
+	union drbd_state mask, val;
 	enum drbd_disk_state nps;

-	if (new_role == Primary)
+	if (new_role == R_PRIMARY)
 		request_ping(mdev); /* Detect a dead peer ASAP */

 	mutex_lock(&mdev->state_mutex);

-	mask.i = 0; mask.role = role_mask;
+	mask.i = 0; mask.role = R_MASK;
 	val.i = 0; val.role = new_role;

 	while (try++ < max_tries) {
-		r = _drbd_request_state(mdev, mask, val, ChgWaitComplete);
+		r = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE);

 		/* in case we first succeeded to outdate,
 		 * but now suddenly could establish a connection */
-		if (r == SS_CW_FailedByPeer && mask.pdsk != 0) {
+		if (r == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
 			val.pdsk = 0;
 			mask.pdsk = 0;
 			continue;
 		}

-		if (r == SS_NoUpToDateDisk && force &&
-		    (mdev->state.disk == Inconsistent ||
-		     mdev->state.disk == Outdated)) {
-			mask.disk = disk_mask;
-			val.disk = UpToDate;
+		if (r == SS_NO_UP_TO_DATE_DISK && force &&
+		    (mdev->state.disk == D_INCONSISTENT ||
+		     mdev->state.disk == D_OUTDATED)) {
+			mask.disk = D_MASK;
+			val.disk = D_UP_TO_DATE;
 			forced = 1;
 			continue;
 		}

-		if (r == SS_NoUpToDateDisk &&
-		    mdev->state.disk == Consistent) {
-			D_ASSERT(mdev->state.pdsk == DUnknown);
+		if (r == SS_NO_UP_TO_DATE_DISK &&
+		    mdev->state.disk == D_CONSISTENT) {
+			D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
 			nps = drbd_try_outdate_peer(mdev);

-			if (nps == Outdated) {
-				val.disk = UpToDate;
-				mask.disk = disk_mask;
+			if (nps == D_OUTDATED) {
+				val.disk = D_UP_TO_DATE;
+				mask.disk = D_MASK;
 			}

 			val.pdsk = nps;
-			mask.pdsk = disk_mask;
+			mask.pdsk = D_MASK;

 			continue;
 		}

-		if (r == SS_NothingToDo)
+		if (r == SS_NOTHING_TO_DO)
 			goto fail;
-		if (r == SS_PrimaryNOP) {
+		if (r == SS_PRIMARY_NOP) {
 			nps = drbd_try_outdate_peer(mdev);

-			if (force && nps > Outdated) {
+			if (force && nps > D_OUTDATED) {
 				dev_warn(DEV, "Forced into split brain situation!\n");
-				nps = Outdated;
+				nps = D_OUTDATED;
 			}

-			mask.pdsk = disk_mask;
+			mask.pdsk = D_MASK;
 			val.pdsk = nps;

 			continue;
 		}
-		if (r == SS_TwoPrimaries) {
+		if (r == SS_TWO_PRIMARIES) {
 			/* Maybe the peer is detected as dead very soon...
 			   retry at most once more in this case.
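
The switch above keys on the fence-peer helper's exit status; the expression (r>>8) & 0xff extracts the exit code from a raw wait status, which is what WEXITSTATUS() decodes in user space. Purely as an illustration, a free-standing sketch of that dispatch follows; the sample_* names are invented for the example and are not part of the driver:

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/wait.h>

    // Invented stand-ins for the peer-disk states used above.
    enum sample_pdsk { SAMPLE_D_UNKNOWN, SAMPLE_D_OUTDATED, SAMPLE_D_INCONSISTENT };

    static enum sample_pdsk map_fence_exit(int status)
    {
        switch (WEXITSTATUS(status)) {  // same as (status >> 8) & 0xff
        case 3:                         // peer is inconsistent or worse
            return SAMPLE_D_INCONSISTENT;
        case 4:                         // peer is outdated
        case 5:                         // peer unreachable, assumed dead
        case 7:                         // peer was stonithed
            return SAMPLE_D_OUTDATED;
        case 6:                         // peer is an active primary
        default:                        // helper is broken
            return SAMPLE_D_UNKNOWN;
        }
    }

    int main(void)
    {
        int status = system("exit 4");  // run a trivial stand-in "helper"
        printf("mapped state: %d\n", map_fence_exit(status));  // prints 1
        return 0;
    }
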
*/ __set_current_state(TASK_INTERRUPTIBLE); @@ -341,10 +341,10 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) try = max_tries - 1; continue; } - if (r < SS_Success) { + if (r < SS_SUCCESS) { r = _drbd_request_state(mdev, mask, val, - ChgStateVerbose + ChgWaitComplete); - if (r < SS_Success) + CS_VERBOSE + CS_WAIT_COMPLETE); + if (r < SS_SUCCESS) goto fail; } break; @@ -358,10 +358,10 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) /* Wait until nothing is on the fly :) */ wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0); - if (new_role == Secondary) { + if (new_role == R_SECONDARY) { set_disk_ro(mdev->vdisk, TRUE); if (inc_local(mdev)) { - mdev->bc->md.uuid[Current] &= ~(u64)1; + mdev->bc->md.uuid[UI_CURRENT] &= ~(u64)1; dec_local(mdev); } } else { @@ -371,22 +371,22 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) } set_disk_ro(mdev->vdisk, FALSE); if (inc_local(mdev)) { - if (((mdev->state.conn < Connected || - mdev->state.pdsk <= Failed) - && mdev->bc->md.uuid[Bitmap] == 0) || forced) + if (((mdev->state.conn < C_CONNECTED || + mdev->state.pdsk <= D_FAILED) + && mdev->bc->md.uuid[UI_BITMAP] == 0) || forced) drbd_uuid_new_current(mdev); - mdev->bc->md.uuid[Current] |= (u64)1; + mdev->bc->md.uuid[UI_CURRENT] |= (u64)1; dec_local(mdev); } } - if ((new_role == Secondary) && inc_local(mdev)) { + if ((new_role == R_SECONDARY) && inc_local(mdev)) { drbd_al_to_on_disk_bm(mdev); dec_local(mdev); } - if (mdev->state.conn >= WFReportParams) { + if (mdev->state.conn >= C_WF_REPORT_PARAMS) { /* if this was forced, we should consider sync */ if (forced) drbd_send_uuids(mdev); @@ -409,12 +409,12 @@ STATIC int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, memset(&primary_args, 0, sizeof(struct primary)); if (!primary_from_tags(mdev, nlp->tag_list, &primary_args)) { - reply->ret_code = UnknownMandatoryTag; + reply->ret_code = ERR_MANDATORY_TAG; return 0; } reply->ret_code = - drbd_set_role(mdev, Primary, primary_args.overwrite_peer); + drbd_set_role(mdev, R_PRIMARY, primary_args.overwrite_peer); return 0; } @@ -422,7 +422,7 @@ STATIC int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, STATIC int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { - reply->ret_code = drbd_set_role(mdev, Secondary, 0); + reply->ret_code = drbd_set_role(mdev, R_SECONDARY, 0); return 0; } @@ -486,16 +486,16 @@ char *ppsize(char *buf, unsigned long long size) } /* there is still a theoretical deadlock when called from receiver - * on an Inconsistent Primary: + * on an D_INCONSISTENT R_PRIMARY: * remote READ does inc_ap_bio, receiver would need to receive answer * packet from remote to dec_ap_bio again. * receiver receive_sizes(), comes here, * waits for ap_bio_cnt == 0. -> deadlock. * but this cannot happen, actually, because: - * Primary Inconsistent, and peer's disk is unreachable + * R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable * (not connected, * or bad/no disk on peer): * see drbd_fail_request_early, ap_bio_cnt is zero. - * Primary Inconsistent, and SyncTarget: + * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET: * peer may not initiate a resize. */ void drbd_suspend_io(struct drbd_conf *mdev) @@ -520,7 +520,7 @@ void drbd_resume_io(struct drbd_conf *mdev) * indicate success. * You should call drbd_md_sync() after calling this function. 
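
The NS() idiom and the mask/val pair used throughout drbd_set_role() rely on union drbd_state overlaying named bitfields on a single 32-bit word, so a partial state change reduces to one masked assignment. A minimal model of that mechanism, with invented field widths (the real layout lives in the driver's headers):

    #include <stdint.h>
    #include <stdio.h>

    // Invented miniature of a state union: bitfields overlaid on one word.
    union sample_state {
        struct {
            unsigned role:2;
            unsigned disk:4;
            unsigned conn:5;
        };
        uint32_t i;
    };

    // Apply only the fields selected by mask; everything else is kept.
    static union sample_state apply_state(union sample_state os,
                                          union sample_state mask,
                                          union sample_state val)
    {
        union sample_state ns;
        ns.i = (os.i & ~mask.i) | val.i;
        return ns;
    }

    int main(void)
    {
        union sample_state os, mask, val;

        os.i = 0; os.role = 1; os.disk = 8; os.conn = 10;
        mask.i = 0; mask.role = 3;   // select the role field only
        val.i = 0;  val.role = 2;    // request the new role value

        union sample_state ns = apply_state(os, mask, val);
        printf("role=%u disk=%u conn=%u\n", ns.role, ns.disk, ns.conn);  // role=2 disk=8 conn=10
        return 0;
    }
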
*/ -enum determin_dev_size_enum drbd_determin_dev_size(struct drbd_conf *mdev) __must_hold(local) +enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_hold(local) { sector_t prev_first_sect, prev_size; /* previous meta location */ sector_t la_size; @@ -528,7 +528,7 @@ enum determin_dev_size_enum drbd_determin_dev_size(struct drbd_conf *mdev) __mus char ppb[10]; int md_moved, la_size_changed; - enum determin_dev_size_enum rv = unchanged; + enum determine_dev_size rv = unchanged; /* race: * application request passes inc_ap_bio, @@ -717,7 +717,7 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu max_seg_s = min(b->max_sectors * b->hardsect_size, max_seg_s); - MTRACE(TraceTypeRq, TraceLvlSummary, + MTRACE(TRACE_TYPE_RQ, TRACE_LVL_SUMMARY, DUMPI(b->max_sectors); DUMPI(b->max_phys_segments); DUMPI(b->max_hw_segments); @@ -739,15 +739,7 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu q->seg_boundary_mask = PAGE_SIZE-1; blk_queue_stack_limits(q, b); - /* KERNEL BUG. in ll_rw_blk.c ?? - * t->max_segment_size = min(t->max_segment_size,b->max_segment_size); - * should be - * t->max_segment_size = min_not_zero(...,...) - * workaround here: */ - if (q->max_segment_size == 0) - q->max_segment_size = max_seg_s; - - MTRACE(TraceTypeRq, TraceLvlSummary, + MTRACE(TRACE_TYPE_RQ, TRACE_LVL_SUMMARY, DUMPI(q->max_sectors); DUMPI(q->max_phys_segments); DUMPI(q->max_hw_segments); @@ -774,21 +766,21 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { - enum ret_codes retcode; - enum determin_dev_size_enum dd; + enum drbd_ret_codes retcode; + enum determine_dev_size dd; sector_t max_possible_sectors; sector_t min_md_device_sectors; struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */ struct inode *inode, *inode2; struct lru_cache *resync_lru = NULL; - union drbd_state_t ns, os; + union drbd_state ns, os; int rv, ntries = 0; int cp_discovered = 0; int hardsect; /* if you want to reconfigure, please tear down first */ - if (mdev->state.disk > Diskless) { - retcode = HaveDiskConfig; + if (mdev->state.disk > D_DISKLESS) { + retcode = ERR_DISK_CONFIGURED; goto fail; } @@ -802,7 +794,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp break; if (ntries++ >= 5) { dev_warn(DEV, "drbd_nl_disk_conf: mdev->bc not NULL.\n"); - retcode = HaveDiskConfig; + retcode = ERR_DISK_CONFIGURED; goto fail; } __set_current_state(TASK_INTERRUPTIBLE); @@ -811,7 +803,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp nbc = kmalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); if (!nbc) { - retcode = KMallocFailed; + retcode = ERR_NOMEM; goto fail; } @@ -829,7 +821,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } if (!disk_conf_from_tags(mdev, nlp->tag_list, &nbc->dc)) { - retcode = UnknownMandatoryTag; + retcode = ERR_MANDATORY_TAG; goto fail; } @@ -837,7 +829,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp nbc->md_file = NULL; if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { - retcode = LDMDInvalid; + retcode = ERR_MD_IDX_INVALID; goto fail; } @@ -846,14 +838,14 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev, PTR_ERR(nbc->lo_file)); 
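
The attach path that follows opens the configured backing and meta devices, insists on S_ISBLK(), and rejects devices that are too small. A rough user-space analogue of those checks, for illustration only (the helper name and the size limit are made up):

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/stat.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>   // BLKGETSIZE64

    // Returns 0 if path is a block device of at least min_bytes, else -1.
    static int check_backing_dev(const char *path, unsigned long long min_bytes)
    {
        struct stat st;
        unsigned long long size = 0;
        int fd = open(path, O_RDONLY);

        if (fd < 0)
            return -1;                          // analogous to ERR_OPEN_DISK
        if (fstat(fd, &st) < 0 || !S_ISBLK(st.st_mode)) {
            close(fd);                          // analogous to ERR_DISK_NOT_BDEV
            return -1;
        }
        if (ioctl(fd, BLKGETSIZE64, &size) < 0 || size < min_bytes) {
            close(fd);                          // analogous to ERR_DISK_TO_SMALL
            return -1;
        }
        close(fd);
        return 0;
    }

    int main(int argc, char **argv)
    {
        if (argc > 1)
            printf("%s: %s\n", argv[1],
                   check_backing_dev(argv[1], 1ULL << 20) ? "rejected" : "ok");
        return 0;
    }
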
nbc->lo_file = NULL; - retcode = LDNameInvalid; + retcode = ERR_OPEN_DISK; goto fail; } inode = nbc->lo_file->f_dentry->d_inode; if (!S_ISBLK(inode->i_mode)) { - retcode = LDNoBlockDev; + retcode = ERR_DISK_NOT_BDEV; goto fail; } @@ -862,14 +854,14 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, PTR_ERR(nbc->md_file)); nbc->md_file = NULL; - retcode = MDNameInvalid; + retcode = ERR_OPEN_MD_DISK; goto fail; } inode2 = nbc->md_file->f_dentry->d_inode; if (!S_ISBLK(inode2->i_mode)) { - retcode = MDNoBlockDev; + retcode = ERR_MD_NOT_BDEV; goto fail; } @@ -880,19 +872,19 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp nbc->backing_bdev->bd_holder, nbc->backing_bdev->bd_contains->bd_holder, nbc->backing_bdev->bd_holders); - retcode = LDMounted; + retcode = ERR_BDCLAIM_DISK; goto fail; } resync_lru = lc_alloc("resync", 61, sizeof(struct bm_extent), mdev); if (!resync_lru) { - retcode = KMallocFailed; + retcode = ERR_NOMEM; goto release_bdev_fail; } if (!mdev->bitmap) { if (drbd_bm_init(mdev)) { - retcode = KMallocFailed; + retcode = ERR_NOMEM; goto release_bdev_fail; } } @@ -902,14 +894,14 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL || nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT) ? (void *)mdev : (void *) drbd_m_holder)) { - retcode = MDMounted; + retcode = ERR_BDCLAIM_MD_DISK; goto release_bdev_fail; } if ((nbc->backing_bdev == nbc->md_bdev) != (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL || nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) { - retcode = LDMDInvalid; + retcode = ERR_MD_IDX_INVALID; goto release_bdev2_fail; } @@ -920,7 +912,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp dev_err(DEV, "max capacity %llu smaller than disk size %llu\n", (unsigned long long) drbd_get_max_capacity(nbc), (unsigned long long) nbc->dc.disk_size); - retcode = LDDeviceTooSmall; + retcode = ERR_DISK_TO_SMALL; goto release_bdev2_fail; } @@ -939,7 +931,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp (unsigned long long) max_possible_sectors); if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) { - retcode = MDDeviceTooSmall; + retcode = ERR_MD_DISK_TO_SMALL; dev_warn(DEV, "refusing attach: md-device too small, " "at least %llu sectors needed for this meta-disk type\n", (unsigned long long) min_md_device_sectors); @@ -947,10 +939,10 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } /* Make sure the new disk is big enough - * (we may currently be Primary with no local disk...) */ + * (we may currently be R_PRIMARY with no local disk...) 
*/ if (drbd_get_max_capacity(nbc) < drbd_get_capacity(mdev->this_bdev)) { - retcode = LDDeviceTooSmall; + retcode = ERR_DISK_TO_SMALL; goto release_bdev2_fail; } @@ -958,46 +950,46 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp drbd_suspend_io(mdev); wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt)); - retcode = _drbd_request_state(mdev, NS(disk, Attaching), ChgStateVerbose); + retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); drbd_resume_io(mdev); - if (retcode < SS_Success) + if (retcode < SS_SUCCESS) goto release_bdev2_fail; - if (!inc_local_if_state(mdev, Attaching)) + if (!inc_local_if_state(mdev, D_ATTACHING)) goto force_diskless; drbd_thread_start(&mdev->worker); drbd_md_set_sector_offsets(mdev, nbc); retcode = drbd_md_read(mdev, nbc); - if (retcode != NoError) + if (retcode != NO_ERROR) goto force_diskless_dec; - if (mdev->state.conn < Connected && - mdev->state.role == Primary && - (mdev->ed_uuid & ~((u64)1)) != (nbc->md.uuid[Current] & ~((u64)1))) { + if (mdev->state.conn < C_CONNECTED && + mdev->state.role == R_PRIMARY && + (mdev->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { dev_err(DEV, "Can only attach to data with current UUID=%016llX\n", (unsigned long long)mdev->ed_uuid); - retcode = DataOfWrongCurrent; + retcode = ERR_DATA_NOT_CURRENT; goto force_diskless_dec; } /* Since we are diskless, fix the AL first... */ if (drbd_check_al_size(mdev)) { - retcode = KMallocFailed; + retcode = ERR_NOMEM; goto force_diskless_dec; } /* Prevent shrinking of consistent devices ! */ - if (drbd_md_test_flag(nbc, MDF_Consistent) && + if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && drbd_new_dev_size(mdev, nbc) < nbc->md.la_size_sect) { dev_warn(DEV, "refusing to truncate a consistent device\n"); - retcode = LDDeviceTooSmall; + retcode = ERR_DISK_TO_SMALL; goto force_diskless_dec; } if (!drbd_al_read_log(mdev, nbc)) { - retcode = MDIOError; + retcode = ERR_IO_MD_DISK; goto force_diskless_dec; } @@ -1040,12 +1032,12 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp mdev->write_ordering = WO_bio_barrier; drbd_bump_write_ordering(mdev, WO_bio_barrier); - if (drbd_md_test_flag(mdev->bc, MDF_CrashedPrimary)) + if (drbd_md_test_flag(mdev->bc, MDF_CRASHED_PRIMARY)) set_bit(CRASHED_PRIMARY, &mdev->flags); else clear_bit(CRASHED_PRIMARY, &mdev->flags); - if (drbd_md_test_flag(mdev->bc, MDF_PrimaryInd)) { + if (drbd_md_test_flag(mdev->bc, MDF_PRIMARY_IND)) { set_bit(CRASHED_PRIMARY, &mdev->flags); cp_discovered = 1; } @@ -1057,13 +1049,13 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp drbd_setup_queue_param(mdev, DRBD_MAX_SEGMENT_SIZE); - /* If I am currently not Primary, + /* If I am currently not R_PRIMARY, * but meta data primary indicator is set, * I just now recover from a hard crash, - * and have been Primary before that crash. + * and have been R_PRIMARY before that crash. * * Now, if I had no connection before that crash - * (have been degraded Primary), chances are that + * (have been degraded R_PRIMARY), chances are that * I won't find my peer now either. * * In that case, and _only_ in that case, @@ -1072,28 +1064,28 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp * degraded but active "cluster" after a certain timeout. 
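
The comment above pins down exactly when the shorter degraded wait-for-connection timeout should be armed: the node is not primary now, the metadata says it crashed as primary, and it had no connection at that time. Spelled out as a tiny predicate, with invented flag constants standing in for the MDF_* bits:

    #include <stdio.h>

    // Invented stand-ins for the metadata flag bits tested above.
    #define SAMPLE_MDF_PRIMARY_IND   (1u << 0)
    #define SAMPLE_MDF_CONNECTED_IND (1u << 1)

    // Use the shorter degraded timeout only for a node that crashed as a
    // primary without a peer connection and is not primary right now.
    static int use_degraded_timeout(int is_primary_now, unsigned md_flags)
    {
        return !is_primary_now &&
               (md_flags & SAMPLE_MDF_PRIMARY_IND) &&
               !(md_flags & SAMPLE_MDF_CONNECTED_IND);
    }

    int main(void)
    {
        printf("%d\n", use_degraded_timeout(0, SAMPLE_MDF_PRIMARY_IND));      // 1
        printf("%d\n", use_degraded_timeout(0, SAMPLE_MDF_PRIMARY_IND |
                                               SAMPLE_MDF_CONNECTED_IND));    // 0
        return 0;
    }
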
 */
 	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
-	if (mdev->state.role != Primary &&
-	    drbd_md_test_flag(mdev->bc, MDF_PrimaryInd) &&
-	    !drbd_md_test_flag(mdev->bc, MDF_ConnectedInd))
+	if (mdev->state.role != R_PRIMARY &&
+	    drbd_md_test_flag(mdev->bc, MDF_PRIMARY_IND) &&
+	    !drbd_md_test_flag(mdev->bc, MDF_CONNECTED_IND))
 		set_bit(USE_DEGR_WFC_T, &mdev->flags);

 	dd = drbd_determin_dev_size(mdev);
 	if (dd == dev_size_error) {
-		retcode = VMallocFailed;
+		retcode = ERR_NOMEM_BITMAP;
 		goto force_diskless_dec;
 	} else if (dd == grew)
 		set_bit(RESYNC_AFTER_NEG, &mdev->flags);

-	if (drbd_md_test_flag(mdev->bc, MDF_FullSync)) {
+	if (drbd_md_test_flag(mdev->bc, MDF_FULL_SYNC)) {
 		dev_info(DEV, "Assuming that all blocks are out of sync "
 		     "(aka FullSync)\n");
 		if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from attaching")) {
-			retcode = MDIOError;
+			retcode = ERR_IO_MD_DISK;
 			goto force_diskless_dec;
 		}
 	} else {
 		if (drbd_bitmap_io(mdev, &drbd_bm_read, "read from attaching") < 0) {
-			retcode = MDIOError;
+			retcode = ERR_IO_MD_DISK;
 			goto force_diskless_dec;
 		}
 	}
@@ -1106,51 +1098,51 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	spin_lock_irq(&mdev->req_lock);
 	os = mdev->state;
 	ns.i = os.i;
-	/* If MDF_Consistent is not set go into inconsistent state,
+	/* If MDF_CONSISTENT is not set go into inconsistent state,
 	   otherwise investige MDF_WasUpToDate...
-	   If MDF_WasUpToDate is not set go into Outdated disk state,
-	   otherwise into Consistent state.
+	   If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
+	   otherwise into D_CONSISTENT state.
 	*/
-	if (drbd_md_test_flag(mdev->bc, MDF_Consistent)) {
-		if (drbd_md_test_flag(mdev->bc, MDF_WasUpToDate))
-			ns.disk = Consistent;
+	if (drbd_md_test_flag(mdev->bc, MDF_CONSISTENT)) {
+		if (drbd_md_test_flag(mdev->bc, MDF_WAS_UP_TO_DATE))
+			ns.disk = D_CONSISTENT;
 		else
-			ns.disk = Outdated;
+			ns.disk = D_OUTDATED;
 	} else {
-		ns.disk = Inconsistent;
+		ns.disk = D_INCONSISTENT;
 	}

-	if (drbd_md_test_flag(mdev->bc, MDF_PeerOutDated))
-		ns.pdsk = Outdated;
+	if (drbd_md_test_flag(mdev->bc, MDF_PEER_OUT_DATED))
+		ns.pdsk = D_OUTDATED;

-	if ( ns.disk == Consistent &&
-	    (ns.pdsk == Outdated || mdev->bc->dc.fencing == DontCare))
-		ns.disk = UpToDate;
+	if ( ns.disk == D_CONSISTENT &&
+	    (ns.pdsk == D_OUTDATED || mdev->bc->dc.fencing == FP_DONT_CARE))
+		ns.disk = D_UP_TO_DATE;

-	/* All tests on MDF_PrimaryInd, MDF_ConnectedInd,
-	   MDF_Consistent and MDF_WasUpToDate must happen before
+	/* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
+	   MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
 	   this point, because drbd_request_state() modifies these
 	   flags. */

-	/* In case we are Connected postpone any desicion on the new disk
+	/* In case we are C_CONNECTED postpone any decision on the new disk
 	   state after the negotiation phase.
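
The flag-to-state decision just quoted is a pure function of three metadata bits plus the fencing policy, so it can be read as a small truth table. For illustration, the same logic as one free-standing function; the names are invented, and the peer-outdated flag is folded straight into the promotion test rather than going through a separate pdsk field as the driver does:

    #include <stdio.h>

    #define S_MDF_CONSISTENT     (1u << 0)
    #define S_MDF_WAS_UP_TO_DATE (1u << 1)
    #define S_MDF_PEER_OUT_DATED (1u << 2)

    enum s_disk { S_INCONSISTENT, S_OUTDATED, S_CONSISTENT, S_UP_TO_DATE };

    static enum s_disk disk_state_from_flags(unsigned f, int fencing_dont_care)
    {
        enum s_disk d;

        if (!(f & S_MDF_CONSISTENT))
            d = S_INCONSISTENT;
        else if (f & S_MDF_WAS_UP_TO_DATE)
            d = S_CONSISTENT;
        else
            d = S_OUTDATED;

        // A consistent disk is promoted to up-to-date if the peer is known
        // to be outdated, or if no fencing policy is configured at all.
        if (d == S_CONSISTENT &&
            ((f & S_MDF_PEER_OUT_DATED) || fencing_dont_care))
            d = S_UP_TO_DATE;
        return d;
    }

    int main(void)
    {
        unsigned f = S_MDF_CONSISTENT | S_MDF_WAS_UP_TO_DATE | S_MDF_PEER_OUT_DATED;
        printf("%d\n", disk_state_from_flags(f, 0));  // 3 == S_UP_TO_DATE
        return 0;
    }
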
*/ - if (mdev->state.conn == Connected) { + if (mdev->state.conn == C_CONNECTED) { mdev->new_state_tmp.i = ns.i; ns.i = os.i; - ns.disk = Negotiating; + ns.disk = D_NEGOTIATING; } - rv = _drbd_set_state(mdev, ns, ChgStateVerbose, NULL); + rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); ns = mdev->state; spin_unlock_irq(&mdev->req_lock); - if (rv < SS_Success) + if (rv < SS_SUCCESS) goto force_diskless_dec; - if (mdev->state.role == Primary) - mdev->bc->md.uuid[Current] |= (u64)1; + if (mdev->state.role == R_PRIMARY) + mdev->bc->md.uuid[UI_CURRENT] |= (u64)1; else - mdev->bc->md.uuid[Current] &= ~(u64)1; + mdev->bc->md.uuid[UI_CURRENT] &= ~(u64)1; drbd_md_mark_dirty(mdev); drbd_md_sync(mdev); @@ -1163,7 +1155,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp force_diskless_dec: dec_local(mdev); force_diskless: - drbd_force_state(mdev, NS(disk, Diskless)); + drbd_force_state(mdev, NS(disk, D_DISKLESS)); drbd_md_sync(mdev); release_bdev2_fail: if (nbc) @@ -1190,7 +1182,7 @@ STATIC int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { fsync_bdev(mdev->this_bdev); - reply->ret_code = drbd_request_state(mdev, NS(disk, Diskless)); + reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS)); __set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(HZ/20); /* 50ms; Time for worker to finally terminate */ @@ -1202,7 +1194,7 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { int i, ns; - enum ret_codes retcode; + enum drbd_ret_codes retcode; struct net_conf *new_conf = NULL; struct crypto_hash *tfm = NULL; struct crypto_hash *integrity_w_tfm = NULL; @@ -1214,15 +1206,16 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, void *int_dig_out = NULL; void *int_dig_in = NULL; void *int_dig_vv = NULL; + struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr; - if (mdev->state.conn > StandAlone) { - retcode = HaveNetConfig; + if (mdev->state.conn > C_STANDALONE) { + retcode = ERR_NET_CONFIGURED; goto fail; } new_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); if (!new_conf) { - retcode = KMallocFailed; + retcode = ERR_NOMEM; goto fail; } @@ -1250,48 +1243,45 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) { - retcode = UnknownMandatoryTag; + retcode = ERR_MANDATORY_TAG; goto fail; } if (new_conf->two_primaries && (new_conf->wire_protocol != DRBD_PROT_C)) { - retcode = ProtocolCRequired; + retcode = ERR_NOT_PROTO_C; goto fail; }; - if (mdev->state.role == Primary && new_conf->want_lose) { - retcode = DiscardNotAllowed; + if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { + retcode = ERR_DISCARD; goto fail; } -#define M_ADDR(A) (((struct sockaddr_in *)&A->my_addr)->sin_addr.s_addr) -#define M_PORT(A) (((struct sockaddr_in *)&A->my_addr)->sin_port) -#define O_ADDR(A) (((struct sockaddr_in *)&A->peer_addr)->sin_addr.s_addr) -#define O_PORT(A) (((struct sockaddr_in *)&A->peer_addr)->sin_port) - retcode = NoError; + retcode = NO_ERROR; + + new_my_addr = (struct sockaddr *)&new_conf->my_addr; + new_peer_addr = (struct sockaddr *)&new_conf->peer_addr; for (i = 0; i < minor_count; i++) { odev = minor_to_mdev(i); if (!odev || odev == mdev) continue; if (inc_net(odev)) { - if (M_ADDR(new_conf) == M_ADDR(odev->net_conf) && - M_PORT(new_conf) == M_PORT(odev->net_conf)) - retcode = LAAlreadyInUse; + taken_addr = 
(struct sockaddr *)&odev->net_conf->my_addr; + if (new_conf->my_addr_len == odev->net_conf->my_addr_len && + !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len)) + retcode = ERR_LOCAL_ADDR; - if (O_ADDR(new_conf) == O_ADDR(odev->net_conf) && - O_PORT(new_conf) == O_PORT(odev->net_conf)) - retcode = OAAlreadyInUse; + taken_addr = (struct sockaddr *)&odev->net_conf->peer_addr; + if (new_conf->peer_addr_len == odev->net_conf->peer_addr_len && + !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len)) + retcode = ERR_PEER_ADDR; dec_net(odev); - if (retcode != NoError) + if (retcode != NO_ERROR) goto fail; } } -#undef M_ADDR -#undef M_PORT -#undef O_ADDR -#undef O_PORT if (new_conf->cram_hmac_alg[0] != 0) { snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", @@ -1299,13 +1289,13 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, tfm = crypto_alloc_hash(hmac_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { tfm = NULL; - retcode = CRAMAlgNotAvail; + retcode = ERR_AUTH_ALG; goto fail; } if (crypto_tfm_alg_type(crypto_hash_tfm(tfm)) != CRYPTO_ALG_TYPE_HASH) { - retcode = CRAMAlgNotDigest; + retcode = ERR_AUTH_ALG_ND; goto fail; } } @@ -1314,19 +1304,19 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, integrity_w_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(integrity_w_tfm)) { integrity_w_tfm = NULL; - retcode=IntegrityAlgNotAvail; + retcode=ERR_INTEGRITY_ALG; goto fail; } if (!drbd_crypto_is_hash(crypto_hash_tfm(integrity_w_tfm))) { - retcode=IntegrityAlgNotDigest; + retcode=ERR_INTEGRITY_ALG_ND; goto fail; } integrity_r_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(integrity_r_tfm)) { integrity_r_tfm = NULL; - retcode=IntegrityAlgNotAvail; + retcode=ERR_INTEGRITY_ALG; goto fail; } } @@ -1335,7 +1325,7 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, if (mdev->tl_hash_s != ns) { new_tl_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL); if (!new_tl_hash) { - retcode = KMallocFailed; + retcode = ERR_NOMEM; goto fail; } } @@ -1344,59 +1334,35 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, if (new_conf->two_primaries && (mdev->ee_hash_s != ns)) { new_ee_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL); if (!new_ee_hash) { - retcode = KMallocFailed; + retcode = ERR_NOMEM; goto fail; } } ((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0; -#if 0 - /* for the connection loss logic in drbd_recv - * I _need_ the resulting timeo in jiffies to be - * non-zero and different - * - * XXX maybe rather store the value scaled to jiffies? - * Note: MAX_SCHEDULE_TIMEOUT/HZ*HZ != MAX_SCHEDULE_TIMEOUT - * and HZ > 10; which is unlikely to change... - * Thus, if interrupted by a signal, - * sock_{send,recv}msg returns -EINTR, - * if the timeout expires, -EAGAIN. - */ - /* unlikely: someone disabled the timeouts ... - * just put some huge values in there. 
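
The hunk above replaces the IPv4-only M_ADDR/M_PORT macro comparisons with a length-plus-memcmp comparison of the stored sockaddr blobs, which works for any address family. The core of that comparison as a free-standing helper:

    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <netinet/in.h>

    // Two endpoints match iff the raw sockaddr bytes are identical.
    // Family-agnostic: covers sockaddr_in and sockaddr_in6 alike.
    static int sockaddr_equal(const struct sockaddr *a, socklen_t a_len,
                              const struct sockaddr *b, socklen_t b_len)
    {
        return a_len == b_len && memcmp(a, b, a_len) == 0;
    }

    int main(void)
    {
        struct sockaddr_in x, y;

        memset(&x, 0, sizeof(x));
        x.sin_family = AF_INET;
        x.sin_port = htons(7788);
        y = x;

        printf("%d\n", sockaddr_equal((struct sockaddr *)&x, sizeof(x),
                                      (struct sockaddr *)&y, sizeof(y)));  // 1
        y.sin_port = htons(7789);
        printf("%d\n", sockaddr_equal((struct sockaddr *)&x, sizeof(x),
                                      (struct sockaddr *)&y, sizeof(y)));  // 0
        return 0;
    }
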
*/ - if (!new_conf->ping_int) - new_conf->ping_int = MAX_SCHEDULE_TIMEOUT/HZ; - if (!new_conf->timeout) - new_conf->timeout = MAX_SCHEDULE_TIMEOUT/HZ*10; - if (new_conf->ping_int*10 < new_conf->timeout) - new_conf->timeout = new_conf->ping_int*10/6; - if (new_conf->ping_int*10 == new_conf->timeout) - new_conf->ping_int = new_conf->ping_int+1; -#endif - if (integrity_w_tfm) { i = crypto_hash_digestsize(integrity_w_tfm); int_dig_out = kmalloc(i, GFP_KERNEL); if (!int_dig_out) { - retcode = KMallocFailed; + retcode = ERR_NOMEM; goto fail; } int_dig_in = kmalloc(i, GFP_KERNEL); if (!int_dig_in) { - retcode = KMallocFailed; + retcode = ERR_NOMEM; goto fail; } int_dig_vv = kmalloc(i, GFP_KERNEL); if (!int_dig_vv) { - retcode = KMallocFailed; + retcode = ERR_NOMEM; goto fail; } } if (!mdev->bitmap) { if(drbd_bm_init(mdev)) { - retcode = KMallocFailed; + retcode = ERR_NOMEM; goto fail; } } @@ -1435,8 +1401,8 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, mdev->int_dig_in=int_dig_in; mdev->int_dig_vv=int_dig_vv; - retcode = _drbd_request_state(mdev, NS(conn, Unconnected), ChgStateVerbose); - if (retcode >= SS_Success) + retcode = _drbd_request_state(mdev, NS(conn, C_UNCONNECTED), CS_VERBOSE); + if (retcode >= SS_SUCCESS) drbd_thread_start(&mdev->worker); kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); @@ -1463,40 +1429,40 @@ STATIC int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl { int retcode; - retcode = _drbd_request_state(mdev, NS(conn, Disconnecting), ChgOrdered); + retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED); - if (retcode == SS_NothingToDo) + if (retcode == SS_NOTHING_TO_DO) goto done; - else if (retcode == SS_AlreadyStandAlone) + else if (retcode == SS_ALREADY_STANDALONE) goto done; - else if (retcode == SS_PrimaryNOP) { + else if (retcode == SS_PRIMARY_NOP) { /* Our statche checking code wants to see the peer outdated. */ - retcode = drbd_request_state(mdev, NS2(conn, Disconnecting, - pdsk, Outdated)); - } else if (retcode == SS_CW_FailedByPeer) { + retcode = drbd_request_state(mdev, NS2(conn, C_DISCONNECTING, + pdsk, D_OUTDATED)); + } else if (retcode == SS_CW_FAILED_BY_PEER) { /* The peer probabely wants to see us outdated. */ - retcode = _drbd_request_state(mdev, NS2(conn, Disconnecting, - disk, Outdated), - ChgOrdered); - if (retcode == SS_IsDiskLess || retcode == SS_LowerThanOutdated) { - drbd_force_state(mdev, NS(conn, Disconnecting)); - retcode = SS_Success; + retcode = _drbd_request_state(mdev, NS2(conn, C_DISCONNECTING, + disk, D_OUTDATED), + CS_ORDERED); + if (retcode == SS_IS_DISKLESS || retcode == SS_LOWER_THAN_OUTDATED) { + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + retcode = SS_SUCCESS; } } - if (retcode < SS_Success) + if (retcode < SS_SUCCESS) goto fail; if (wait_event_interruptible(mdev->state_wait, - mdev->state.conn != Disconnecting)) { - /* Do not test for mdev->state.conn == StandAlone, since + mdev->state.conn != C_DISCONNECTING)) { + /* Do not test for mdev->state.conn == C_STANDALONE, since someone else might connect us in the mean time! 
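
drbd_nl_disconnect() above is a ladder of fallbacks: attempt an ordered disconnect, and on specific refusals retry with the peer, or failing that the local disk, marked outdated. Reduced to its bare control flow, with invented status codes and a dummy request function standing in for _drbd_request_state():

    #include <stdio.h>

    enum s_status {
        S_REFUSED = -1,
        S_SUCCESS = 1,
        S_NOTHING_TO_DO,
        S_ALREADY_STANDALONE,
        S_PRIMARY_NOP,
        S_CW_FAILED_BY_PEER,
    };

    // Dummy stand-in: pretend the plain attempt is refused by the peer
    // and any retry that also outdates someone succeeds.
    static enum s_status request_disconnect(int outdate_whom)
    {
        return outdate_whom == 0 ? S_CW_FAILED_BY_PEER : S_SUCCESS;
    }

    static int disconnect(void)
    {
        enum s_status rv = request_disconnect(0);

        if (rv == S_NOTHING_TO_DO || rv == S_ALREADY_STANDALONE)
            return 0;                        // nothing left to tear down
        if (rv == S_PRIMARY_NOP)
            rv = request_disconnect(1);      // also mark the peer outdated
        else if (rv == S_CW_FAILED_BY_PEER)
            rv = request_disconnect(2);      // peer wants us outdated instead
        return rv >= S_SUCCESS ? 0 : -1;
    }

    int main(void)
    {
        printf("disconnect: %d\n", disconnect());  // prints 0
        return 0;
    }
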
*/ - retcode = GotSignal; + retcode = ERR_INTR; goto fail; } done: - retcode = NoError; + retcode = NO_ERROR; fail: drbd_md_sync(mdev); reply->ret_code = retcode; @@ -1509,43 +1475,43 @@ void resync_after_online_grow(struct drbd_conf *mdev) dev_info(DEV, "Resync of new storage after online grow\n"); if (mdev->state.role != mdev->state.peer) - iass = (mdev->state.role == Primary); + iass = (mdev->state.role == R_PRIMARY); else iass = test_bit(DISCARD_CONCURRENT, &mdev->flags); if (iass) - drbd_start_resync(mdev, SyncSource); + drbd_start_resync(mdev, C_SYNC_SOURCE); else - _drbd_request_state(mdev, NS(conn, WFSyncUUID), ChgStateVerbose + ChgSerialize); + _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE); } STATIC int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { struct resize rs; - int retcode = NoError; + int retcode = NO_ERROR; int ldsc = 0; /* local disk size changed */ - enum determin_dev_size_enum dd; + enum determine_dev_size dd; memset(&rs, 0, sizeof(struct resize)); if (!resize_from_tags(mdev, nlp->tag_list, &rs)) { - retcode = UnknownMandatoryTag; + retcode = ERR_MANDATORY_TAG; goto fail; } - if (mdev->state.conn > Connected) { - retcode = NoResizeDuringResync; + if (mdev->state.conn > C_CONNECTED) { + retcode = ERR_RESIZE_RESYNC; goto fail; } - if (mdev->state.role == Secondary && - mdev->state.peer == Secondary) { - retcode = APrimaryNodeNeeded; + if (mdev->state.role == R_SECONDARY && + mdev->state.peer == R_SECONDARY) { + retcode = ERR_NO_PRIMARY; goto fail; } if (!inc_local(mdev)) { - retcode = HaveNoDiskConfig; + retcode = ERR_NO_DISK; goto fail; } @@ -1559,11 +1525,11 @@ STATIC int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, drbd_md_sync(mdev); dec_local(mdev); if (dd == dev_size_error) { - retcode = VMallocFailed; + retcode = ERR_NOMEM_BITMAP; goto fail; } - if (mdev->state.conn == Connected && (dd != unchanged || ldsc)) { + if (mdev->state.conn == C_CONNECTED && (dd != unchanged || ldsc)) { drbd_send_uuids(mdev); drbd_send_sizes(mdev); if (dd == grew) @@ -1578,7 +1544,7 @@ STATIC int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { - int retcode = NoError; + int retcode = NO_ERROR; int err; int ovr; /* online verify running */ int rsr; /* re-sync running */ @@ -1598,19 +1564,19 @@ STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n } if (!syncer_conf_from_tags(mdev, nlp->tag_list, &sc)) { - retcode = UnknownMandatoryTag; + retcode = ERR_MANDATORY_TAG; goto fail; } if (sc.after != -1) { if (sc.after < -1 || minor_to_mdev(sc.after) == NULL) { - retcode = SyncAfterInvalid; + retcode = ERR_SYNC_AFTER; goto fail; } odev = minor_to_mdev(sc.after); /* check against loops in */ while (1) { if (odev == mdev) { - retcode = SyncAfterCycle; + retcode = ERR_SYNC_AFTER_CYCLE; goto fail; } if (odev->sync_conf.after == -1) @@ -1620,13 +1586,13 @@ STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n } /* re-sync running */ - rsr = ( mdev->state.conn == SyncSource || - mdev->state.conn == SyncTarget || - mdev->state.conn == PausedSyncS || - mdev->state.conn == PausedSyncT ); + rsr = ( mdev->state.conn == C_SYNC_SOURCE || + mdev->state.conn == C_SYNC_TARGET || + mdev->state.conn == C_PAUSED_SYNC_S || + mdev->state.conn == C_PAUSED_SYNC_T ); if (rsr && strcmp(sc.csums_alg, 
mdev->sync_conf.csums_alg)) { - retcode = CSUMSResyncRunning; + retcode = ERR_CSUMS_RESYNC_RUNNING; goto fail; } @@ -1634,22 +1600,22 @@ STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n csums_tfm = crypto_alloc_hash(sc.csums_alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(csums_tfm)) { csums_tfm = NULL; - retcode = CSUMSAlgNotAvail; + retcode = ERR_CSUMS_ALG; goto fail; } if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) { - retcode = CSUMSAlgNotDigest; + retcode = ERR_CSUMS_ALG_ND; goto fail; } } /* online verify running */ - ovr = (mdev->state.conn == VerifyS || mdev->state.conn == VerifyT); + ovr = (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T); if (ovr) { if (strcmp(sc.verify_alg, mdev->sync_conf.verify_alg)) { - retcode = VERIFYIsRunning; + retcode = ERR_VERIFY_RUNNING; goto fail; } } @@ -1658,12 +1624,12 @@ STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n verify_tfm = crypto_alloc_hash(sc.verify_alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(verify_tfm)) { verify_tfm = NULL; - retcode = VERIFYAlgNotAvail; + retcode = ERR_VERIFY_ALG; goto fail; } if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) { - retcode = VERIFYAlgNotDigest; + retcode = ERR_VERIFY_ALG_ND; goto fail; } } @@ -1672,7 +1638,7 @@ STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n err = __bitmap_parse(sc.cpu_mask, 32, 0, (unsigned long *)&n_cpu_mask, NR_CPUS); if (err) { dev_warn(DEV, "__bitmap_parse() failed with %d\n", err); - retcode = CPUMaskParseFailed; + retcode = ERR_CPU_MASK_PARSE; goto fail; } } @@ -1714,12 +1680,12 @@ STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n drbd_md_sync(mdev); if (err) { - retcode = KMallocFailed; + retcode = ERR_NOMEM; goto fail; } } - if (mdev->state.conn >= Connected) + if (mdev->state.conn >= C_CONNECTED) drbd_send_sync_param(mdev, &sc); drbd_alter_sa(mdev, sc.after); @@ -1745,21 +1711,21 @@ STATIC int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl { int retcode; - retcode = _drbd_request_state(mdev, NS(conn, StartingSyncT), ChgOrdered); + retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); - if (retcode < SS_Success && retcode != SS_NeedConnection) - retcode = drbd_request_state(mdev, NS(conn, StartingSyncT)); + if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION) + retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); - while (retcode == SS_NeedConnection) { + while (retcode == SS_NEED_CONNECTION) { spin_lock_irq(&mdev->req_lock); - if (mdev->state.conn < Connected) - retcode = _drbd_set_state(_NS(mdev, disk, Inconsistent), ChgStateVerbose, NULL); + if (mdev->state.conn < C_CONNECTED) + retcode = _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_VERBOSE, NULL); spin_unlock_irq(&mdev->req_lock); - if (retcode != SS_NeedConnection) + if (retcode != SS_NEED_CONNECTION) break; - retcode = drbd_request_state(mdev, NS(conn, StartingSyncT)); + retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); } reply->ret_code = retcode; @@ -1770,7 +1736,7 @@ STATIC int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re struct drbd_nl_cfg_reply *reply) { - reply->ret_code = drbd_request_state(mdev, NS(conn, StartingSyncS)); + reply->ret_code = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S)); return 0; } @@ -1778,10 +1744,10 @@ STATIC int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re STATIC int drbd_nl_pause_sync(struct drbd_conf *mdev, 
struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { - int retcode = NoError; + int retcode = NO_ERROR; - if (drbd_request_state(mdev, NS(user_isp, 1)) == SS_NothingToDo) - retcode = PauseFlagAlreadySet; + if (drbd_request_state(mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO) + retcode = ERR_PAUSE_IS_SET; reply->ret_code = retcode; return 0; @@ -1790,10 +1756,10 @@ STATIC int drbd_nl_pause_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl STATIC int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { - int retcode = NoError; + int retcode = NO_ERROR; - if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NothingToDo) - retcode = PauseFlagAlreadyClear; + if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) + retcode = ERR_PAUSE_IS_CLEAR; reply->ret_code = retcode; return 0; @@ -1817,7 +1783,7 @@ STATIC int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp STATIC int drbd_nl_outdate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { - reply->ret_code = drbd_request_state(mdev, NS(disk, Outdated)); + reply->ret_code = drbd_request_state(mdev, NS(disk, D_OUTDATED)); return 0; } @@ -1848,14 +1814,14 @@ STATIC int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp struct drbd_nl_cfg_reply *reply) { unsigned short *tl = reply->tag_list; - union drbd_state_t s = mdev->state; + union drbd_state s = mdev->state; unsigned long rs_left; unsigned int res; tl = get_state_to_tags(mdev, (struct get_state *)&s, tl); /* no local ref, no bitmap, no syncer progress. */ - if (s.conn >= SyncSource && s.conn <= PausedSyncT) { + if (s.conn >= C_SYNC_SOURCE && s.conn <= C_PAUSED_SYNC_T) { if (inc_local(mdev)) { drbd_get_syncer_progress(mdev, &rs_left, &res); *tl++ = T_sync_progress; @@ -1880,9 +1846,9 @@ STATIC int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp if (inc_local(mdev)) { /* This is a hand crafted add tag ;) */ *tl++ = T_uuids; - *tl++ = UUID_SIZE*sizeof(u64); - memcpy(tl, mdev->bc->md.uuid, UUID_SIZE*sizeof(u64)); - tl = (unsigned short *)((char *)tl + UUID_SIZE*sizeof(u64)); + *tl++ = UI_SIZE*sizeof(u64); + memcpy(tl, mdev->bc->md.uuid, UI_SIZE*sizeof(u64)); + tl = (unsigned short *)((char *)tl + UI_SIZE*sizeof(u64)); *tl++ = T_uuids_flags; *tl++ = sizeof(int); memcpy(tl, &mdev->bc->md.flags, sizeof(int)); @@ -1906,8 +1872,8 @@ STATIC int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_r tl = reply->tag_list; - rv = mdev->state.pdsk == Outdated ? UT_PeerOutdated : - test_bit(USE_DEGR_WFC_T, &mdev->flags) ? UT_Degraded : UT_Default; + rv = mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED : + test_bit(USE_DEGR_WFC_T, &mdev->flags) ? 
UT_DEGRADED : UT_DEFAULT; /* This is a hand crafted add tag ;) */ *tl++ = T_use_degraded; @@ -1922,7 +1888,7 @@ STATIC int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_r STATIC int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { - reply->ret_code = drbd_request_state(mdev,NS(conn,VerifyS)); + reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); return 0; } @@ -1931,7 +1897,7 @@ STATIC int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, STATIC int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { - int retcode = NoError; + int retcode = NO_ERROR; int skip_initial_sync = 0; int err; @@ -1939,41 +1905,41 @@ STATIC int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl memset(&args, 0, sizeof(struct new_c_uuid)); if (!new_c_uuid_from_tags(mdev, nlp->tag_list, &args)) { - reply->ret_code = UnknownMandatoryTag; + reply->ret_code = ERR_MANDATORY_TAG; return 0; } mutex_lock(&mdev->state_mutex); /* Protects us against serialized state changes. */ if (!inc_local(mdev)) { - retcode = HaveNoDiskConfig; + retcode = ERR_NO_DISK; goto out; } /* this is "skip initial sync", assume to be clean */ - if (mdev->state.conn == Connected && mdev->agreed_pro_version >= 90 && - mdev->bc->md.uuid[Current] == UUID_JUST_CREATED && args.clear_bm) { + if (mdev->state.conn == C_CONNECTED && mdev->agreed_pro_version >= 90 && + mdev->bc->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) { dev_info(DEV, "Preparing to skip initial sync\n"); skip_initial_sync = 1; - } else if (mdev->state.conn >= Connected) { - retcode = MayNotBeConnected; + } else if (mdev->state.conn >= C_CONNECTED) { + retcode = ERR_CONNECTED; goto out_dec; } - drbd_uuid_set(mdev, Bitmap, 0); /* Rotate Bitmap to History 1, etc... */ - drbd_uuid_new_current(mdev); /* New current, previous to Bitmap */ + drbd_uuid_set(mdev, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... 
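
The two UUID calls above rely on the UI_* slots forming a small history ring: creating a new current UUID parks the old one in the bitmap slot, and clearing the bitmap slot shifts its value down into the history slots. A toy model of that rotation, with invented slot names:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    // Invented mirror of the UI_* slot layout.
    enum { SLOT_CURRENT, SLOT_BITMAP, SLOT_HISTORY0, SLOT_HISTORY1, SLOT_COUNT };

    // Park the current UUID in the bitmap slot, then roll a fresh one.
    static void new_current_uuid(uint64_t u[SLOT_COUNT])
    {
        u[SLOT_BITMAP] = u[SLOT_CURRENT];
        u[SLOT_CURRENT] = ((uint64_t)rand() << 32) | (unsigned)rand();
    }

    // Clearing the bitmap slot pushes its old value into the history ring.
    static void clear_bitmap_uuid(uint64_t u[SLOT_COUNT])
    {
        u[SLOT_HISTORY1] = u[SLOT_HISTORY0];
        u[SLOT_HISTORY0] = u[SLOT_BITMAP];
        u[SLOT_BITMAP] = 0;
    }

    int main(void)
    {
        uint64_t u[SLOT_COUNT] = { 0x11, 0, 0, 0 };

        new_current_uuid(u);    // 0x11 moves into the bitmap slot
        clear_bitmap_uuid(u);   // ...and from there into history
        printf("cur=%llx bm=%llx h0=%llx\n",
               (unsigned long long)u[SLOT_CURRENT],
               (unsigned long long)u[SLOT_BITMAP],
               (unsigned long long)u[SLOT_HISTORY0]);  // h0=11
        return 0;
    }
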
*/
+	drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */

 	if (args.clear_bm) {
 		err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
 			"clear_n_write from new_c_uuid");
 		if (err) {
 			dev_err(DEV, "Writing bitmap failed with %d\n",err);
-			retcode = MDIOError;
+			retcode = ERR_IO_MD_DISK;
 		}
 		if (skip_initial_sync) {
 			drbd_send_uuids_skip_initial_sync(mdev);
-			_drbd_uuid_set(mdev, Bitmap, 0);
-			_drbd_set_state(_NS2(mdev, disk, UpToDate, pdsk, UpToDate),
-					ChgStateVerbose, NULL);
+			_drbd_uuid_set(mdev, UI_BITMAP, 0);
+			_drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
+					CS_VERBOSE, NULL);
 		}
 	}
@@ -2079,14 +2045,14 @@ STATIC void drbd_connector_callback(void *data)
 	mdev = ensure_mdev(nlp);
 	if (!mdev) {
-		retcode = MinorNotKnown;
+		retcode = ERR_MINOR_INVALID;
 		goto fail;
 	}

-	TRACE(TraceTypeNl, TraceLvlSummary, nl_trace_packet(data););
+	TRACE(TRACE_TYPE_NL, TRACE_LVL_SUMMARY, nl_trace_packet(data););

 	if (nlp->packet_type >= P_nl_after_last_packet) {
-		retcode = UnknownNetLinkPacket;
+		retcode = ERR_PACKET_NR;
 		goto fail;
 	}
@@ -2094,7 +2060,7 @@ STATIC void drbd_connector_callback(void *data)
 	/* This may happen if packet number is 0: */
 	if (cm->function == NULL) {
-		retcode = UnknownNetLinkPacket;
+		retcode = ERR_PACKET_NR;
 		goto fail;
 	}
@@ -2102,7 +2068,7 @@ STATIC void drbd_connector_callback(void *data)
 	cn_reply = kmalloc(reply_size, GFP_KERNEL);
 	if (!cn_reply) {
-		retcode = KMallocFailed;
+		retcode = ERR_NOMEM;
 		goto fail;
 	}
 	reply = (struct drbd_nl_cfg_reply *) cn_reply->data;
@@ -2110,7 +2076,7 @@ STATIC void drbd_connector_callback(void *data)
 	reply->packet_type = cm->reply_body_size ? nlp->packet_type : P_nl_after_last_packet;
 	reply->minor = nlp->drbd_minor;
-	reply->ret_code = NoError; /* Might by modified by cm->function. */
+	reply->ret_code = NO_ERROR; /* Might be modified by cm->function. */
 	/* reply->tag_list; might be modified by cm->fucntion.
*/ rr = cm->function(mdev, nlp, reply); @@ -2121,7 +2087,7 @@ STATIC void drbd_connector_callback(void *data) cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr; cn_reply->flags = 0; - TRACE(TraceTypeNl, TraceLvlSummary, nl_trace_reply(cn_reply);); + TRACE(TRACE_TYPE_NL, TRACE_LVL_SUMMARY, nl_trace_reply(cn_reply);); rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); if (rr && rr != -ESRCH) @@ -2192,7 +2158,7 @@ tl_add_int(unsigned short *tl, enum drbd_tags tag, const void *val) return tl; } -void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state_t state) +void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state) { char buffer[sizeof(struct cn_msg)+ sizeof(struct drbd_nl_cfg_reply)+ @@ -2219,9 +2185,9 @@ void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state_t state) reply->packet_type = P_get_state; reply->minor = mdev_to_minor(mdev); - reply->ret_code = NoError; + reply->ret_code = NO_ERROR; - TRACE(TraceTypeNl, TraceLvlSummary, nl_trace_reply(cn_reply);); + TRACE(TRACE_TYPE_NL, TRACE_LVL_SUMMARY, nl_trace_reply(cn_reply);); cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); } @@ -2258,9 +2224,9 @@ void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name) reply->packet_type = P_call_helper; reply->minor = mdev_to_minor(mdev); - reply->ret_code = NoError; + reply->ret_code = NO_ERROR; - TRACE(TraceTypeNl, TraceLvlSummary, nl_trace_reply(cn_reply);); + TRACE(TRACE_TYPE_NL, TRACE_LVL_SUMMARY, nl_trace_reply(cn_reply);); cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); } @@ -2268,7 +2234,7 @@ void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name) void drbd_bcast_ee(struct drbd_conf *mdev, const char *reason, const int dgs, const char* seen_hash, const char* calc_hash, - const struct Tl_epoch_entry* e) + const struct drbd_epoch_entry* e) { struct cn_msg *cn_reply; struct drbd_nl_cfg_reply *reply; @@ -2328,9 +2294,9 @@ void drbd_bcast_ee(struct drbd_conf *mdev, reply->packet_type = P_dump_ee; reply->minor = mdev_to_minor(mdev); - reply->ret_code = NoError; + reply->ret_code = NO_ERROR; - TRACE(TraceTypeNl, TraceLvlSummary, nl_trace_reply(cn_reply);); + TRACE(TRACE_TYPE_NL, TRACE_LVL_SUMMARY, nl_trace_reply(cn_reply);); cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); kfree(cn_reply); @@ -2372,9 +2338,9 @@ void drbd_bcast_sync_progress(struct drbd_conf *mdev) reply->packet_type = P_sync_progress; reply->minor = mdev_to_minor(mdev); - reply->ret_code = NoError; + reply->ret_code = NO_ERROR; - TRACE(TraceTypeNl, TraceLvlSummary, nl_trace_reply(cn_reply);); + TRACE(TRACE_TYPE_NL, TRACE_LVL_SUMMARY, nl_trace_reply(cn_reply);); cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); } @@ -2429,7 +2395,7 @@ void drbd_nl_send_reply(struct cn_msg *req, int ret_code) reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor; reply->ret_code = ret_code; - TRACE(TraceTypeNl, TraceLvlSummary, nl_trace_reply(cn_reply);); + TRACE(TRACE_TYPE_NL, TRACE_LVL_SUMMARY, nl_trace_reply(cn_reply);); rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); if (rr && rr != -ESRCH) diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index b209da0ff4ea..76b512180606 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -178,7 +178,7 @@ STATIC int drbd_seq_show(struct seq_file *seq, void *v) pe .. pending (waiting for ack or data reply) ua .. unack'd (still need to send ack or data reply) ap .. application requests accepted, but not yet completed - ep .. 
number of epochs currently "on the fly", BarrierAck pending + ep .. number of epochs currently "on the fly", P_BARRIER_ACK pending wo .. write ordering mode currently in use oos .. known out-of-sync kB */ @@ -196,9 +196,9 @@ STATIC int drbd_seq_show(struct seq_file *seq, void *v) sn = conns_to_name(mdev->state.conn); - if (mdev->state.conn == StandAlone && - mdev->state.disk == Diskless && - mdev->state.role == Secondary) { + if (mdev->state.conn == C_STANDALONE && + mdev->state.disk == D_DISKLESS && + mdev->state.role == R_SECONDARY) { seq_printf(seq, "%2d: cs:Unconfigured\n", i); } else { seq_printf(seq, @@ -234,11 +234,11 @@ STATIC int drbd_seq_show(struct seq_file *seq, void *v) seq_printf(seq, " oos:%lu\n", Bit2KB(drbd_bm_total_weight(mdev))); } - if (mdev->state.conn == SyncSource || - mdev->state.conn == SyncTarget) + if (mdev->state.conn == C_SYNC_SOURCE || + mdev->state.conn == C_SYNC_TARGET) drbd_syncer_progress(mdev, seq); - if (mdev->state.conn == VerifyS || mdev->state.conn == VerifyT) + if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) seq_printf(seq, "\t%3d%% %lu/%lu\n", (int)((mdev->rs_total-mdev->ov_left) / (mdev->rs_total/100+1)), @@ -246,7 +246,7 @@ STATIC int drbd_seq_show(struct seq_file *seq, void *v) mdev->rs_total); #ifdef ENABLE_DYNAMIC_TRACE - if (proc_details >= 1 && inc_local_if_state(mdev, Failed)) { + if (proc_details >= 1 && inc_local_if_state(mdev, D_FAILED)) { lc_printf_stats(seq, mdev->resync); lc_printf_stats(seq, mdev->act_log); dec_local(mdev); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 64408cdcab8d..26ac8fd0e1f4 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -59,17 +59,17 @@ struct flush_work { }; enum epoch_event { - EV_put, - EV_got_barrier_nr, - EV_barrier_done, - EV_became_last, - EV_cleanup = 32, /* used as flag */ + EV_PUT, + EV_GOT_BARRIER_NR, + EV_BARRIER_DONE, + EV_BECAME_LAST, + EV_CLEANUP = 32, /* used as flag */ }; enum finish_epoch { - FE_still_live, - FE_destroyed, - FE_recycled, + FE_STILL_LIVE, + FE_DESTROYED, + FE_RECYCLED, }; STATIC int drbd_do_handshake(struct drbd_conf *mdev); @@ -201,14 +201,14 @@ You must not have the req_lock: drbd_wait_ee_list_empty() */ -struct Tl_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, +struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector, unsigned int data_size, gfp_t gfp_mask) __must_hold(local) { struct request_queue *q; - struct Tl_epoch_entry *e; + struct drbd_epoch_entry *e; struct bio_vec *bvec; struct page *page; struct bio *bio; @@ -284,7 +284,7 @@ struct Tl_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, e->epoch = NULL; e->flags = 0; - MTRACE(TraceTypeEE, TraceLvlAll, + MTRACE(TRACE_TYPE_EE, TRACE_LVL_ALL, dev_info(DEV, "allocated EE sec=%llus size=%u ee=%p\n", (unsigned long long)sector, data_size, e); ); @@ -302,13 +302,13 @@ struct Tl_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, return NULL; } -void drbd_free_ee(struct drbd_conf *mdev, struct Tl_epoch_entry *e) +void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) { struct bio *bio = e->private_bio; struct bio_vec *bvec; int i; - MTRACE(TraceTypeEE, TraceLvlAll, + MTRACE(TRACE_TYPE_EE, TRACE_LVL_ALL, dev_info(DEV, "Free EE sec=%llus size=%u ee=%p\n", (unsigned long long)e->sector, e->size, e); ); @@ -328,14 +328,14 @@ void drbd_free_ee(struct drbd_conf *mdev, struct Tl_epoch_entry *e) int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) { int count = 
0; - struct Tl_epoch_entry *e; + struct drbd_epoch_entry *e; struct list_head *le; spin_lock_irq(&mdev->req_lock); while (!list_empty(list)) { le = list->next; list_del(le); - e = list_entry(le, struct Tl_epoch_entry, w.list); + e = list_entry(le, struct drbd_epoch_entry, w.list); drbd_free_ee(mdev, e); count++; } @@ -347,7 +347,7 @@ int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) STATIC void reclaim_net_ee(struct drbd_conf *mdev) { - struct Tl_epoch_entry *e; + struct drbd_epoch_entry *e; struct list_head *le, *tle; /* The EEs are always appended to the end of the list. Since @@ -356,7 +356,7 @@ STATIC void reclaim_net_ee(struct drbd_conf *mdev) stop to examine the list... */ list_for_each_safe(le, tle, &mdev->net_ee) { - e = list_entry(le, struct Tl_epoch_entry, w.list); + e = list_entry(le, struct drbd_epoch_entry, w.list); if (drbd_bio_has_active_page(e->private_bio)) break; list_del(le); @@ -377,7 +377,7 @@ STATIC void reclaim_net_ee(struct drbd_conf *mdev) STATIC int drbd_process_done_ee(struct drbd_conf *mdev) { LIST_HEAD(work_list); - struct Tl_epoch_entry *e, *t; + struct drbd_epoch_entry *e, *t; int ok = 1; spin_lock_irq(&mdev->req_lock); @@ -390,7 +390,7 @@ STATIC int drbd_process_done_ee(struct drbd_conf *mdev) * all ignore the last argument. */ list_for_each_entry_safe(e, t, &work_list, w.list) { - MTRACE(TraceTypeEE, TraceLvlAll, + MTRACE(TRACE_TYPE_EE, TRACE_LVL_ALL, dev_info(DEV, "Process EE on done_ee sec=%llus size=%u ee=%p\n", (unsigned long long)e->sector, e->size, e); ); @@ -410,7 +410,7 @@ STATIC int drbd_process_done_ee(struct drbd_conf *mdev) void _drbd_clear_done_ee(struct drbd_conf *mdev) { struct list_head *le; - struct Tl_epoch_entry *e; + struct drbd_epoch_entry *e; struct drbd_epoch *epoch; int n = 0; @@ -420,7 +420,7 @@ void _drbd_clear_done_ee(struct drbd_conf *mdev) while (!list_empty(&mdev->done_ee)) { le = mdev->done_ee.next; list_del(le); - e = list_entry(le, struct Tl_epoch_entry, w.list); + e = list_entry(le, struct drbd_epoch_entry, w.list); if (mdev->net_conf->wire_protocol == DRBD_PROT_C || is_syncer_block_id(e->block_id)) ++n; @@ -432,9 +432,9 @@ void _drbd_clear_done_ee(struct drbd_conf *mdev) if (e->flags & EE_IS_BARRIER) { epoch = previous_epoch(mdev, e->epoch); if (epoch) - drbd_may_finish_epoch(mdev, epoch, EV_barrier_done + EV_cleanup); + drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE + EV_CLEANUP); } - drbd_may_finish_epoch(mdev, e->epoch, EV_put + EV_cleanup); + drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + EV_CLEANUP); } drbd_free_ee(mdev, e); } @@ -568,7 +568,7 @@ STATIC int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) set_fs(oldfs); if (rv != size) - drbd_force_state(mdev, NS(conn, BrokenPipe)); + drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE)); return rv; } @@ -617,7 +617,7 @@ STATIC struct socket *drbd_try_connect(struct drbd_conf *mdev) goto out; /* connect may fail, peer not yet available. - * stay WFConnection, don't go Disconnecting! */ + * stay C_WF_CONNECTION, don't go Disconnecting! 
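Worth pausing on the EV_* renames above: EV_CLEANUP is deliberately defined as 32 so it acts as a flag bit on top of the small event codes; _drbd_clear_done_ee passes EV_PUT + EV_CLEANUP, and drbd_may_finish_epoch later strips the flag with ev & ~EV_CLEANUP. Addition and bitwise OR coincide here because the flag bit is disjoint from the event codes. A standalone sketch of the encode/decode idiom; only the enum values mirror the patch, everything else is illustrative:

#include <stdio.h>

enum epoch_event {
	EV_PUT,
	EV_GOT_BARRIER_NR,
	EV_BARRIER_DONE,
	EV_BECAME_LAST,
	EV_CLEANUP = 32,	/* flag bit, OR-able onto any event above */
};

static void handle_event(int ev)
{
	int cleanup = !!(ev & EV_CLEANUP);	/* extract the flag... */

	switch (ev & ~EV_CLEANUP) {		/* ...then dispatch on the code */
	case EV_PUT:
		printf("put, cleanup=%d\n", cleanup);
		break;
	case EV_BARRIER_DONE:
		printf("barrier done, cleanup=%d\n", cleanup);
		break;
	default:
		break;
	}
}

int main(void)
{
	handle_event(EV_PUT);
	handle_event(EV_BARRIER_DONE | EV_CLEANUP);
	return 0;
}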
*/ disconnect_on_error = 0; what = "connect"; err = sock->ops->connect(sock, @@ -643,7 +643,7 @@ out: dev_err(DEV, "%s failed, err = %d\n", what, err); } if (disconnect_on_error) - drbd_force_state(mdev, NS(conn, Disconnecting)); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); } dec_net(mdev); return sock; @@ -688,7 +688,7 @@ out: if (err < 0) { if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { dev_err(DEV, "%s failed, err = %d\n", what, err); - drbd_force_state(mdev, NS(conn, Disconnecting)); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); } } dec_net(mdev); @@ -697,16 +697,16 @@ out: } STATIC int drbd_send_fp(struct drbd_conf *mdev, - struct socket *sock, enum Drbd_Packet_Cmd cmd) + struct socket *sock, enum drbd_packets cmd) { - struct Drbd_Header *h = (struct Drbd_Header *) &mdev->data.sbuf.head; + struct p_header *h = (struct p_header *) &mdev->data.sbuf.header; return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0); } -STATIC enum Drbd_Packet_Cmd drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) +STATIC enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) { - struct Drbd_Header *h = (struct Drbd_Header *) &mdev->data.sbuf.head; + struct p_header *h = (struct p_header *) &mdev->data.sbuf.header; int rr; rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0); @@ -759,7 +759,7 @@ STATIC int drbd_connect(struct drbd_conf *mdev) if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) dev_err(DEV, "CREATE_BARRIER flag was set in drbd_connect - now cleared!\n"); - if (drbd_request_state(mdev, NS(conn, WFConnection)) < SS_Success) + if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) return -2; clear_bit(DISCARD_CONCURRENT, &mdev->flags); @@ -780,11 +780,11 @@ STATIC int drbd_connect(struct drbd_conf *mdev) if (s) { if (!sock) { - drbd_send_fp(mdev, s, HandShakeS); + drbd_send_fp(mdev, s, P_HAND_SHAKE_S); sock = s; s = NULL; } else if (!msock) { - drbd_send_fp(mdev, s, HandShakeM); + drbd_send_fp(mdev, s, P_HAND_SHAKE_M); msock = s; s = NULL; } else { @@ -809,14 +809,14 @@ retry: drbd_socket_okay(mdev, &sock); drbd_socket_okay(mdev, &msock); switch (try) { - case HandShakeS: + case P_HAND_SHAKE_S: if (sock) { dev_warn(DEV, "initial packet S crossed\n"); sock_release(sock); } sock = s; break; - case HandShakeM: + case P_HAND_SHAKE_M: if (msock) { dev_warn(DEV, "initial packet M crossed\n"); sock_release(msock); @@ -832,7 +832,7 @@ retry: } } - if (mdev->state.conn <= Disconnecting) + if (mdev->state.conn <= C_DISCONNECTING) return -1; if (signal_pending(current)) { flush_signals(current); @@ -872,7 +872,7 @@ retry: /* NOT YET ... 
* sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; - * first set it to the HandShake timeout, wich is hardcoded for now: */ + * first set it to the P_HAND_SHAKE timeout, which is hardcoded for now: */ sock->sk->sk_sndtimeo = sock->sk->sk_rcvtimeo = 2*HZ; @@ -902,7 +902,7 @@ retry: } } - if (drbd_request_state(mdev, NS(conn, WFReportParams)) < SS_Success) + if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS) return 0; sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; @@ -923,7 +923,7 @@ retry: return 1; } -STATIC int drbd_recv_header(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int drbd_recv_header(struct drbd_conf *mdev, struct p_header *h) { int r; @@ -962,7 +962,7 @@ STATIC enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d dec_local(mdev); } - return drbd_may_finish_epoch(mdev, epoch, EV_barrier_done); + return drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE); } /** @@ -979,8 +979,8 @@ STATIC int w_flush(struct drbd_conf *mdev, struct drbd_work *w, int cancel) if (!test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags)) drbd_flush_after_epoch(mdev, epoch); - drbd_may_finish_epoch(mdev, epoch, EV_put | - (mdev->state.conn < Connected ? EV_cleanup : 0)); + drbd_may_finish_epoch(mdev, epoch, EV_PUT | + (mdev->state.conn < C_CONNECTED ? EV_CLEANUP : 0)); return 1; } @@ -996,13 +996,13 @@ STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, int finish, epoch_size; struct drbd_epoch *next_epoch; int schedule_flush = 0; - enum finish_epoch rv = FE_still_live; + enum finish_epoch rv = FE_STILL_LIVE; static char *epoch_event_str[] = { - [EV_put] = "put", - [EV_got_barrier_nr] = "got_barrier_nr", - [EV_barrier_done] = "barrier_done", - [EV_became_last] = "became_last", + [EV_PUT] = "put", + [EV_GOT_BARRIER_NR] = "got_barrier_nr", + [EV_BARRIER_DONE] = "barrier_done", + [EV_BECAME_LAST] = "became_last", }; spin_lock(&mdev->epoch_lock); @@ -1012,11 +1012,11 @@ STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, epoch_size = atomic_read(&epoch->epoch_size); - switch (ev & ~EV_cleanup) { - case EV_put: + switch (ev & ~EV_CLEANUP) { + case EV_PUT: atomic_dec(&epoch->active); break; - case EV_got_barrier_nr: + case EV_GOT_BARRIER_NR: set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags); /* Special case: If we just switched from WO_bio_barrier to @@ -1026,15 +1026,15 @@ STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, epoch == mdev->current_epoch) clear_bit(DE_CONTAINS_A_BARRIER, &epoch->flags); break; - case EV_barrier_done: + case EV_BARRIER_DONE: set_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags); break; - case EV_became_last: + case EV_BECAME_LAST: /* nothing to do*/ break; } - MTRACE(TraceTypeEpochs, TraceLvlAll, + MTRACE(TRACE_TYPE_EPOCHS, TRACE_LVL_ALL, dev_info(DEV, "Update epoch %p/%d { size=%d active=%d %c%c n%c%c } ev=%s\n", epoch, epoch->barrier_nr, epoch_size, atomic_read(&epoch->active), test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) ? 
'n' : '-', @@ -1053,7 +1053,7 @@ STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, if (test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) || mdev->write_ordering == WO_none || (epoch_size == 1 && test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) || - ev & EV_cleanup) { + ev & EV_CLEANUP) { finish = 1; set_bit(DE_IS_FINISHING, &epoch->flags); } else if (!test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) && @@ -1063,7 +1063,7 @@ STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, } } if (finish) { - if (!(ev & EV_cleanup)) { + if (!(ev & EV_CLEANUP)) { spin_unlock(&mdev->epoch_lock); drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size); spin_lock(&mdev->epoch_lock); @@ -1073,22 +1073,22 @@ STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, if (mdev->current_epoch != epoch) { next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list); list_del(&epoch->list); - ev = EV_became_last | (ev & EV_cleanup); + ev = EV_BECAME_LAST | (ev & EV_CLEANUP); mdev->epochs--; - MTRACE(TraceTypeEpochs, TraceLvlSummary, + MTRACE(TRACE_TYPE_EPOCHS, TRACE_LVL_SUMMARY, dev_info(DEV, "Freeing epoch %p/%d { size=%d } nr_epochs=%d\n", epoch, epoch->barrier_nr, epoch_size, mdev->epochs); ); kfree(epoch); - if (rv == FE_still_live) - rv = FE_destroyed; + if (rv == FE_STILL_LIVE) + rv = FE_DESTROYED; } else { epoch->flags = 0; atomic_set(&epoch->epoch_size, 0); /* atomic_set(&epoch->active, 0); is already zero */ - if (rv == FE_still_live) - rv = FE_recycled; + if (rv == FE_STILL_LIVE) + rv = FE_RECYCLED; } } @@ -1104,7 +1104,7 @@ STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, struct flush_work *fw; fw = kmalloc(sizeof(*fw), GFP_ATOMIC); if (fw) { - MTRACE(TraceTypeEpochs, TraceLvlMetrics, + MTRACE(TRACE_TYPE_EPOCHS, TRACE_LVL_METRICS, dev_info(DEV, "Schedule flush %p/%d { size=%d } nr_epochs=%d\n", epoch, epoch->barrier_nr, epoch_size, mdev->epochs); ); @@ -1115,8 +1115,8 @@ STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, dev_warn(DEV, "Could not kmalloc a flush_work obj\n"); set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags); /* That is not a recursion, only one level */ - drbd_may_finish_epoch(mdev, epoch, EV_barrier_done); - drbd_may_finish_epoch(mdev, epoch, EV_put); + drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE); + drbd_may_finish_epoch(mdev, epoch, EV_PUT); } } @@ -1156,7 +1156,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) */ int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __releases(local) { - struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; struct bio *bio = e->private_bio; /* We leave DE_CONTAINS_A_BARRIER and EE_IS_BARRIER in place, @@ -1202,10 +1202,10 @@ int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __relea return 1; } -STATIC int receive_Barrier(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int receive_Barrier(struct drbd_conf *mdev, struct p_header *h) { int rv, issue_flush; - struct Drbd_Barrier_Packet *p = (struct Drbd_Barrier_Packet *)h; + struct p_barrier *p = (struct p_barrier *)h; struct drbd_epoch *epoch; ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; @@ -1219,27 +1219,27 @@ STATIC int receive_Barrier(struct drbd_conf *mdev, struct Drbd_Header *h) drbd_kick_lo(mdev); mdev->current_epoch->barrier_nr = p->barrier; - rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_got_barrier_nr); + rv 
= drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR); - /* BarrierAck may imply that the corresponding extent is dropped from + /* P_BARRIER_ACK may imply that the corresponding extent is dropped from * the activity log, which means it would not be resynced in case the - * Primary crashes now. + * R_PRIMARY crashes now. * Therefore we must send the barrier_ack after the barrier request was * completed. */ switch (mdev->write_ordering) { case WO_bio_barrier: case WO_none: - if (rv == FE_recycled) + if (rv == FE_RECYCLED) return TRUE; break; case WO_bdev_flush: case WO_drain_io: - D_ASSERT(rv == FE_still_live); + D_ASSERT(rv == FE_STILL_LIVE); set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &mdev->current_epoch->flags); drbd_wait_ee_list_empty(mdev, &mdev->active_ee); rv = drbd_flush_after_epoch(mdev, mdev->current_epoch); - if (rv == FE_recycled) + if (rv == FE_RECYCLED) return TRUE; /* The asender will send all the ACKs and barrier ACKs out, since @@ -1255,7 +1255,7 @@ STATIC int receive_Barrier(struct drbd_conf *mdev, struct Drbd_Header *h) drbd_wait_ee_list_empty(mdev, &mdev->active_ee); if (issue_flush) { rv = drbd_flush_after_epoch(mdev, mdev->current_epoch); - if (rv == FE_recycled) + if (rv == FE_RECYCLED) return TRUE; } @@ -1273,7 +1273,7 @@ STATIC int receive_Barrier(struct drbd_conf *mdev, struct Drbd_Header *h) list_add(&epoch->list, &mdev->current_epoch->list); mdev->current_epoch = epoch; mdev->epochs++; - MTRACE(TraceTypeEpochs, TraceLvlMetrics, + MTRACE(TRACE_TYPE_EPOCHS, TRACE_LVL_METRICS, dev_info(DEV, "Allocate epoch %p/xxxx { } nr_epochs=%d\n", epoch, mdev->epochs); ); } else { @@ -1287,10 +1287,10 @@ STATIC int receive_Barrier(struct drbd_conf *mdev, struct Drbd_Header *h) /* used from receive_RSDataReply (recv_resync_read) * and from receive_Data */ -STATIC struct Tl_epoch_entry * +STATIC struct drbd_epoch_entry * read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __must_hold(local) { - struct Tl_epoch_entry *e; + struct drbd_epoch_entry *e; struct bio_vec *bvec; struct page *page; struct bio *bio; @@ -1440,7 +1440,7 @@ STATIC int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, * drbd_process_done_ee() by asender only */ STATIC int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) { - struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; sector_t sector = e->sector; int ok; @@ -1448,12 +1448,12 @@ STATIC int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u if (likely(drbd_bio_uptodate(e->private_bio))) { drbd_set_in_sync(mdev, sector, e->size); - ok = drbd_send_ack(mdev, RSWriteAck, e); + ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, e); } else { /* Record failure to sync */ drbd_rs_failed_io(mdev, sector, e->size); - ok = drbd_send_ack(mdev, NegAck, e); + ok = drbd_send_ack(mdev, P_NEG_ACK, e); ok &= drbd_io_error(mdev, FALSE); } dec_unacked(mdev); @@ -1463,7 +1463,7 @@ STATIC int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u STATIC int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local) { - struct Tl_epoch_entry *e; + struct drbd_epoch_entry *e; e = read_in_block(mdev, ID_SYNCER, sector, data_size); if (!e) { @@ -1485,7 +1485,7 @@ STATIC int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si list_add(&e->w.list, &mdev->sync_ee); spin_unlock_irq(&mdev->req_lock); - MTRACE(TraceTypeEE, TraceLvlAll, + MTRACE(TRACE_TYPE_EE, 
TRACE_LVL_ALL, dev_info(DEV, "submit EE (RS)WRITE sec=%llus size=%u ee=%p\n", (unsigned long long)e->sector, e->size, e); ); @@ -1497,13 +1497,13 @@ STATIC int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si return TRUE; } -STATIC int receive_DataReply(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int receive_DataReply(struct drbd_conf *mdev, struct p_header *h) { struct drbd_request *req; sector_t sector; unsigned int header_size, data_size; int ok; - struct Drbd_Data_Packet *p = (struct Drbd_Data_Packet *)h; + struct p_data *p = (struct p_data *)h; header_size = sizeof(*p) - sizeof(*h); data_size = h->length - header_size; @@ -1537,12 +1537,12 @@ STATIC int receive_DataReply(struct drbd_conf *mdev, struct Drbd_Header *h) return ok; } -STATIC int receive_RSDataReply(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h) { sector_t sector; unsigned int header_size, data_size; int ok; - struct Drbd_Data_Packet *p = (struct Drbd_Data_Packet *)h; + struct p_data *p = (struct p_data *)h; header_size = sizeof(*p) - sizeof(*h); data_size = h->length - header_size; @@ -1566,7 +1566,7 @@ STATIC int receive_RSDataReply(struct drbd_conf *mdev, struct Drbd_Header *h) ok = drbd_drain_block(mdev, data_size); - drbd_send_ack_dp(mdev, NegAck, p); + drbd_send_ack_dp(mdev, P_NEG_ACK, p); } return ok; @@ -1577,7 +1577,7 @@ STATIC int receive_RSDataReply(struct drbd_conf *mdev, struct Drbd_Header *h) */ STATIC int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) { - struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; sector_t sector = e->sector; struct drbd_epoch *epoch; int ok = 1, pcmd; @@ -1585,20 +1585,20 @@ STATIC int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) if (e->flags & EE_IS_BARRIER) { epoch = previous_epoch(mdev, e->epoch); if (epoch) - drbd_may_finish_epoch(mdev, epoch, EV_barrier_done); + drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE); } if (mdev->net_conf->wire_protocol == DRBD_PROT_C) { if (likely(drbd_bio_uptodate(e->private_bio))) { - pcmd = (mdev->state.conn >= SyncSource && - mdev->state.conn <= PausedSyncT && + pcmd = (mdev->state.conn >= C_SYNC_SOURCE && + mdev->state.conn <= C_PAUSED_SYNC_T && e->flags & EE_MAY_SET_IN_SYNC) ? - RSWriteAck : WriteAck; + P_RS_WRITE_ACK : P_WRITE_ACK; ok &= drbd_send_ack(mdev, pcmd, e); - if (pcmd == RSWriteAck) + if (pcmd == P_RS_WRITE_ACK) drbd_set_in_sync(mdev, sector, e->size); } else { - ok = drbd_send_ack(mdev, NegAck, e); + ok = drbd_send_ack(mdev, P_NEG_ACK, e); ok &= drbd_io_error(mdev, FALSE); /* we expect it to be marked out of sync anyways... * maybe assert this? */ @@ -1609,7 +1609,7 @@ STATIC int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) } /* we delete from the conflict detection hash _after_ we sent out the - * WriteAck / NegAck, to get the sequence number right. */ + * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. 
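e_end_block above reduces the protocol C acknowledgment to two questions: did the local write succeed, and may the block additionally be marked in sync (a resync-eligible write while conn is between C_SYNC_SOURCE and C_PAUSED_SYNC_T)? A compact sketch of that choice; the packet names match the patch, while the helper and its flag parameters are illustrative:

#include <stdio.h>

enum ack_cmd { P_WRITE_ACK, P_RS_WRITE_ACK, P_NEG_ACK };

/* Illustrative condensation of the protocol C ack choice in e_end_block. */
static enum ack_cmd pick_ack(int write_ok, int resync_running, int may_set_in_sync)
{
	if (!write_ok)
		return P_NEG_ACK;	/* local I/O failed: negative ack */
	if (resync_running && may_set_in_sync)
		return P_RS_WRITE_ACK;	/* ack that also marks the block in sync */
	return P_WRITE_ACK;		/* plain protocol C ack */
}

int main(void)
{
	printf("%d %d %d\n", pick_ack(1, 0, 0), pick_ack(1, 1, 1), pick_ack(0, 1, 1));
	return 0;
}

Choosing P_RS_WRITE_ACK over P_WRITE_ACK is what lets drbd_set_in_sync() be called on the same block right after the ack is queued, as the hunk above shows.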
*/ if (mdev->net_conf->two_primaries) { spin_lock_irq(&mdev->req_lock); D_ASSERT(!hlist_unhashed(&e->colision)); @@ -1619,18 +1619,18 @@ STATIC int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) D_ASSERT(hlist_unhashed(&e->colision)); } - drbd_may_finish_epoch(mdev, e->epoch, EV_put); + drbd_may_finish_epoch(mdev, e->epoch, EV_PUT); return ok; } STATIC int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused) { - struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; int ok = 1; D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); - ok = drbd_send_ack(mdev, DiscardAck, e); + ok = drbd_send_ack(mdev, P_DISCARD_ACK, e); spin_lock_irq(&mdev->req_lock); D_ASSERT(!hlist_unhashed(&e->colision)); @@ -1645,11 +1645,11 @@ STATIC int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u /* Called from receive_Data. * Synchronize packets on sock with packets on msock. * - * This is here so even when a Data packet traveling via sock overtook an Ack + * This is here so even when a P_DATA packet traveling via sock overtook an Ack * packet traveling on msock, they are still processed in the order they have * been sent. * - * Note: we don't care for Ack packets overtaking Data packets. + * Note: we don't care for Ack packets overtaking P_DATA packets. * * In case packet_seq is larger than mdev->peer_seq number, there are * outstanding packets on the msock. We wait for them to arrive. @@ -1696,11 +1696,11 @@ static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) } /* mirrored write */ -STATIC int receive_Data(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int receive_Data(struct drbd_conf *mdev, struct p_header *h) { sector_t sector; - struct Tl_epoch_entry *e; - struct Drbd_Data_Packet *p = (struct Drbd_Data_Packet *)h; + struct drbd_epoch_entry *e; + struct p_data *p = (struct p_data *)h; int header_size, data_size; int rw = WRITE; u32 dp_flags; @@ -1725,7 +1725,7 @@ STATIC int receive_Data(struct drbd_conf *mdev, struct Drbd_Header *h) mdev->peer_seq++; spin_unlock(&mdev->peer_seq_lock); - drbd_send_ack_dp(mdev, NegAck, p); + drbd_send_ack_dp(mdev, P_NEG_ACK, p); atomic_inc(&mdev->current_epoch->epoch_size); return drbd_drain_block(mdev, data_size); } @@ -1752,7 +1752,7 @@ STATIC int receive_Data(struct drbd_conf *mdev, struct Drbd_Header *h) a Barrier. */ epoch = list_entry(e->epoch->list.prev, struct drbd_epoch, list); if (epoch == e->epoch) { - MTRACE(TraceTypeEpochs, TraceLvlMetrics, + MTRACE(TRACE_TYPE_EPOCHS, TRACE_LVL_METRICS, dev_info(DEV, "Add barrier %p/%d\n", epoch, epoch->barrier_nr); ); @@ -1762,7 +1762,7 @@ STATIC int receive_Data(struct drbd_conf *mdev, struct Drbd_Header *h) } else { if (atomic_read(&epoch->epoch_size) > 1 || !test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) { - MTRACE(TraceTypeEpochs, TraceLvlMetrics, + MTRACE(TRACE_TYPE_EPOCHS, TRACE_LVL_METRICS, dev_info(DEV, "Add barrier %p/%d, setting bi in %p/%d\n", e->epoch, e->epoch->barrier_nr, epoch, epoch->barrier_nr); @@ -1823,7 +1823,7 @@ STATIC int receive_Data(struct drbd_conf *mdev, struct Drbd_Header *h) * if any conflicting request is found * that has not yet been acked, * AND I have the "discard concurrent writes" flag: - * queue (via done_ee) the DiscardAck; OUT. + * queue (via done_ee) the P_DISCARD_ACK; OUT. 
* * if any conflicting request is found: * block the receiver, waiting on misc_wait @@ -1832,7 +1832,7 @@ STATIC int receive_Data(struct drbd_conf *mdev, struct Drbd_Header *h) * * we do not just write after local io completion of those * requests, but only after req is done completely, i.e. - * we wait for the DiscardAck to arrive! + * we wait for the P_DISCARD_ACK to arrive! * * then proceed normally, i.e. submit. */ @@ -1880,7 +1880,7 @@ STATIC int receive_Data(struct drbd_conf *mdev, struct Drbd_Header *h) spin_unlock_irq(&mdev->req_lock); - /* we could probably send that DiscardAck ourselves, + /* we could probably send that P_DISCARD_ACK ourselves, * but I don't like the receiver using the msock */ dec_local(mdev); @@ -1926,21 +1926,21 @@ STATIC int receive_Data(struct drbd_conf *mdev, struct Drbd_Header *h) case DRBD_PROT_B: /* I really don't like it that the receiver thread * sends on the msock, but anyways */ - drbd_send_ack(mdev, RecvAck, e); + drbd_send_ack(mdev, P_RECV_ACK, e); break; case DRBD_PROT_A: /* nothing to do */ break; } - if (mdev->state.pdsk == Diskless) { + if (mdev->state.pdsk == D_DISKLESS) { /* In case we have the only disk of the cluster, */ drbd_set_out_of_sync(mdev, e->sector, e->size); e->flags |= EE_CALL_AL_COMPLETE_IO; drbd_al_begin_io(mdev, e->sector); } - MTRACE(TraceTypeEE, TraceLvlAll, + MTRACE(TRACE_TYPE_EE, TRACE_LVL_ALL, dev_info(DEV, "submit EE (DATA)WRITE sec=%llus size=%u ee=%p\n", (unsigned long long)e->sector, e->size, e); ); @@ -1962,16 +1962,16 @@ out_interrupted: return FALSE; } -STATIC int receive_DataRequest(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) { sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); - struct Tl_epoch_entry *e; + struct drbd_epoch_entry *e; struct digest_info *di; int size, digest_size; unsigned int fault_type; - struct Drbd_BlockRequest_Packet *p = - (struct Drbd_BlockRequest_Packet *)h; + struct p_block_req *p = + (struct p_block_req *)h; const int brps = sizeof(*p)-sizeof(*h); if (drbd_recv(mdev, h->payload, brps) != brps) @@ -1991,12 +1991,12 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct Drbd_Header *h) return FALSE; } - if (!inc_local_if_state(mdev, UpToDate)) { + if (!inc_local_if_state(mdev, D_UP_TO_DATE)) { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Can not satisfy peer's read request, " "no local data.\n"); - drbd_send_ack_rp(mdev, h->command == DataRequest ? NegDReply : - NegRSDReply , p); + drbd_send_ack_rp(mdev, h->command == P_DATA_REQUEST ? P_NEG_DREPLY : + P_NEG_RS_DREPLY , p); return TRUE; } @@ -2010,11 +2010,11 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct Drbd_Header *h) e->private_bio->bi_end_io = drbd_endio_read_sec; switch (h->command) { - case DataRequest: + case P_DATA_REQUEST: e->w.cb = w_e_end_data_req; fault_type = DRBD_FAULT_DT_RD; break; - case RSDataRequest: + case P_RS_DATA_REQUEST: e->w.cb = w_e_end_rsdata_req; fault_type = DRBD_FAULT_RS_RD; /* Eventually this should become asynchronous. 
Currently it @@ -2032,8 +2032,8 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct Drbd_Header *h) } break; - case OVReply: - case CsumRSRequest: + case P_OV_REPLY: + case P_CSUM_RS_REQUEST: fault_type = DRBD_FAULT_RS_RD; digest_size = h->length - brps ; di = kmalloc(sizeof(*di) + digest_size, GFP_KERNEL); @@ -2054,10 +2054,10 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct Drbd_Header *h) } e->block_id = (u64)(unsigned long)di; - if (h->command == CsumRSRequest) { + if (h->command == P_CSUM_RS_REQUEST) { D_ASSERT(mdev->agreed_pro_version >= 89); e->w.cb = w_e_end_csum_rs_req; - } else if (h->command == OVReply) { + } else if (h->command == P_OV_REPLY) { e->w.cb = w_e_end_ov_reply; dec_rs_pending(mdev); break; @@ -2073,7 +2073,7 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct Drbd_Header *h) } break; - case OVRequest: + case P_OV_REQUEST: e->w.cb = w_e_end_ov_req; fault_type = DRBD_FAULT_RS_RD; /* Eventually this should become asynchronous. Currently it @@ -2104,7 +2104,7 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct Drbd_Header *h) inc_unacked(mdev); - MTRACE(TraceTypeEE, TraceLvlAll, + MTRACE(TRACE_TYPE_EE, TRACE_LVL_ALL, dev_info(DEV, "submit EE READ sec=%llus size=%u ee=%p\n", (unsigned long long)e->sector, e->size, e); ); @@ -2121,21 +2121,21 @@ STATIC int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) int self, peer, rv = -100; unsigned long ch_self, ch_peer; - self = mdev->bc->md.uuid[Bitmap] & 1; - peer = mdev->p_uuid[Bitmap] & 1; + self = mdev->bc->md.uuid[UI_BITMAP] & 1; + peer = mdev->p_uuid[UI_BITMAP] & 1; - ch_peer = mdev->p_uuid[UUID_SIZE]; + ch_peer = mdev->p_uuid[UI_SIZE]; ch_self = mdev->comm_bm_set; switch (mdev->net_conf->after_sb_0p) { - case Consensus: - case DiscardSecondary: - case CallHelper: + case ASB_CONSENSUS: + case ASB_DISCARD_SECONDARY: + case ASB_CALL_HELPER: dev_err(DEV, "Configuration error.\n"); break; - case Disconnect: + case ASB_DISCONNECT: break; - case DiscardYoungerPri: + case ASB_DISCARD_YOUNGER_PRI: if (self == 0 && peer == 1) { rv = -1; break; @@ -2145,7 +2145,7 @@ STATIC int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) break; } /* Else fall through to one of the other strategies... */ - case DiscardOlderPri: + case ASB_DISCARD_OLDER_PRI: if (self == 0 && peer == 1) { rv = 1; break; @@ -2157,7 +2157,7 @@ STATIC int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) /* Else fall through to one of the other strategies... */ dev_warn(DEV, "Discard younger/older primary did not find a decision\n" "Using discard-least-changes instead\n"); - case DiscardZeroChg: + case ASB_DISCARD_ZERO_CHG: if (ch_peer == 0 && ch_self == 0) { rv = test_bit(DISCARD_CONCURRENT, &mdev->flags) ? -1 : 1; @@ -2166,9 +2166,9 @@ STATIC int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) if (ch_peer == 0) { rv = 1; break; } if (ch_self == 0) { rv = -1; break; } } - if (mdev->net_conf->after_sb_0p == DiscardZeroChg) + if (mdev->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG) break; - case DiscardLeastChg: + case ASB_DISCARD_LEAST_CHG: if (ch_self < ch_peer) rv = -1; else if (ch_self > ch_peer) @@ -2178,10 +2178,10 @@ STATIC int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) rv = test_bit(DISCARD_CONCURRENT, &mdev->flags) ? 
-1 : 1; break; - case DiscardLocal: + case ASB_DISCARD_LOCAL: rv = -1; break; - case DiscardRemote: + case ASB_DISCARD_REMOTE: rv = 1; } @@ -2192,36 +2192,36 @@ STATIC int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) { int self, peer, hg, rv = -100; - self = mdev->bc->md.uuid[Bitmap] & 1; - peer = mdev->p_uuid[Bitmap] & 1; + self = mdev->bc->md.uuid[UI_BITMAP] & 1; + peer = mdev->p_uuid[UI_BITMAP] & 1; switch (mdev->net_conf->after_sb_1p) { - case DiscardYoungerPri: - case DiscardOlderPri: - case DiscardLeastChg: - case DiscardLocal: - case DiscardRemote: + case ASB_DISCARD_YOUNGER_PRI: + case ASB_DISCARD_OLDER_PRI: + case ASB_DISCARD_LEAST_CHG: + case ASB_DISCARD_LOCAL: + case ASB_DISCARD_REMOTE: dev_err(DEV, "Configuration error.\n"); break; - case Disconnect: + case ASB_DISCONNECT: break; - case Consensus: + case ASB_CONSENSUS: hg = drbd_asb_recover_0p(mdev); - if (hg == -1 && mdev->state.role == Secondary) + if (hg == -1 && mdev->state.role == R_SECONDARY) rv = hg; - if (hg == 1 && mdev->state.role == Primary) + if (hg == 1 && mdev->state.role == R_PRIMARY) rv = hg; break; - case Violently: + case ASB_VIOLENTLY: rv = drbd_asb_recover_0p(mdev); break; - case DiscardSecondary: - return mdev->state.role == Primary ? 1 : -1; - case CallHelper: + case ASB_DISCARD_SECONDARY: + return mdev->state.role == R_PRIMARY ? 1 : -1; + case ASB_CALL_HELPER: hg = drbd_asb_recover_0p(mdev); - if (hg == -1 && mdev->state.role == Primary) { - self = drbd_set_role(mdev, Secondary, 0); - if (self != SS_Success) { + if (hg == -1 && mdev->state.role == R_PRIMARY) { + self = drbd_set_role(mdev, R_SECONDARY, 0); + if (self != SS_SUCCESS) { drbd_khelper(mdev, "pri-lost-after-sb"); } else { dev_warn(DEV, "Successfully gave up primary role.\n"); @@ -2238,29 +2238,29 @@ STATIC int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) { int self, peer, hg, rv = -100; - self = mdev->bc->md.uuid[Bitmap] & 1; - peer = mdev->p_uuid[Bitmap] & 1; + self = mdev->bc->md.uuid[UI_BITMAP] & 1; + peer = mdev->p_uuid[UI_BITMAP] & 1; switch (mdev->net_conf->after_sb_2p) { - case DiscardYoungerPri: - case DiscardOlderPri: - case DiscardLeastChg: - case DiscardLocal: - case DiscardRemote: - case Consensus: - case DiscardSecondary: + case ASB_DISCARD_YOUNGER_PRI: + case ASB_DISCARD_OLDER_PRI: + case ASB_DISCARD_LEAST_CHG: + case ASB_DISCARD_LOCAL: + case ASB_DISCARD_REMOTE: + case ASB_CONSENSUS: + case ASB_DISCARD_SECONDARY: dev_err(DEV, "Configuration error.\n"); break; - case Violently: + case ASB_VIOLENTLY: rv = drbd_asb_recover_0p(mdev); break; - case Disconnect: + case ASB_DISCONNECT: break; - case CallHelper: + case ASB_CALL_HELPER: hg = drbd_asb_recover_0p(mdev); if (hg == -1) { - self = drbd_set_role(mdev, Secondary, 0); - if (self != SS_Success) { + self = drbd_set_role(mdev, R_SECONDARY, 0); + if (self != SS_SUCCESS) { drbd_khelper(mdev, "pri-lost-after-sb"); } else { dev_warn(DEV, "Successfully gave up primary role.\n"); @@ -2282,21 +2282,21 @@ STATIC void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid, } dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n", text, - (unsigned long long)uuid[Current], - (unsigned long long)uuid[Bitmap], - (unsigned long long)uuid[History_start], - (unsigned long long)uuid[History_end], + (unsigned long long)uuid[UI_CURRENT], + (unsigned long long)uuid[UI_BITMAP], + (unsigned long long)uuid[UI_HISTORY_START], + (unsigned long long)uuid[UI_HISTORY_END], (unsigned long long)bits, (unsigned long long)flags); } /* 100 after split brain 
try auto recover - 2 SyncSource set BitMap - 1 SyncSource use BitMap + 2 C_SYNC_SOURCE set BitMap + 1 C_SYNC_SOURCE use BitMap 0 no Sync - -1 SyncTarget use BitMap - -2 SyncTarget set BitMap + -1 C_SYNC_TARGET use BitMap + -2 C_SYNC_TARGET set BitMap -100 after split brain, disconnect -1000 unrelated data */ @@ -2305,8 +2305,8 @@ STATIC int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l u64 self, peer; int i, j; - self = mdev->bc->md.uuid[Current] & ~((u64)1); - peer = mdev->p_uuid[Current] & ~((u64)1); + self = mdev->bc->md.uuid[UI_CURRENT] & ~((u64)1); + peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1); *rule_nr = 1; if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED) @@ -2327,11 +2327,11 @@ STATIC int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l int rct, dc; /* roles at crash time */ rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) + - (mdev->p_uuid[UUID_FLAGS] & 2); + (mdev->p_uuid[UI_FLAGS] & 2); /* lowest bit is set when we were primary, * next bit (weight 2) is set when peer was primary */ - MTRACE(TraceTypeUuid, TraceLvlMetrics, DUMPI(rct);); + MTRACE(TRACE_TYPE_UUID, TRACE_LVL_METRICS, DUMPI(rct);); switch (rct) { case 0: /* !self_pri && !peer_pri */ return 0; @@ -2339,46 +2339,46 @@ STATIC int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l case 2: /* !self_pri && peer_pri */ return -1; case 3: /* self_pri && peer_pri */ dc = test_bit(DISCARD_CONCURRENT, &mdev->flags); - MTRACE(TraceTypeUuid, TraceLvlMetrics, DUMPI(dc);); + MTRACE(TRACE_TYPE_UUID, TRACE_LVL_METRICS, DUMPI(dc);); return dc ? -1 : 1; } } *rule_nr = 5; - peer = mdev->p_uuid[Bitmap] & ~((u64)1); + peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1); if (self == peer) return -1; *rule_nr = 6; - for (i = History_start; i <= History_end; i++) { + for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) { peer = mdev->p_uuid[i] & ~((u64)1); if (self == peer) return -2; } *rule_nr = 7; - self = mdev->bc->md.uuid[Bitmap] & ~((u64)1); - peer = mdev->p_uuid[Current] & ~((u64)1); + self = mdev->bc->md.uuid[UI_BITMAP] & ~((u64)1); + peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1); if (self == peer) return 1; *rule_nr = 8; - for (i = History_start; i <= History_end; i++) { + for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) { self = mdev->bc->md.uuid[i] & ~((u64)1); if (self == peer) return 2; } *rule_nr = 9; - self = mdev->bc->md.uuid[Bitmap] & ~((u64)1); - peer = mdev->p_uuid[Bitmap] & ~((u64)1); + self = mdev->bc->md.uuid[UI_BITMAP] & ~((u64)1); + peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1); if (self == peer && self != ((u64)0)) return 100; *rule_nr = 10; - for (i = History_start; i <= History_end; i++) { + for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) { self = mdev->p_uuid[i] & ~((u64)1); - for (j = History_start; j <= History_end; j++) { + for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) { peer = mdev->p_uuid[j] & ~((u64)1); if (self == peer) return -100; @@ -2389,37 +2389,37 @@ STATIC int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l } /* drbd_sync_handshake() returns the new conn state on success, or - conn_mask (-1) on failure. + CONN_MASK (-1) on failure. 
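The comment table above fixes the contract of drbd_uuid_compare(): the sign of the result picks the sync direction, magnitude 2 forces a full sync (set the whole bitmap) while magnitude 1 reuses the existing bitmap, 100 means split brain with auto-recovery to be attempted, and -100/-1000 mean split brain and unrelated data. A small, purely illustrative decoder for such a result:

#include <stdio.h>

/* Decode a drbd_uuid_compare()-style result per the table above. */
static const char *hg_to_text(int hg)
{
	switch (hg) {
	case  100:  return "after split brain, try auto-recovery";
	case  2:    return "become sync source, set whole bitmap (full sync)";
	case  1:    return "become sync source, use existing bitmap";
	case  0:    return "no sync needed";
	case -1:    return "become sync target, use existing bitmap";
	case -2:    return "become sync target, set whole bitmap (full sync)";
	case -100:  return "split brain, disconnect";
	case -1000: return "unrelated data, disconnect";
	default:    return "unexpected value";
	}
}

int main(void)
{
	/* e.g. the peer's current UUID was found in our history (rule 6) */
	printf("%d -> %s\n", -2, hg_to_text(-2));
	return 0;
}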
*/ STATIC enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role, enum drbd_disk_state peer_disk) __must_hold(local) { int hg, rule_nr; - enum drbd_conns rv = conn_mask; + enum drbd_conns rv = C_MASK; enum drbd_disk_state mydisk; mydisk = mdev->state.disk; - if (mydisk == Negotiating) + if (mydisk == D_NEGOTIATING) mydisk = mdev->new_state_tmp.disk; hg = drbd_uuid_compare(mdev, &rule_nr); dev_info(DEV, "drbd_sync_handshake:\n"); drbd_uuid_dump(mdev, "self", mdev->bc->md.uuid, - mdev->state.disk >= Negotiating ? drbd_bm_total_weight(mdev) : 0, 0); + mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0); drbd_uuid_dump(mdev, "peer", mdev->p_uuid, - mdev->p_uuid[UUID_SIZE], mdev->p_uuid[UUID_FLAGS]); + mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]); dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr); if (hg == -1000) { dev_alert(DEV, "Unrelated data, aborting!\n"); - return conn_mask; + return C_MASK; } - if ((mydisk == Inconsistent && peer_disk > Inconsistent) || - (peer_disk == Inconsistent && mydisk > Inconsistent)) { + if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) || + (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) { int f = (hg == -100) || abs(hg) == 2; - hg = mydisk > Inconsistent ? 1 : -1; + hg = mydisk > D_INCONSISTENT ? 1 : -1; if (f) hg = hg*2; dev_info(DEV, "Becoming sync %s due to disk states.\n", @@ -2427,8 +2427,8 @@ STATIC enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } if (hg == 100 || (hg == -100 && mdev->net_conf->always_asbp)) { - int pcount = (mdev->state.role == Primary) - + (peer_role == Primary); + int pcount = (mdev->state.role == R_PRIMARY) + + (peer_role == R_PRIMARY); int forced = (hg == -100); switch (pcount) { @@ -2455,9 +2455,9 @@ STATIC enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } if (hg == -100) { - if (mdev->net_conf->want_lose && !(mdev->p_uuid[UUID_FLAGS]&1)) + if (mdev->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1)) hg = -1; - if (!mdev->net_conf->want_lose && (mdev->p_uuid[UUID_FLAGS]&1)) + if (!mdev->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1)) hg = 1; if (abs(hg) < 100) @@ -2469,24 +2469,24 @@ STATIC enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol if (hg == -100) { dev_alert(DEV, "Split-Brain detected, dropping connection!\n"); drbd_khelper(mdev, "split-brain"); - return conn_mask; + return C_MASK; } - if (hg > 0 && mydisk <= Inconsistent) { + if (hg > 0 && mydisk <= D_INCONSISTENT) { dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n"); - return conn_mask; + return C_MASK; } if (hg < 0 && /* by intention we do not use mydisk here. 
*/ - mdev->state.role == Primary && mdev->state.disk >= Consistent) { + mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) { switch (mdev->net_conf->rr_conflict) { - case CallHelper: + case ASB_CALL_HELPER: drbd_khelper(mdev, "pri-lost"); /* fall through */ - case Disconnect: + case ASB_DISCONNECT: dev_err(DEV, "I shall become SyncTarget, but I am primary!\n"); - return conn_mask; - case Violently: + return C_MASK; + case ASB_VIOLENTLY: dev_warn(DEV, "Becoming SyncTarget, violating the stable-data " "assumption\n"); } @@ -2495,15 +2495,15 @@ STATIC enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol if (abs(hg) >= 2) { dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake")) - return conn_mask; + return C_MASK; } if (hg > 0) { /* become sync source. */ - rv = WFBitMapS; + rv = C_WF_BITMAP_S; } else if (hg < 0) { /* become sync target */ - rv = WFBitMapT; + rv = C_WF_BITMAP_T; } else { - rv = Connected; + rv = C_CONNECTED; if (drbd_bm_total_weight(mdev)) { dev_info(DEV, "No resync, but %lu bits in bitmap!\n", drbd_bm_total_weight(mdev)); @@ -2516,16 +2516,16 @@ STATIC enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } /* returns 1 if invalid */ -STATIC int cmp_after_sb(enum after_sb_handler peer, enum after_sb_handler self) +STATIC int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self) { - /* DiscardRemote - DiscardLocal is valid */ - if ((peer == DiscardRemote && self == DiscardLocal) || - (self == DiscardRemote && peer == DiscardLocal)) + /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */ + if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) || - (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL)) + (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL)) return 0; - /* any other things with DiscardRemote or DiscardLocal are invalid */ - if (peer == DiscardRemote || peer == DiscardLocal || - self == DiscardRemote || self == DiscardLocal) + /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */ + if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL || + self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL) return 1; /* everything else is valid if they are equal on both sides. 
*/ @@ -2536,9 +2536,9 @@ STATIC int cmp_after_sb(enum after_sb_handler peer, enum after_sb_handler self) return 1; } -STATIC int receive_protocol(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int receive_protocol(struct drbd_conf *mdev, struct p_header *h) { - struct Drbd_Protocol_Packet *p = (struct Drbd_Protocol_Packet *)h; + struct p_protocol *p = (struct p_protocol *)h; int header_size, data_size; int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; int p_want_lose, p_two_primaries; @@ -2605,7 +2605,7 @@ STATIC int receive_protocol(struct drbd_conf *mdev, struct Drbd_Header *h) return TRUE; disconnect: - drbd_force_state(mdev, NS(conn, Disconnecting)); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); return FALSE; } @@ -2636,19 +2636,19 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, return tfm; } -STATIC int receive_SyncParam(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) { int ok = TRUE; - struct Drbd_SyncParam89_Packet *p = (struct Drbd_SyncParam89_Packet *)h; + struct p_rs_param_89 *p = (struct p_rs_param_89 *)h; unsigned int header_size, data_size, exp_max_sz; struct crypto_hash *verify_tfm = NULL; struct crypto_hash *csums_tfm = NULL; const int apv = mdev->agreed_pro_version; - exp_max_sz = apv <= 87 ? sizeof(struct Drbd_SyncParam_Packet) - : apv == 88 ? sizeof(struct Drbd_SyncParam_Packet) + exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param) + : apv == 88 ? sizeof(struct p_rs_param) + SHARED_SECRET_MAX - : /* 89 */ sizeof(struct Drbd_SyncParam89_Packet); + : /* 89 */ sizeof(struct p_rs_param_89); if (h->length > exp_max_sz) { dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n", @@ -2657,10 +2657,10 @@ STATIC int receive_SyncParam(struct drbd_conf *mdev, struct Drbd_Header *h) } if (apv <= 88) { - header_size = sizeof(struct Drbd_SyncParam_Packet) - sizeof(*h); + header_size = sizeof(struct p_rs_param) - sizeof(*h); data_size = h->length - header_size; } else /* apv >= 89 */ { - header_size = sizeof(struct Drbd_SyncParam89_Packet) - sizeof(*h); + header_size = sizeof(struct p_rs_param_89) - sizeof(*h); data_size = h->length - header_size; D_ASSERT(data_size == 0); } @@ -2700,7 +2700,7 @@ STATIC int receive_SyncParam(struct drbd_conf *mdev, struct Drbd_Header *h) } if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) { - if (mdev->state.conn == WFReportParams) { + if (mdev->state.conn == C_WF_REPORT_PARAMS) { dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n", mdev->sync_conf.verify_alg, p->verify_alg); goto disconnect; @@ -2712,7 +2712,7 @@ STATIC int receive_SyncParam(struct drbd_conf *mdev, struct Drbd_Header *h) } if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) { - if (mdev->state.conn == WFReportParams) { + if (mdev->state.conn == C_WF_REPORT_PARAMS) { dev_err(DEV, "Different csums-alg settings. 
me=\"%s\" peer=\"%s\"\n", mdev->sync_conf.csums_alg, p->csums_alg); goto disconnect; @@ -2746,7 +2746,7 @@ STATIC int receive_SyncParam(struct drbd_conf *mdev, struct Drbd_Header *h) return ok; disconnect: crypto_free_hash(verify_tfm); - drbd_force_state(mdev, NS(conn, Disconnecting)); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); return FALSE; } @@ -2769,10 +2769,10 @@ static void warn_if_differ_considerably(struct drbd_conf *mdev, (unsigned long long)a, (unsigned long long)b); } -STATIC int receive_sizes(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int receive_sizes(struct drbd_conf *mdev, struct p_header *h) { - struct Drbd_Sizes_Packet *p = (struct Drbd_Sizes_Packet *)h; - enum determin_dev_size_enum dd = unchanged; + struct p_sizes *p = (struct p_sizes *)h; + enum determine_dev_size dd = unchanged; unsigned int max_seg_s; sector_t p_size, p_usize, my_usize; int ldsc = 0; /* local disk size changed */ @@ -2785,9 +2785,9 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct Drbd_Header *h) p_size = be64_to_cpu(p->d_size); p_usize = be64_to_cpu(p->u_size); - if (p_size == 0 && mdev->state.disk == Diskless) { + if (p_size == 0 && mdev->state.disk == D_DISKLESS) { dev_err(DEV, "some backing storage is needed\n"); - drbd_force_state(mdev, NS(conn, Disconnecting)); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); return FALSE; } @@ -2804,7 +2804,7 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct Drbd_Header *h) /* if this is the first connect, or an otherwise expected * param exchange, choose the minimum */ - if (mdev->state.conn == WFReportParams) + if (mdev->state.conn == C_WF_REPORT_PARAMS) p_usize = min_not_zero((sector_t)mdev->bc->dc.disk_size, p_usize); @@ -2820,10 +2820,10 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct Drbd_Header *h) But allow online shrinking if we are connected. */ if (drbd_new_dev_size(mdev, mdev->bc) < drbd_get_capacity(mdev->this_bdev) && - mdev->state.disk >= Outdated && - mdev->state.conn < Connected) { + mdev->state.disk >= D_OUTDATED && + mdev->state.conn < C_CONNECTED) { dev_err(DEV, "The peer's disk size is too small!\n"); - drbd_force_state(mdev, NS(conn, Disconnecting)); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); mdev->bc->dc.disk_size = my_usize; dec_local(mdev); return FALSE; @@ -2843,18 +2843,18 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct Drbd_Header *h) drbd_set_my_capacity(mdev, p_size); } - if (mdev->p_uuid && mdev->state.conn <= Connected && inc_local(mdev)) { + if (mdev->p_uuid && mdev->state.conn <= C_CONNECTED && inc_local(mdev)) { nconn = drbd_sync_handshake(mdev, mdev->state.peer, mdev->state.pdsk); dec_local(mdev); - if (nconn == conn_mask) { - drbd_force_state(mdev, NS(conn, Disconnecting)); + if (nconn == C_MASK) { + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); return FALSE; } - if (drbd_request_state(mdev, NS(conn, nconn)) < SS_Success) { - drbd_force_state(mdev, NS(conn, Disconnecting)); + if (drbd_request_state(mdev, NS(conn, nconn)) < SS_SUCCESS) { + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); return FALSE; } } @@ -2873,16 +2873,16 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct Drbd_Header *h) dec_local(mdev); } - if (mdev->state.conn > WFReportParams) { + if (mdev->state.conn > C_WF_REPORT_PARAMS) { if (be64_to_cpu(p->c_size) != drbd_get_capacity(mdev->this_bdev) || ldsc) { /* we have different sizes, probabely peer * needs to know my new size... 
*/ drbd_send_sizes(mdev); } - if (dd == grew && mdev->state.conn == Connected) { - if (mdev->state.pdsk >= Inconsistent && - mdev->state.disk >= Inconsistent) + if (dd == grew && mdev->state.conn == C_CONNECTED) { + if (mdev->state.pdsk >= D_INCONSISTENT && + mdev->state.disk >= D_INCONSISTENT) resync_after_online_grow(mdev); else set_bit(RESYNC_AFTER_NEG, &mdev->flags); @@ -2892,9 +2892,9 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct Drbd_Header *h) return TRUE; } -STATIC int receive_uuids(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int receive_uuids(struct drbd_conf *mdev, struct p_header *h) { - struct Drbd_GenCnt_Packet *p = (struct Drbd_GenCnt_Packet *)h; + struct p_uuids *p = (struct p_uuids *)h; u64 *p_uuid; int i; @@ -2902,38 +2902,38 @@ STATIC int receive_uuids(struct drbd_conf *mdev, struct Drbd_Header *h) if (drbd_recv(mdev, h->payload, h->length) != h->length) return FALSE; - p_uuid = kmalloc(sizeof(u64)*EXT_UUID_SIZE, GFP_KERNEL); + p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_KERNEL); - for (i = Current; i < EXT_UUID_SIZE; i++) + for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++) p_uuid[i] = be64_to_cpu(p->uuid[i]); kfree(mdev->p_uuid); mdev->p_uuid = p_uuid; - if (mdev->state.conn < Connected && - mdev->state.disk < Inconsistent && - mdev->state.role == Primary && - (mdev->ed_uuid & ~((u64)1)) != (p_uuid[Current] & ~((u64)1))) { + if (mdev->state.conn < C_CONNECTED && + mdev->state.disk < D_INCONSISTENT && + mdev->state.role == R_PRIMARY && + (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) { dev_err(DEV, "Can only connect to data with current UUID=%016llX\n", (unsigned long long)mdev->ed_uuid); - drbd_force_state(mdev, NS(conn, Disconnecting)); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); return FALSE; } if (inc_local(mdev)) { int skip_initial_sync = - mdev->state.conn == Connected && + mdev->state.conn == C_CONNECTED && mdev->agreed_pro_version >= 90 && - mdev->bc->md.uuid[Current] == UUID_JUST_CREATED && - (p_uuid[UUID_FLAGS] & 8); + mdev->bc->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && + (p_uuid[UI_FLAGS] & 8); if (skip_initial_sync) { dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n"); drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, "clear_n_write from receive_uuids"); - _drbd_uuid_set(mdev, Current, p_uuid[Current]); - _drbd_uuid_set(mdev, Bitmap, 0); - _drbd_set_state(_NS2(mdev, disk, UpToDate, pdsk, UpToDate), - ChgStateVerbose, NULL); + _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]); + _drbd_uuid_set(mdev, UI_BITMAP, 0); + _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), + CS_VERBOSE, NULL); drbd_md_sync(mdev); } dec_local(mdev); @@ -2944,8 +2944,8 @@ STATIC int receive_uuids(struct drbd_conf *mdev, struct Drbd_Header *h) we are primary and are detaching from our disk. We need to see the new disk state... */ wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags)); - if (mdev->state.conn >= Connected && mdev->state.disk < Inconsistent) - drbd_set_ed_uuid(mdev, p_uuid[Current]); + if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT) + drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); return TRUE; } @@ -2954,18 +2954,18 @@ STATIC int receive_uuids(struct drbd_conf *mdev, struct Drbd_Header *h) * convert_state: * Switches the view of the state. 
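convert_state(), whose renamed definition follows, translates a state change expressed from the peer's point of view into the local one: the two role fields swap, and asymmetric connection states map to their mirror image through a small lookup table (C_STARTING_SYNC_S becomes C_STARTING_SYNC_T and vice versa, C_VERIFY_S becomes C_VERIFY_T, C_DISCONNECTING becomes C_TEAR_DOWN). A reduced sketch of the same mirroring idea; the types and enum values here are illustrative, not DRBD's packed state union:

enum conn { CONNECTED, STARTING_SYNC_S, STARTING_SYNC_T, N_CONNS };

struct view {
	int role;	/* my role */
	int peer;	/* peer's role */
	enum conn conn;
};

static const enum conn mirror[N_CONNS] = {
	[CONNECTED]       = CONNECTED,		/* symmetric, unchanged */
	[STARTING_SYNC_S] = STARTING_SYNC_T,	/* source here is target there */
	[STARTING_SYNC_T] = STARTING_SYNC_S,
};

/* Re-express a state seen by the peer in our own terms. */
static struct view convert_view(struct view ps)
{
	struct view ms = ps;

	ms.role = ps.peer;
	ms.peer = ps.role;
	ms.conn = mirror[ps.conn];
	return ms;
}

int main(void)
{
	struct view v = { 0, 1, STARTING_SYNC_S };

	v = convert_view(v);
	return v.conn == STARTING_SYNC_T ? 0 : 1;
}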
*/ -STATIC union drbd_state_t convert_state(union drbd_state_t ps) +STATIC union drbd_state convert_state(union drbd_state ps) { - union drbd_state_t ms; + union drbd_state ms; static enum drbd_conns c_tab[] = { - [Connected] = Connected, + [C_CONNECTED] = C_CONNECTED, - [StartingSyncS] = StartingSyncT, - [StartingSyncT] = StartingSyncS, - [Disconnecting] = TearDown, /* NetworkFailure, */ - [VerifyS] = VerifyT, - [conn_mask] = conn_mask, + [C_STARTING_SYNC_S] = C_STARTING_SYNC_T, + [C_STARTING_SYNC_T] = C_STARTING_SYNC_S, + [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */ + [C_VERIFY_S] = C_VERIFY_T, + [C_MASK] = C_MASK, }; ms.i = ps.i; @@ -2980,10 +2980,10 @@ STATIC union drbd_state_t convert_state(union drbd_state_t ps) return ms; } -STATIC int receive_req_state(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int receive_req_state(struct drbd_conf *mdev, struct p_header *h) { - struct Drbd_Req_State_Packet *p = (struct Drbd_Req_State_Packet *)h; - union drbd_state_t mask, val; + struct p_req_state *p = (struct p_req_state *)h; + union drbd_state mask, val; int rv; ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; @@ -2995,14 +2995,14 @@ STATIC int receive_req_state(struct drbd_conf *mdev, struct Drbd_Header *h) if (test_bit(DISCARD_CONCURRENT, &mdev->flags) && test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { - drbd_send_sr_reply(mdev, SS_ConcurrentStChg); + drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); return TRUE; } mask = convert_state(mask); val = convert_state(val); - rv = drbd_change_state(mdev, ChgStateVerbose, mask, val); + rv = drbd_change_state(mdev, CS_VERBOSE, mask, val); drbd_send_sr_reply(mdev, rv); drbd_md_sync(mdev); @@ -3010,11 +3010,11 @@ STATIC int receive_req_state(struct drbd_conf *mdev, struct Drbd_Header *h) return TRUE; } -STATIC int receive_state(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int receive_state(struct drbd_conf *mdev, struct p_header *h) { - struct Drbd_State_Packet *p = (struct Drbd_State_Packet *)h; + struct p_state *p = (struct p_state *)h; enum drbd_conns nconn, oconn; - union drbd_state_t ns, peer_state; + union drbd_state ns, peer_state; enum drbd_disk_state real_peer_disk; int rv; @@ -3027,8 +3027,8 @@ STATIC int receive_state(struct drbd_conf *mdev, struct Drbd_Header *h) peer_state.i = be32_to_cpu(p->state); real_peer_disk = peer_state.disk; - if (peer_state.disk == Negotiating) { - real_peer_disk = mdev->p_uuid[UUID_FLAGS] & 4 ? Inconsistent : Consistent; + if (peer_state.disk == D_NEGOTIATING) { + real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? 
D_INCONSISTENT : D_CONSISTENT; dev_info(DEV, "real peer disk state = %s\n", disks_to_name(real_peer_disk)); } @@ -3037,34 +3037,34 @@ STATIC int receive_state(struct drbd_conf *mdev, struct Drbd_Header *h) oconn = nconn = mdev->state.conn; spin_unlock_irq(&mdev->req_lock); - if (nconn == WFReportParams) - nconn = Connected; + if (nconn == C_WF_REPORT_PARAMS) + nconn = C_CONNECTED; - if (mdev->p_uuid && peer_state.disk >= Negotiating && - inc_local_if_state(mdev, Negotiating)) { + if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING && + inc_local_if_state(mdev, D_NEGOTIATING)) { int cr; /* consider resync */ - cr = (oconn < Connected); - cr |= (oconn == Connected && - (peer_state.disk == Negotiating || - mdev->state.disk == Negotiating)); + cr = (oconn < C_CONNECTED); + cr |= (oconn == C_CONNECTED && + (peer_state.disk == D_NEGOTIATING || + mdev->state.disk == D_NEGOTIATING)); cr |= test_bit(CONSIDER_RESYNC, &mdev->flags); /* peer forced */ - cr |= (oconn == Connected && peer_state.conn > Connected); + cr |= (oconn == C_CONNECTED && peer_state.conn > C_CONNECTED); if (cr) nconn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk); dec_local(mdev); - if (nconn == conn_mask) { - if (mdev->state.disk == Negotiating) { - drbd_force_state(mdev, NS(disk, Diskless)); - nconn = Connected; - } else if (peer_state.disk == Negotiating) { + if (nconn == C_MASK) { + if (mdev->state.disk == D_NEGOTIATING) { + drbd_force_state(mdev, NS(disk, D_DISKLESS)); + nconn = C_CONNECTED; + } else if (peer_state.disk == D_NEGOTIATING) { dev_err(DEV, "Disk attach process on the peer node was aborted.\n"); - peer_state.disk = Diskless; + peer_state.disk = D_DISKLESS; } else { - D_ASSERT(oconn == WFReportParams); - drbd_force_state(mdev, NS(conn, Disconnecting)); + D_ASSERT(oconn == C_WF_REPORT_PARAMS); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); return FALSE; } } @@ -3079,21 +3079,21 @@ STATIC int receive_state(struct drbd_conf *mdev, struct Drbd_Header *h) ns.peer = peer_state.role; ns.pdsk = real_peer_disk; ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp); - if ((nconn == Connected || nconn == WFBitMapS) && ns.disk == Negotiating) + if ((nconn == C_CONNECTED || nconn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING) ns.disk = mdev->new_state_tmp.disk; - rv = _drbd_set_state(mdev, ns, ChgStateVerbose | ChgStateHard, NULL); + rv = _drbd_set_state(mdev, ns, CS_VERBOSE | CS_HARD, NULL); ns = mdev->state; spin_unlock_irq(&mdev->req_lock); - if (rv < SS_Success) { - drbd_force_state(mdev, NS(conn, Disconnecting)); + if (rv < SS_SUCCESS) { + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); return FALSE; } - if (oconn > WFReportParams) { - if (nconn > Connected && peer_state.conn <= Connected && - peer_state.disk != Negotiating ) { + if (oconn > C_WF_REPORT_PARAMS) { + if (nconn > C_CONNECTED && peer_state.conn <= C_CONNECTED && + peer_state.disk != D_NEGOTIATING ) { /* we want resync, peer has not yet decided to sync... 
*/ /* Nowadays only used when forcing a node into primary role and setting its disk to UpToDate with that */ @@ -3109,15 +3109,15 @@ STATIC int receive_state(struct drbd_conf *mdev, struct Drbd_Header *h) return TRUE; } -STATIC int receive_sync_uuid(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h) { - struct Drbd_SyncUUID_Packet *p = (struct Drbd_SyncUUID_Packet *)h; + struct p_rs_uuid *p = (struct p_rs_uuid *)h; wait_event(mdev->misc_wait, - mdev->state.conn < Connected || - mdev->state.conn == WFSyncUUID); + mdev->state.conn < C_CONNECTED || + mdev->state.conn == C_WF_SYNC_UUID); - /* D_ASSERT( mdev->state.conn == WFSyncUUID ); */ + /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */ ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; if (drbd_recv(mdev, h->payload, h->length) != h->length) @@ -3125,11 +3125,11 @@ STATIC int receive_sync_uuid(struct drbd_conf *mdev, struct Drbd_Header *h) /* Here the _drbd_uuid_ functions are right, current should _not_ be rotated into the history */ - if (inc_local_if_state(mdev, Negotiating)) { - _drbd_uuid_set(mdev, Current, be64_to_cpu(p->uuid)); - _drbd_uuid_set(mdev, Bitmap, 0UL); + if (inc_local_if_state(mdev, D_NEGOTIATING)) { + _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid)); + _drbd_uuid_set(mdev, UI_BITMAP, 0UL); - drbd_start_resync(mdev, SyncTarget); + drbd_start_resync(mdev, C_SYNC_TARGET); dec_local(mdev); } else @@ -3141,7 +3141,7 @@ STATIC int receive_sync_uuid(struct drbd_conf *mdev, struct Drbd_Header *h) enum receive_bitmap_ret { OK, DONE, FAILED }; static enum receive_bitmap_ret -receive_bitmap_plain(struct drbd_conf *mdev, struct Drbd_Header *h, +receive_bitmap_plain(struct drbd_conf *mdev, struct p_header *h, unsigned long *buffer, struct bm_xfer_ctx *c) { unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); @@ -3168,7 +3168,7 @@ receive_bitmap_plain(struct drbd_conf *mdev, struct Drbd_Header *h, static enum receive_bitmap_ret recv_bm_rle_bits(struct drbd_conf *mdev, - struct Drbd_Compressed_Bitmap_Packet *p, + struct p_compressed_bm *p, struct bm_xfer_ctx *c) { struct bitstream bs; @@ -3228,7 +3228,7 @@ recv_bm_rle_bits(struct drbd_conf *mdev, static enum receive_bitmap_ret recv_bm_rle_bytes(struct drbd_conf *mdev, - struct Drbd_Compressed_Bitmap_Packet *p, + struct p_compressed_bm *p, struct bm_xfer_ctx *c) { u64 rl; @@ -3282,7 +3282,7 @@ recv_bm_rle_bytes(struct drbd_conf *mdev, static enum receive_bitmap_ret decode_bitmap_c(struct drbd_conf *mdev, - struct Drbd_Compressed_Bitmap_Packet *p, + struct p_compressed_bm *p, struct bm_xfer_ctx *c) { switch (DCBP_get_code(p)) { @@ -3304,7 +3304,7 @@ decode_bitmap_c(struct drbd_conf *mdev, void INFO_bm_xfer_stats(struct drbd_conf *mdev, const char *direction, struct bm_xfer_ctx *c) { - unsigned plain_would_take = sizeof(struct Drbd_Header) * + unsigned plain_would_take = sizeof(struct p_header) * ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1) + c->bm_words * sizeof(long); unsigned total = c->bytes[0] + c->bytes[1]; @@ -3334,7 +3334,7 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev, in order to be agnostic to the 32 vs 64 bits issue. returns 0 on failure, 1 if we successfully received it. 
*/ -STATIC int receive_bitmap(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int receive_bitmap(struct drbd_conf *mdev, struct p_header *h) { struct bm_xfer_ctx c; void *buffer; @@ -3359,12 +3359,12 @@ STATIC int receive_bitmap(struct drbd_conf *mdev, struct Drbd_Header *h) }; do { - if (h->command == ReportBitMap) { + if (h->command == P_BITMAP) { ret = receive_bitmap_plain(mdev, h, buffer, &c); - } else if (h->command == ReportCBitMap) { + } else if (h->command == P_COMPRESSED_BITMAP) { /* MAYBE: sanity check that we speak proto >= 90, * and the feature is enabled! */ - struct Drbd_Compressed_Bitmap_Packet *p; + struct p_compressed_bm *p; if (h->length > BM_PACKET_PAYLOAD_BYTES) { dev_err(DEV, "ReportCBitmap packet too large\n"); @@ -3385,8 +3385,8 @@ STATIC int receive_bitmap(struct drbd_conf *mdev, struct Drbd_Header *h) goto out; } - c.packets[h->command == ReportBitMap]++; - c.bytes[h->command == ReportBitMap] += sizeof(struct Drbd_Header) + h->length; + c.packets[h->command == P_BITMAP]++; + c.bytes[h->command == P_BITMAP] += sizeof(struct p_header) + h->length; if (ret != OK) break; @@ -3399,15 +3399,15 @@ STATIC int receive_bitmap(struct drbd_conf *mdev, struct Drbd_Header *h) INFO_bm_xfer_stats(mdev, "receive", &c); - if (mdev->state.conn == WFBitMapT) { + if (mdev->state.conn == C_WF_BITMAP_T) { ok = !drbd_send_bitmap(mdev); if (!ok) goto out; - /* Omit ChgOrdered with this state transition to avoid deadlocks. */ - ok = _drbd_request_state(mdev, NS(conn, WFSyncUUID), ChgStateVerbose); - D_ASSERT(ok == SS_Success); - } else if (mdev->state.conn != WFBitMapS) { - /* admin may have requested Disconnecting, + /* Omit CS_ORDERED with this state transition to avoid deadlocks. */ + ok = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); + D_ASSERT(ok == SS_SUCCESS); + } else if (mdev->state.conn != C_WF_BITMAP_S) { + /* admin may have requested C_DISCONNECTING, * other threads may have noticed network errors */ dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n", conns_to_name(mdev->state.conn)); @@ -3416,13 +3416,13 @@ STATIC int receive_bitmap(struct drbd_conf *mdev, struct Drbd_Header *h) ok = TRUE; out: drbd_bm_unlock(mdev); - if (ok && mdev->state.conn == WFBitMapS) - drbd_start_resync(mdev, SyncSource); + if (ok && mdev->state.conn == C_WF_BITMAP_S) + drbd_start_resync(mdev, C_SYNC_SOURCE); free_page((unsigned long) buffer); return ok; } -STATIC int receive_skip(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int receive_skip(struct drbd_conf *mdev, struct p_header *h) { /* TODO zero copy sink :) */ static char sink[128]; @@ -3441,9 +3441,9 @@ STATIC int receive_skip(struct drbd_conf *mdev, struct Drbd_Header *h) return size == 0; } -STATIC int receive_UnplugRemote(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h) { - if (mdev->state.disk >= Inconsistent) + if (mdev->state.disk >= D_INCONSISTENT) drbd_kick_lo(mdev); /* Make sure we've acked all the TCP data associated @@ -3453,32 +3453,32 @@ STATIC int receive_UnplugRemote(struct drbd_conf *mdev, struct Drbd_Header *h) return TRUE; } -typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct Drbd_Header *); +typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct p_header *); static drbd_cmd_handler_f drbd_default_handler[] = { - [Data] = receive_Data, - [DataReply] = receive_DataReply, - [RSDataReply] = receive_RSDataReply, - [Barrier] = receive_Barrier, - [ReportBitMap] = receive_bitmap, - [ReportCBitMap] = 
receive_bitmap, - [UnplugRemote] = receive_UnplugRemote, - [DataRequest] = receive_DataRequest, - [RSDataRequest] = receive_DataRequest, - [SyncParam] = receive_SyncParam, - [SyncParam89] = receive_SyncParam, - [ReportProtocol] = receive_protocol, - [ReportUUIDs] = receive_uuids, - [ReportSizes] = receive_sizes, - [ReportState] = receive_state, - [StateChgRequest] = receive_req_state, - [ReportSyncUUID] = receive_sync_uuid, - [OVRequest] = receive_DataRequest, - [OVReply] = receive_DataRequest, - [CsumRSRequest] = receive_DataRequest, + [P_DATA] = receive_Data, + [P_DATA_REPLY] = receive_DataReply, + [P_RS_DATA_REPLY] = receive_RSDataReply, + [P_BARRIER] = receive_Barrier, + [P_BITMAP] = receive_bitmap, + [P_COMPRESSED_BITMAP] = receive_bitmap, + [P_UNPLUG_REMOTE] = receive_UnplugRemote, + [P_DATA_REQUEST] = receive_DataRequest, + [P_RS_DATA_REQUEST] = receive_DataRequest, + [P_SYNC_PARAM] = receive_SyncParam, + [P_SYNC_PARAM89] = receive_SyncParam, + [P_PROTOCOL] = receive_protocol, + [P_UUIDS] = receive_uuids, + [P_SIZES] = receive_sizes, + [P_STATE] = receive_state, + [P_STATE_CHG_REQ] = receive_req_state, + [P_SYNC_UUID] = receive_sync_uuid, + [P_OV_REQUEST] = receive_DataRequest, + [P_OV_REPLY] = receive_DataRequest, + [P_CSUM_RS_REQUEST] = receive_DataRequest, /* anything missing from this table is in * the asender_tbl, see get_asender_cmd */ - [MAX_CMD] = NULL, + [P_MAX_CMD] = NULL, }; static drbd_cmd_handler_f *drbd_cmd_handler = drbd_default_handler; @@ -3487,19 +3487,19 @@ static drbd_cmd_handler_f *drbd_opt_cmd_handler; STATIC void drbdd(struct drbd_conf *mdev) { drbd_cmd_handler_f handler; - struct Drbd_Header *header = &mdev->data.rbuf.head; + struct p_header *header = &mdev->data.rbuf.header; while (get_t_state(&mdev->receiver) == Running) { drbd_thread_current_set_cpu(mdev); if (!drbd_recv_header(mdev, header)) break; - if (header->command < MAX_CMD) + if (header->command < P_MAX_CMD) handler = drbd_cmd_handler[header->command]; - else if (MayIgnore < header->command - && header->command < MAX_OPT_CMD) - handler = drbd_opt_cmd_handler[header->command-MayIgnore]; - else if (header->command > MAX_OPT_CMD) + else if (P_MAY_IGNORE < header->command + && header->command < P_MAX_OPT_CMD) + handler = drbd_opt_cmd_handler[header->command-P_MAY_IGNORE]; + else if (header->command > P_MAX_OPT_CMD) handler = receive_skip; else handler = NULL; @@ -3507,13 +3507,13 @@ STATIC void drbdd(struct drbd_conf *mdev) if (unlikely(!handler)) { dev_err(DEV, "unknown packet type %d, l: %d!\n", header->command, header->length); - drbd_force_state(mdev, NS(conn, ProtocolError)); + drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); break; } if (unlikely(!handler(mdev, header))) { dev_err(DEV, "error receiving %s, l: %d!\n", cmdname(header->command), header->length); - drbd_force_state(mdev, NS(conn, ProtocolError)); + drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); break; } @@ -3558,14 +3558,14 @@ STATIC void drbd_fail_pending_reads(struct drbd_conf *mdev) STATIC void drbd_disconnect(struct drbd_conf *mdev) { struct drbd_work prev_work_done; - enum fencing_policy fp; - union drbd_state_t os, ns; - int rv = SS_UnknownError; + enum drbd_fencing_p fp; + union drbd_state os, ns; + int rv = SS_UNKNOWN_ERROR; unsigned int i; - if (mdev->state.conn == StandAlone) + if (mdev->state.conn == C_STANDALONE) return; - if (mdev->state.conn >= WFConnection) + if (mdev->state.conn >= C_WF_CONNECTION) dev_err(DEV, "ASSERT FAILED cstate = %s, expected < WFConnection\n", conns_to_name(mdev->state.conn)); @@ 
-3586,10 +3586,10 @@ STATIC void drbd_disconnect(struct drbd_conf *mdev) /* We do not have data structures that would allow us to * get the rs_pending_cnt down to 0 again. - * * On SyncTarget we do not have any data structures describing + * * On C_SYNC_TARGET we do not have any data structures describing * the pending RSDataRequest's we have sent. - * * On SyncSource there is no data structure that tracks - * the RSDataReply blocks that we sent to the SyncTarget. + * * On C_SYNC_SOURCE there is no data structure that tracks + * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget. * And no, it is not the sum of the reference counts in the * resync_LRU. The resync_LRU tracks the whole operation including * the disk-IO, while the rs_pending_cnt only tracks the blocks @@ -3625,14 +3625,14 @@ STATIC void drbd_disconnect(struct drbd_conf *mdev) drbd_md_sync(mdev); - fp = DontCare; + fp = FP_DONT_CARE; if (inc_local(mdev)) { fp = mdev->bc->dc.fencing; dec_local(mdev); } - if (mdev->state.role == Primary) { - if (fp >= Resource && mdev->state.pdsk >= DUnknown) { + if (mdev->state.role == R_PRIMARY) { + if (fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN) { enum drbd_disk_state nps = drbd_try_outdate_peer(mdev); drbd_request_state(mdev, NS(pdsk, nps)); } @@ -3640,15 +3640,15 @@ STATIC void drbd_disconnect(struct drbd_conf *mdev) spin_lock_irq(&mdev->req_lock); os = mdev->state; - if (os.conn >= Unconnected) { - /* Do not restart in case we are Disconnecting */ + if (os.conn >= C_UNCONNECTED) { + /* Do not restart in case we are C_DISCONNECTING */ ns = os; - ns.conn = Unconnected; - rv = _drbd_set_state(mdev, ns, ChgStateVerbose, NULL); + ns.conn = C_UNCONNECTED; + rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); } spin_unlock_irq(&mdev->req_lock); - if (os.conn == Disconnecting) { + if (os.conn == C_DISCONNECTING) { struct hlist_head *h; wait_event(mdev->misc_wait, atomic_read(&mdev->net_cnt) == 0); @@ -3681,7 +3681,7 @@ STATIC void drbd_disconnect(struct drbd_conf *mdev) kfree(mdev->net_conf); mdev->net_conf = NULL; - drbd_request_state(mdev, NS(conn, StandAlone)); + drbd_request_state(mdev, NS(conn, C_STANDALONE)); } /* they do trigger all the time. @@ -3715,7 +3715,7 @@ STATIC void drbd_disconnect(struct drbd_conf *mdev) STATIC int drbd_send_handshake(struct drbd_conf *mdev) { /* ASSERT current == mdev->receiver ... */ - struct Drbd_HandShake_Packet *p = &mdev->data.sbuf.HandShake; + struct p_handshake *p = &mdev->data.sbuf.handshake; int ok; if (mutex_lock_interruptible(&mdev->data.mutex)) { @@ -3731,8 +3731,8 @@ STATIC int drbd_send_handshake(struct drbd_conf *mdev) memset(p, 0, sizeof(*p)); p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); - ok = _drbd_send_cmd( mdev, mdev->data.socket, HandShake, - (struct Drbd_Header *)p, sizeof(*p), 0 ); + ok = _drbd_send_cmd( mdev, mdev->data.socket, P_HAND_SHAKE, + (struct p_header *)p, sizeof(*p), 0 ); mutex_unlock(&mdev->data.mutex); return ok; } @@ -3747,9 +3747,9 @@ STATIC int drbd_send_handshake(struct drbd_conf *mdev) int drbd_do_handshake(struct drbd_conf *mdev) { /* ASSERT current == mdev->receiver ... 
*/ - struct Drbd_HandShake_Packet *p = &mdev->data.rbuf.HandShake; - const int expect = sizeof(struct Drbd_HandShake_Packet) - -sizeof(struct Drbd_Header); + struct p_handshake *p = &mdev->data.rbuf.handshake; + const int expect = sizeof(struct p_handshake) + -sizeof(struct p_header); int rv; rv = drbd_send_handshake(mdev); @@ -3760,7 +3760,7 @@ int drbd_do_handshake(struct drbd_conf *mdev) if (!rv) return 0; - if (p->head.command != HandShake) { + if (p->head.command != P_HAND_SHAKE) { dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n", cmdname(p->head.command), p->head.command); return -1; @@ -3822,7 +3822,7 @@ int drbd_do_auth(struct drbd_conf *mdev) char *response = NULL; char *right_response = NULL; char *peers_ch = NULL; - struct Drbd_Header p; + struct p_header p; unsigned int key_len = strlen(mdev->net_conf->shared_secret); unsigned int resp_size; struct hash_desc desc; @@ -3841,7 +3841,7 @@ int drbd_do_auth(struct drbd_conf *mdev) get_random_bytes(my_challenge, CHALLENGE_LEN); - rv = drbd_send_cmd2(mdev, AuthChallenge, my_challenge, CHALLENGE_LEN); + rv = drbd_send_cmd2(mdev, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN); if (!rv) goto fail; @@ -3849,7 +3849,7 @@ int drbd_do_auth(struct drbd_conf *mdev) if (!rv) goto fail; - if (p.command != AuthChallenge) { + if (p.command != P_AUTH_CHALLENGE) { dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n", cmdname(p.command), p.command); rv = 0; @@ -3895,7 +3895,7 @@ int drbd_do_auth(struct drbd_conf *mdev) goto fail; } - rv = drbd_send_cmd2(mdev, AuthResponse, response, resp_size); + rv = drbd_send_cmd2(mdev, P_AUTH_RESPONSE, response, resp_size); if (!rv) goto fail; @@ -3903,7 +3903,7 @@ int drbd_do_auth(struct drbd_conf *mdev) if (!rv) goto fail; - if (p.command != AuthResponse) { + if (p.command != P_AUTH_RESPONSE) { dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n", cmdname(p.command), p.command); rv = 0; @@ -3955,7 +3955,7 @@ int drbd_do_auth(struct drbd_conf *mdev) } #endif -STATIC int drbdd_init(struct Drbd_thread *thi) +STATIC int drbdd_init(struct drbd_thread *thi) { struct drbd_conf *mdev = thi->mdev; unsigned int minor = mdev_to_minor(mdev); @@ -3974,7 +3974,7 @@ STATIC int drbdd_init(struct Drbd_thread *thi) } if (h == -1) { dev_warn(DEV, "Discarding network configuration.\n"); - drbd_force_state(mdev, NS(conn, Disconnecting)); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); } } while (h == 0); @@ -3993,13 +3993,13 @@ STATIC int drbdd_init(struct Drbd_thread *thi) /* ********* acknowledge sender ******** */ -STATIC int got_RqSReply(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int got_RqSReply(struct drbd_conf *mdev, struct p_header *h) { - struct Drbd_RqS_Reply_Packet *p = (struct Drbd_RqS_Reply_Packet *)h; + struct p_req_state_reply *p = (struct p_req_state_reply *)h; int retcode = be32_to_cpu(p->retcode); - if (retcode >= SS_Success) { + if (retcode >= SS_SUCCESS) { set_bit(CL_ST_CHG_SUCCESS, &mdev->flags); } else { set_bit(CL_ST_CHG_FAIL, &mdev->flags); @@ -4011,13 +4011,13 @@ STATIC int got_RqSReply(struct drbd_conf *mdev, struct Drbd_Header *h) return TRUE; } -STATIC int got_Ping(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int got_Ping(struct drbd_conf *mdev, struct p_header *h) { return drbd_send_ping_ack(mdev); } -STATIC int got_PingAck(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int got_PingAck(struct drbd_conf *mdev, struct p_header *h) { /* restore idle timeout */ mdev->meta.socket->sk->sk_rcvtimeo = 
mdev->net_conf->ping_int*HZ; @@ -4025,9 +4025,9 @@ STATIC int got_PingAck(struct drbd_conf *mdev, struct Drbd_Header *h) return TRUE; } -STATIC int got_IsInSync(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int got_IsInSync(struct drbd_conf *mdev, struct p_header *h) { - struct Drbd_BlockAck_Packet *p = (struct Drbd_BlockAck_Packet *)h; + struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); int blksize = be32_to_cpu(p->blksize); @@ -4044,10 +4044,10 @@ STATIC int got_IsInSync(struct drbd_conf *mdev, struct Drbd_Header *h) return TRUE; } -STATIC int got_BlockAck(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int got_BlockAck(struct drbd_conf *mdev, struct p_header *h) { struct drbd_request *req; - struct Drbd_BlockAck_Packet *p = (struct Drbd_BlockAck_Packet *)h; + struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); int blksize = be32_to_cpu(p->blksize); @@ -4067,19 +4067,19 @@ STATIC int got_BlockAck(struct drbd_conf *mdev, struct Drbd_Header *h) } switch (be16_to_cpu(h->command)) { - case RSWriteAck: + case P_RS_WRITE_ACK: D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); _req_mod(req, write_acked_by_peer_and_sis, 0); break; - case WriteAck: + case P_WRITE_ACK: D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); _req_mod(req, write_acked_by_peer, 0); break; - case RecvAck: + case P_RECV_ACK: D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_B); _req_mod(req, recv_acked_by_peer, 0); break; - case DiscardAck: + case P_DISCARD_ACK: D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); dev_alert(DEV, "Got DiscardAck packet %llus +%u!" " DRBD is not a random data generator!\n", @@ -4096,9 +4096,9 @@ STATIC int got_BlockAck(struct drbd_conf *mdev, struct Drbd_Header *h) return TRUE; } -STATIC int got_NegAck(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int got_NegAck(struct drbd_conf *mdev, struct p_header *h) { - struct Drbd_BlockAck_Packet *p = (struct Drbd_BlockAck_Packet *)h; + struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); struct drbd_request *req; @@ -4130,10 +4130,10 @@ STATIC int got_NegAck(struct drbd_conf *mdev, struct Drbd_Header *h) return TRUE; } -STATIC int got_NegDReply(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int got_NegDReply(struct drbd_conf *mdev, struct p_header *h) { struct drbd_request *req; - struct Drbd_BlockAck_Packet *p = (struct Drbd_BlockAck_Packet *)h; + struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); spin_lock_irq(&mdev->req_lock); @@ -4155,11 +4155,11 @@ STATIC int got_NegDReply(struct drbd_conf *mdev, struct Drbd_Header *h) return TRUE; } -STATIC int got_NegRSDReply(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int got_NegRSDReply(struct drbd_conf *mdev, struct p_header *h) { sector_t sector; int size; - struct Drbd_BlockAck_Packet *p = (struct Drbd_BlockAck_Packet *)h; + struct p_block_ack *p = (struct p_block_ack *)h; sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); @@ -4169,7 +4169,7 @@ STATIC int got_NegRSDReply(struct drbd_conf *mdev, struct Drbd_Header *h) dec_rs_pending(mdev); - if (inc_local_if_state(mdev, Failed)) { + if (inc_local_if_state(mdev, D_FAILED)) { drbd_rs_complete_io(mdev, sector); drbd_rs_failed_io(mdev, sector, size); dec_local(mdev); @@ -4178,18 +4178,18 @@ STATIC int got_NegRSDReply(struct drbd_conf *mdev, struct Drbd_Header *h) return TRUE; } -STATIC int got_BarrierAck(struct drbd_conf *mdev, struct 
Drbd_Header *h) +STATIC int got_BarrierAck(struct drbd_conf *mdev, struct p_header *h) { - struct Drbd_BarrierAck_Packet *p = (struct Drbd_BarrierAck_Packet *)h; + struct p_barrier_ack *p = (struct p_barrier_ack *)h; tl_release(mdev, p->barrier, be32_to_cpu(p->set_size)); return TRUE; } -STATIC int got_OVResult(struct drbd_conf *mdev, struct Drbd_Header *h) +STATIC int got_OVResult(struct drbd_conf *mdev, struct p_header *h) { - struct Drbd_BlockAck_Packet *p = (struct Drbd_BlockAck_Packet *)h; + struct p_block_ack *p = (struct p_block_ack *)h; struct drbd_work *w; sector_t sector; int size; @@ -4222,7 +4222,7 @@ STATIC int got_OVResult(struct drbd_conf *mdev, struct Drbd_Header *h) struct asender_cmd { size_t pkt_size; - int (*process)(struct drbd_conf *mdev, struct Drbd_Header *h); + int (*process)(struct drbd_conf *mdev, struct p_header *h); }; static struct asender_cmd *get_asender_cmd(int cmd) @@ -4231,36 +4231,34 @@ static struct asender_cmd *get_asender_cmd(int cmd) /* anything missing from this table is in * the drbd_cmd_handler (drbd_default_handler) table, * see the beginning of drbdd() */ - [Ping] = { sizeof(struct Drbd_Header), got_Ping }, - [PingAck] = { sizeof(struct Drbd_Header), got_PingAck }, - [RecvAck] = { sizeof(struct Drbd_BlockAck_Packet), got_BlockAck }, - [WriteAck] = { sizeof(struct Drbd_BlockAck_Packet), got_BlockAck }, - [RSWriteAck] = { sizeof(struct Drbd_BlockAck_Packet), got_BlockAck }, - [DiscardAck] = { sizeof(struct Drbd_BlockAck_Packet), got_BlockAck }, - [NegAck] = { sizeof(struct Drbd_BlockAck_Packet), got_NegAck }, - [NegDReply] = { sizeof(struct Drbd_BlockAck_Packet), got_NegDReply }, - [NegRSDReply] = { sizeof(struct Drbd_BlockAck_Packet), got_NegRSDReply}, - [OVResult] = { sizeof(struct Drbd_BlockAck_Packet), got_OVResult }, - [BarrierAck] = { sizeof(struct Drbd_BarrierAck_Packet), got_BarrierAck }, - [StateChgReply] = { sizeof(struct Drbd_RqS_Reply_Packet), got_RqSReply }, - [RSIsInSync] = { sizeof(struct Drbd_BlockAck_Packet), got_IsInSync }, - [MAX_CMD] = { 0, NULL }, + [P_PING] = { sizeof(struct p_header), got_Ping }, + [P_PING_ACK] = { sizeof(struct p_header), got_PingAck }, + [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_DISCARD_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck }, + [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply }, + [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply}, + [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, + [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, + [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, }; - if (cmd > MAX_CMD) + if (cmd > P_MAX_CMD) return NULL; return &asender_tbl[cmd]; } -STATIC int drbd_asender(struct Drbd_thread *thi) +STATIC int drbd_asender(struct drbd_thread *thi) { struct drbd_conf *mdev = thi->mdev; - struct Drbd_Header *h = &mdev->meta.rbuf.head; + struct p_header *h = &mdev->meta.rbuf.header; struct asender_cmd *cmd = NULL; int rv, len; void *buf = h; int received = 0; - int expect = sizeof(struct Drbd_Header); + int expect = sizeof(struct p_header); int empty; sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); @@ -4360,7 +4358,7 @@ STATIC int drbd_asender(struct Drbd_thread *thi) goto disconnect; } expect = cmd->pkt_size; - ERR_IF(len != expect-sizeof(struct Drbd_Header)) { + 
ERR_IF(len != expect-sizeof(struct p_header)) { dump_packet(mdev, mdev->meta.socket, 1, (void *)h, __FILE__, __LINE__); DUMPI(expect); goto reconnect; @@ -4374,22 +4372,22 @@ STATIC int drbd_asender(struct Drbd_thread *thi) buf = h; received = 0; - expect = sizeof(struct Drbd_Header); + expect = sizeof(struct p_header); cmd = NULL; } } if (0) { reconnect: - drbd_force_state(mdev, NS(conn, NetworkFailure)); + drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); } if (0) { disconnect: - drbd_force_state(mdev, NS(conn, Disconnecting)); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); } clear_bit(SIGNAL_ASENDER, &mdev->flags); - D_ASSERT(mdev->state.conn < Connected); + D_ASSERT(mdev->state.conn < C_CONNECTED); dev_info(DEV, "asender terminated\n"); return 0; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index cace6b7d9d27..cbfcb6b8b4d4 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -93,9 +93,9 @@ STATIC void _print_req_mod(struct drbd_request *req, enum drbd_req_event what) # ifdef ENABLE_DYNAMIC_TRACE # define print_rq_state(R, T) \ - MTRACE(TraceTypeRq, TraceLvlMetrics, _print_rq_state(R, T);) + MTRACE(TRACE_TYPE_RQ, TRACE_LVL_METRICS, _print_rq_state(R, T);) # define print_req_mod(T, W) \ - MTRACE(TraceTypeRq, TraceLvlMetrics, _print_req_mod(T, W);) + MTRACE(TRACE_TYPE_RQ, TRACE_LVL_METRICS, _print_req_mod(T, W);) # else # define print_rq_state(R, T) _print_rq_state(R, T) # define print_req_mod(T, W) _print_req_mod(T, W) @@ -166,7 +166,7 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const * we would forget to resync the corresponding extent. */ if (s & RQ_LOCAL_MASK) { - if (inc_local_if_state(mdev, Failed)) { + if (inc_local_if_state(mdev, D_FAILED)) { drbd_al_complete_io(mdev, req->sector); dec_local(mdev); } else if (__ratelimit(&drbd_ratelimit_state)) { @@ -207,7 +207,7 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const static void queue_barrier(struct drbd_conf *mdev) { - struct drbd_barrier *b; + struct drbd_tl_epoch *b; /* We are within the req_lock. Once we queued the barrier for sending, * we set the CREATE_BARRIER bit. It is cleared as soon as a new @@ -217,7 +217,7 @@ static void queue_barrier(struct drbd_conf *mdev) if (test_bit(CREATE_BARRIER, &mdev->flags)) return; - b = mdev->newest_barrier; + b = mdev->newest_tle; b->w.cb = w_send_barrier; /* inc_ap_pending done here, so we won't * get imbalanced on connection loss. @@ -233,14 +233,14 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, { const unsigned long s = req->rq_state; struct drbd_request *i; - struct Tl_epoch_entry *e; + struct drbd_epoch_entry *e; struct hlist_node *n; struct hlist_head *slot; /* before we can signal completion to the upper layers, * we may need to close the current epoch */ - if (mdev->state.conn >= Connected && - req->epoch == mdev->newest_barrier->br_number) + if (mdev->state.conn >= C_CONNECTED && + req->epoch == mdev->newest_tle->br_number) queue_barrier(mdev); /* we need to do the conflict detection stuff, @@ -269,7 +269,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, * * currently, there can be only _one_ such ee * (well, or some more, which would be pending - * DiscardAck not yet sent by the asender...), + * P_DISCARD_ACK not yet sent by the asender...), * since we block the receiver thread upon the * first conflict detection, which will wait on * misc_wait. maybe we want to assert that? 
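The transfer-log epoch bookkeeping renamed in the surrounding drbd_req.c hunks can be summarised with a small stand-alone sketch. This is not the driver code: the struct below models only the two renamed fields (br_number, n_req); the req_lock serialisation, the work queue, and the wire protocol are left out, and queue_barrier() here merely simulates closing an epoch once it outgrows the max_epoch_size limit checked in the following hunks.

#include <stdio.h>

/* Minimal stand-in for struct drbd_tl_epoch: only the fields this
 * patch renames are modeled. */
struct tl_epoch {
	unsigned int br_number;	/* barrier number identifying the epoch */
	unsigned int n_req;	/* write requests queued in this epoch */
};

/* Simulated queue_barrier(): close the current epoch, open the next. */
static void queue_barrier(struct tl_epoch *e)
{
	printf("barrier %u closes epoch after %u requests\n",
	       e->br_number, e->n_req);
	e->br_number++;
	e->n_req = 0;
}

/* Account one write request against the newest epoch; when the epoch
 * outgrows the limit, close it (cf. the n_req >= max_epoch_size check). */
static void add_write(struct tl_epoch *e, unsigned int max_epoch_size)
{
	if (++e->n_req >= max_epoch_size)
		queue_barrier(e);
}

int main(void)
{
	struct tl_epoch e = { 1, 0 };
	unsigned int i;

	for (i = 0; i < 10; i++)
		add_write(&e, 4);	/* tiny epoch limit for demonstration */
	return 0;
}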
@@ -324,8 +324,8 @@ void _req_may_be_done(struct drbd_request *req, int error) if (req->master_bio) { /* this is data_received (remote read) - * or protocol C WriteAck - * or protocol B RecvAck + * or protocol C P_WRITE_ACK + * or protocol B P_RECV_ACK * or protocol A "handed_over_to_network" (SendAck) * or canceled or failed, * or killed from the transfer log due to connection loss. @@ -367,8 +367,8 @@ void _req_may_be_done(struct drbd_request *req, int error) if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) { /* this is disconnected (local only) operation, - * or protocol C WriteAck, - * or protocol A or B BarrierAck, + * or protocol C P_WRITE_ACK, + * or protocol A or B P_BARRIER_ACK, * or killed from the transfer log due to connection loss. */ _req_is_done(mdev, req, rw); } @@ -405,7 +405,7 @@ STATIC int _req_conflicts(struct drbd_request *req) const sector_t sector = req->sector; const int size = req->size; struct drbd_request *i; - struct Tl_epoch_entry *e; + struct drbd_epoch_entry *e; struct hlist_node *n; struct hlist_head *slot; @@ -624,12 +624,12 @@ void _req_mod(struct drbd_request *req, enum drbd_req_event what, int error) * just after it grabs the req_lock */ D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0); - req->epoch = mdev->newest_barrier->br_number; + req->epoch = mdev->newest_tle->br_number; list_add_tail(&req->tl_requests, - &mdev->newest_barrier->requests); + &mdev->newest_tle->requests); /* increment size of current epoch */ - mdev->newest_barrier->n_req++; + mdev->newest_tle->n_req++; /* queue work item to send data */ D_ASSERT(req->rq_state & RQ_NET_PENDING); @@ -638,7 +638,7 @@ void _req_mod(struct drbd_request *req, enum drbd_req_event what, int error) drbd_queue_work(&mdev->data.work, &req->w); /* close the epoch, in case it outgrew the limit */ - if (mdev->newest_barrier->n_req >= mdev->net_conf->max_epoch_size) + if (mdev->newest_tle->n_req >= mdev->net_conf->max_epoch_size) queue_barrier(mdev); break; @@ -709,7 +709,7 @@ void _req_mod(struct drbd_request *req, enum drbd_req_event what, int error) * A barrier request is expected to have forced all prior * requests onto stable storage, so completion of a barrier * request could set NET_DONE right here, and not wait for the - * BarrierAck, but that is an unecessary optimisation. */ + * P_BARRIER_ACK, but that is an unnecessary optimisation. 
*/ /* this makes it effectively the same as for: */ case recv_acked_by_peer: @@ -770,13 +770,13 @@ STATIC int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s unsigned long sbnr, ebnr; sector_t esector, nr_sectors; - if (mdev->state.disk == UpToDate) + if (mdev->state.disk == D_UP_TO_DATE) return 1; - if (mdev->state.disk >= Outdated) + if (mdev->state.disk >= D_OUTDATED) return 0; - if (mdev->state.disk < Inconsistent) + if (mdev->state.disk < D_INCONSISTENT) return 0; - /* state.disk == Inconsistent We will have a look at the BitMap */ + /* state.disk == D_INCONSISTENT We will have a look at the BitMap */ nr_sectors = drbd_get_capacity(mdev->this_bdev); esector = sector + (size >> 9) - 1; @@ -794,7 +794,7 @@ STATIC int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) const int rw = bio_rw(bio); const int size = bio->bi_size; const sector_t sector = bio->bi_sector; - struct drbd_barrier *b = NULL; + struct drbd_tl_epoch *b = NULL; struct drbd_request *req; int local, remote; int err = -EIO; @@ -834,18 +834,18 @@ STATIC int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) dec_local(mdev); } } - remote = !local && mdev->state.pdsk >= UpToDate; + remote = !local && mdev->state.pdsk >= D_UP_TO_DATE; } /* If we have a disk, but a READA request is mapped to remote, - * we are Primary, Inconsistent, SyncTarget. + * we are R_PRIMARY, D_INCONSISTENT, SyncTarget. * Just fail that READA request right here. * * THINK: maybe fail all READA when not local? * or make this configurable... * if network is slow, READA won't do any good. */ - if (rw == READA && mdev->state.disk >= Inconsistent && !local) { + if (rw == READA && mdev->state.disk >= D_INCONSISTENT && !local) { err = -EWOULDBLOCK; goto fail_and_free_req; } @@ -858,9 +858,9 @@ STATIC int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) if (rw == WRITE && local) drbd_al_begin_io(mdev, sector); - remote = remote && (mdev->state.pdsk == UpToDate || - (mdev->state.pdsk == Inconsistent && - mdev->state.conn >= Connected)); + remote = remote && (mdev->state.pdsk == D_UP_TO_DATE || + (mdev->state.pdsk == D_INCONSISTENT && + mdev->state.conn >= C_CONNECTED)); if (!(local || remote)) { dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); @@ -868,16 +868,16 @@ STATIC int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) } /* For WRITE request, we have to make sure that we have an - * unused_spare_barrier, in case we need to start a new epoch. + * unused_spare_tle, in case we need to start a new epoch. * I try to be smart and avoid to pre-allocate always "just in case", * but there is a race between testing the bit and pointer outside the * spinlock, and grabbing the spinlock. * if we lost that race, we retry. 
*/ if (rw == WRITE && remote && - mdev->unused_spare_barrier == NULL && + mdev->unused_spare_tle == NULL && test_bit(CREATE_BARRIER, &mdev->flags)) { allocate_barrier: - b = kmalloc(sizeof(struct drbd_barrier), GFP_NOIO); + b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_NOIO); if (!b) { dev_err(DEV, "Failed to alloc barrier.\n"); err = -ENOMEM; @@ -889,9 +889,9 @@ allocate_barrier: spin_lock_irq(&mdev->req_lock); if (remote) { - remote = (mdev->state.pdsk == UpToDate || - (mdev->state.pdsk == Inconsistent && - mdev->state.conn >= Connected)); + remote = (mdev->state.pdsk == D_UP_TO_DATE || + (mdev->state.pdsk == D_INCONSISTENT && + mdev->state.conn >= C_CONNECTED)); if (!remote) dev_warn(DEV, "lost connection while grabbing the req_lock!\n"); if (!(local || remote)) { @@ -901,12 +901,12 @@ allocate_barrier: } } - if (b && mdev->unused_spare_barrier == NULL) { - mdev->unused_spare_barrier = b; + if (b && mdev->unused_spare_tle == NULL) { + mdev->unused_spare_tle = b; b = NULL; } if (rw == WRITE && remote && - mdev->unused_spare_barrier == NULL && + mdev->unused_spare_tle == NULL && test_bit(CREATE_BARRIER, &mdev->flags)) { /* someone closed the current epoch * while we were grabbing the spinlock */ @@ -928,10 +928,10 @@ allocate_barrier: * barrier packet. To get the write ordering right, we only have to * make sure that, if this is a write request and it triggered a * barrier packet, this request is queued within the same spinlock. */ - if (remote && mdev->unused_spare_barrier && + if (remote && mdev->unused_spare_tle && test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { - _tl_add_barrier(mdev, mdev->unused_spare_barrier); - mdev->unused_spare_barrier = NULL; + _tl_add_barrier(mdev, mdev->unused_spare_tle); + mdev->unused_spare_tle = NULL; } else { D_ASSERT(!(remote && rw == WRITE && test_bit(CREATE_BARRIER, &mdev->flags))); @@ -988,7 +988,7 @@ allocate_barrier: /* NOTE remote first: to get the concurrent write detection right, * we must register the request before start of local IO. */ if (remote) { - /* either WRITE and Connected, + /* either WRITE and C_CONNECTED, * or READ, and no local disk, * or READ, but not in sync. */ @@ -1044,11 +1044,11 @@ fail_and_free_req: static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write) { /* Unconfigured */ - if (mdev->state.conn == Disconnecting && - mdev->state.disk == Diskless) + if (mdev->state.conn == C_DISCONNECTING && + mdev->state.disk == D_DISKLESS) return 1; - if (mdev->state.role != Primary && + if (mdev->state.role != R_PRIMARY && (!allow_oos || is_write)) { if (__ratelimit(&drbd_ratelimit_state)) { dev_err(DEV, "Process %s[%u] tried to %s; " @@ -1069,7 +1069,7 @@ static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write) * to serialize state changes, this is racy, since we may lose * the connection *after* we test for the cstate. 
*/ - if (mdev->state.disk < UpToDate && mdev->state.pdsk < UpToDate) { + if (mdev->state.disk < D_UP_TO_DATE && mdev->state.pdsk < D_UP_TO_DATE) { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Sorry, I have no access to good data anymore.\n"); return 1; diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 6c7c9635da30..8866ea62f431 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -39,7 +39,7 @@ Try to get the locking right :) */ /* - * Objects of type struct drbd_request do only exist on a Primary node, and are + * Objects of type struct drbd_request do only exist on a R_PRIMARY node, and are * associated with IO requests originating from the block layer above us. * * There are quite a few things that may happen to a drbd request @@ -168,7 +168,7 @@ enum drbd_req_state_bits { __RQ_NET_SENT, /* when set, the request may be freed (if RQ_NET_QUEUED is clear). - * basically this means the corresponding BarrierAck was received */ + * basically this means the corresponding P_BARRIER_ACK was received */ __RQ_NET_DONE, /* whether or not we know (C) or pretend (B,A) that the write diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c index 491019c8331d..b230693f35e6 100644 --- a/drivers/block/drbd/drbd_strings.c +++ b/drivers/block/drbd/drbd_strings.c @@ -26,90 +26,88 @@ #include static const char *drbd_conn_s_names[] = { - [StandAlone] = "StandAlone", - [Disconnecting] = "Disconnecting", - [Unconnected] = "Unconnected", - [Timeout] = "Timeout", - [BrokenPipe] = "BrokenPipe", - [NetworkFailure] = "NetworkFailure", - [ProtocolError] = "ProtocolError", - [WFConnection] = "WFConnection", - [WFReportParams] = "WFReportParams", - [TearDown] = "TearDown", - [Connected] = "Connected", - [StartingSyncS] = "StartingSyncS", - [StartingSyncT] = "StartingSyncT", - [WFBitMapS] = "WFBitMapS", - [WFBitMapT] = "WFBitMapT", - [WFSyncUUID] = "WFSyncUUID", - [SyncSource] = "SyncSource", - [SyncTarget] = "SyncTarget", - [VerifyS] = "VerifyS", - [VerifyT] = "VerifyT", - [PausedSyncS] = "PausedSyncS", - [PausedSyncT] = "PausedSyncT" + [C_STANDALONE] = "StandAlone", + [C_DISCONNECTING] = "Disconnecting", + [C_UNCONNECTED] = "Unconnected", + [C_TIMEOUT] = "Timeout", + [C_BROKEN_PIPE] = "BrokenPipe", + [C_NETWORK_FAILURE] = "NetworkFailure", + [C_PROTOCOL_ERROR] = "ProtocolError", + [C_WF_CONNECTION] = "WFConnection", + [C_WF_REPORT_PARAMS] = "WFReportParams", + [C_TEAR_DOWN] = "TearDown", + [C_CONNECTED] = "Connected", + [C_STARTING_SYNC_S] = "StartingSyncS", + [C_STARTING_SYNC_T] = "StartingSyncT", + [C_WF_BITMAP_S] = "WFBitMapS", + [C_WF_BITMAP_T] = "WFBitMapT", + [C_WF_SYNC_UUID] = "WFSyncUUID", + [C_SYNC_SOURCE] = "SyncSource", + [C_SYNC_TARGET] = "SyncTarget", + [C_PAUSED_SYNC_S] = "PausedSyncS", + [C_PAUSED_SYNC_T] = "PausedSyncT", + [C_VERIFY_S] = "VerifyS", + [C_VERIFY_T] = "VerifyT", }; static const char *drbd_role_s_names[] = { - [Primary] = "Primary", - [Secondary] = "Secondary", - [Unknown] = "Unknown" + [R_PRIMARY] = "Primary", + [R_SECONDARY] = "Secondary", + [R_UNKNOWN] = "Unknown" }; static const char *drbd_disk_s_names[] = { - [Diskless] = "Diskless", - [Attaching] = "Attaching", - [Failed] = "Failed", - [Negotiating] = "Negotiating", - [Inconsistent] = "Inconsistent", - [Outdated] = "Outdated", - [DUnknown] = "DUnknown", - [Consistent] = "Consistent", - [UpToDate] = "UpToDate", + [D_DISKLESS] = "Diskless", + [D_ATTACHING] = "Attaching", + [D_FAILED] = "Failed", + [D_NEGOTIATING] = "Negotiating", + 
[D_INCONSISTENT] = "Inconsistent", + [D_OUTDATED] = "Outdated", + [D_UNKNOWN] = "DUnknown", + [D_CONSISTENT] = "Consistent", + [D_UP_TO_DATE] = "UpToDate", }; static const char *drbd_state_sw_errors[] = { - [-SS_TwoPrimaries] = "Multiple primaries not allowed by config", - [-SS_NoUpToDateDisk] = - "Refusing to be Primary without at least one UpToDate disk", - [-SS_BothInconsistent] = "Refusing to be inconsistent on both nodes", - [-SS_SyncingDiskless] = "Refusing to be syncing and diskless", - [-SS_ConnectedOutdates] = "Refusing to be Outdated while Connected", - [-SS_PrimaryNOP] = "Refusing to be Primary while peer is not outdated", - [-SS_ResyncRunning] = "Can not start OV/resync since it is already active", - [-SS_AlreadyStandAlone] = "Can not disconnect a StandAlone device", - [-SS_CW_FailedByPeer] = "State changed was refused by peer node", - [-SS_IsDiskLess] = - "Device is diskless, the requesed operation requires a disk", - [-SS_DeviceInUse] = "Device is held open by someone", - [-SS_NoNetConfig] = "Have no net/connection configuration", - [-SS_NoVerifyAlg] = "Need a verify algorithm to start online verify", - [-SS_NeedConnection] = "Need a connection to start verify or resync", - [-SS_NotSupported] = "Peer does not support protocol", - [-SS_LowerThanOutdated] = "Disk state is lower than outdated", - [-SS_InTransientState] = "In transient state, retry after next state change", - [-SS_ConcurrentStChg] = "Concurrent state changes detected and aborted", + [-SS_TWO_PRIMARIES] = "Multiple primaries not allowed by config", + [-SS_NO_UP_TO_DATE_DISK] = "Refusing to be Primary without at least one UpToDate disk", + [-SS_BOTH_INCONSISTENT] = "Refusing to be inconsistent on both nodes", + [-SS_SYNCING_DISKLESS] = "Refusing to be syncing and diskless", + [-SS_CONNECTED_OUTDATES] = "Refusing to be Outdated while Connected", + [-SS_PRIMARY_NOP] = "Refusing to be Primary while peer is not outdated", + [-SS_RESYNC_RUNNING] = "Can not start OV/resync since it is already active", + [-SS_ALREADY_STANDALONE] = "Can not disconnect a StandAlone device", + [-SS_CW_FAILED_BY_PEER] = "State changed was refused by peer node", + [-SS_IS_DISKLESS] = "Device is diskless, the requesed operation requires a disk", + [-SS_DEVICE_IN_USE] = "Device is held open by someone", + [-SS_NO_NET_CONFIG] = "Have no net/connection configuration", + [-SS_NO_VERIFY_ALG] = "Need a verify algorithm to start online verify", + [-SS_NEED_CONNECTION] = "Need a connection to start verify or resync", + [-SS_NOT_SUPPORTED] = "Peer does not support protocol", + [-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated", + [-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change", + [-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted", }; const char *conns_to_name(enum drbd_conns s) { /* enums are unsigned... */ - return s > PausedSyncT ? "TOO_LARGE" : drbd_conn_s_names[s]; + return s > C_PAUSED_SYNC_T ? "TOO_LARGE" : drbd_conn_s_names[s]; } const char *roles_to_name(enum drbd_role s) { - return s > Secondary ? "TOO_LARGE" : drbd_role_s_names[s]; + return s > R_SECONDARY ? "TOO_LARGE" : drbd_role_s_names[s]; } const char *disks_to_name(enum drbd_disk_state s) { - return s > UpToDate ? "TOO_LARGE" : drbd_disk_s_names[s]; + return s > D_UP_TO_DATE ? "TOO_LARGE" : drbd_disk_s_names[s]; } -const char *set_st_err_name(enum set_st_err err) +const char *set_st_err_name(enum drbd_state_ret_codes err) { - return err <= SS_AfterLastError ? "TOO_SMALL" : - err > SS_TwoPrimaries ? 
"TOO_LARGE" + return err <= SS_AFTER_LAST_ERROR ? "TOO_SMALL" : + err > SS_TWO_PRIMARIES ? "TOO_LARGE" : drbd_state_sw_errors[-err]; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 685dc71b8a8b..a39ba573743e 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -99,7 +99,7 @@ void drbd_md_io_complete(struct bio *bio, int error) void drbd_endio_read_sec(struct bio *bio, int error) __releases(local) { unsigned long flags = 0; - struct Tl_epoch_entry *e = NULL; + struct drbd_epoch_entry *e = NULL; struct drbd_conf *mdev; int uptodate = bio_flagged(bio, BIO_UPTODATE); @@ -129,7 +129,7 @@ void drbd_endio_read_sec(struct bio *bio, int error) __releases(local) drbd_queue_work(&mdev->data.work, &e->w); dec_local(mdev); - MTRACE(TraceTypeEE, TraceLvlAll, + MTRACE(TRACE_TYPE_EE, TRACE_LVL_ALL, dev_info(DEV, "Moved EE (READ) to worker sec=%llus size=%u ee=%p\n", (unsigned long long)e->sector, e->size, e); ); @@ -141,7 +141,7 @@ void drbd_endio_read_sec(struct bio *bio, int error) __releases(local) void drbd_endio_write_sec(struct bio *bio, int error) __releases(local) { unsigned long flags = 0; - struct Tl_epoch_entry *e = NULL; + struct drbd_epoch_entry *e = NULL; struct drbd_conf *mdev; sector_t e_sector; int do_wake; @@ -191,7 +191,7 @@ void drbd_endio_write_sec(struct bio *bio, int error) __releases(local) list_del(&e->w.list); /* has been on active_ee or sync_ee */ list_add_tail(&e->w.list, &mdev->done_ee); - MTRACE(TraceTypeEE, TraceLvlAll, + MTRACE(TRACE_TYPE_EE, TRACE_LVL_ALL, dev_info(DEV, "Moved EE (WRITE) to done_ee sec=%llus size=%u ee=%p\n", (unsigned long long)e->sector, e->size, e); ); @@ -223,7 +223,7 @@ void drbd_endio_write_sec(struct bio *bio, int error) __releases(local) } -/* read, readA or write requests on Primary comming from drbd_make_request +/* read, readA or write requests on R_PRIMARY comming from drbd_make_request */ void drbd_endio_pri(struct bio *bio, int error) { @@ -260,7 +260,7 @@ int w_io_error(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int ok; /* NOTE: mdev->bc can be NULL by the time we get here! */ - /* D_ASSERT(mdev->bc->dc.on_io_error != PassOn); */ + /* D_ASSERT(mdev->bc->dc.on_io_error != EP_PASS_ON); */ /* the only way this callback is scheduled is from _req_may_be_done, * when it is done and had a local write error, see comments there */ @@ -277,14 +277,14 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) struct drbd_request *req = (struct drbd_request *)w; /* We should not detach for read io-error, - * but try to WRITE the DataReply to the failed location, + * but try to WRITE the P_DATA_REPLY to the failed location, * to give the disk the chance to relocate that block */ drbd_io_error(mdev, FALSE); /* tries to schedule a detach and notifies peer */ spin_lock_irq(&mdev->req_lock); if (cancel || - mdev->state.conn < Connected || - mdev->state.pdsk <= Inconsistent) { + mdev->state.conn < C_CONNECTED || + mdev->state.pdsk <= D_INCONSISTENT) { _req_mod(req, send_canceled, 0); spin_unlock_irq(&mdev->req_lock); dev_alert(DEV, "WE ARE LOST. 
Local IO failure, no peer.\n"); @@ -324,7 +324,7 @@ STATIC void drbd_csum(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bi STATIC int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; int digest_size; void *digest; int ok; @@ -348,7 +348,7 @@ STATIC int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel e->size, digest, digest_size, - CsumRSRequest); + P_CSUM_RS_REQUEST); kfree(digest); } else { dev_err(DEV, "kmalloc() of digest failed.\n"); @@ -370,7 +370,7 @@ STATIC int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel STATIC int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) { - struct Tl_epoch_entry *e; + struct drbd_epoch_entry *e; if (!inc_local(mdev)) return 0; @@ -408,7 +408,7 @@ void resync_timer_fn(unsigned long data) if (likely(!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags))) { queue = 1; - if (mdev->state.conn == VerifyS) + if (mdev->state.conn == C_VERIFY_S) mdev->resync_work.cb = w_make_ov_request; else mdev->resync_work.cb = w_make_resync_request; @@ -437,18 +437,18 @@ int w_make_resync_request(struct drbd_conf *mdev, if (unlikely(cancel)) return 1; - if (unlikely(mdev->state.conn < Connected)) { + if (unlikely(mdev->state.conn < C_CONNECTED)) { dev_err(DEV, "Confused in w_make_resync_request()! cstate < Connected"); return 0; } - if (mdev->state.conn != SyncTarget) + if (mdev->state.conn != C_SYNC_TARGET) dev_err(DEV, "%s in w_make_resync_request\n", conns_to_name(mdev->state.conn)); if (!inc_local(mdev)) { /* Since we only need to access mdev->rsync a - inc_local_if_state(mdev,Failed) would be sufficient, but + inc_local_if_state(mdev,D_FAILED) would be sufficient, but to continue resync with a broken disk makes no sense at all */ dev_err(DEV, "Disk broke down during resync!\n"); @@ -548,7 +548,7 @@ next_sector: } } else { inc_rs_pending(mdev); - if (!drbd_send_drequest(mdev, RSDataRequest, + if (!drbd_send_drequest(mdev, P_RS_DATA_REQUEST, sector, size, ID_SYNCER)) { dev_err(DEV, "drbd_send_drequest() failed, aborting...\n"); dec_rs_pending(mdev); @@ -560,7 +560,7 @@ next_sector: if (mdev->bm_resync_fo >= drbd_bm_bits(mdev)) { /* last syncer _request_ was sent, - * but the RSDataReply not yet received. sync will end (and + * but the P_RS_DATA_REPLY not yet received. sync will end (and * next sync group will resume), as soon as we receive the last * resync data block, and the last bit is cleared. * until then resync "work" is "inactive" ... @@ -585,7 +585,7 @@ int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel) if (unlikely(cancel)) return 1; - if (unlikely(mdev->state.conn < Connected)) { + if (unlikely(mdev->state.conn < C_CONNECTED)) { dev_err(DEV, "Confused in w_make_ov_request()! cstate < Connected"); return 0; } @@ -650,7 +650,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) { unsigned long db, dt, dbdt; unsigned long n_oos; - union drbd_state_t os, ns; + union drbd_state os, ns; struct drbd_work *w; char *khelper_cmd = NULL; @@ -659,7 +659,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) * resync LRU would be wrong. */ if (drbd_rs_del_all(mdev)) { /* In case this is not possible now, most probably because - * there are RSDataReply Packets lingering on the worker's + * there are P_RS_DATA_REPLY Packets lingering on the worker's * queue (or even the read operations for those packets * is not finished by now). 
Retry in 100ms. */ @@ -690,20 +690,20 @@ int drbd_resync_finished(struct drbd_conf *mdev) /* This protects us against multiple calls (that can happen in the presence of application IO), and against connectivity loss just before we arrive here. */ - if (os.conn <= Connected) + if (os.conn <= C_CONNECTED) goto out_unlock; ns = os; - ns.conn = Connected; + ns.conn = C_CONNECTED; dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", - (os.conn == VerifyS || os.conn == VerifyT) ? + (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) ? "Online verify " : "Resync", dt + mdev->rs_paused, mdev->rs_paused, dbdt); n_oos = drbd_bm_total_weight(mdev); - if (os.conn == VerifyS || os.conn == VerifyT) { + if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) { if (n_oos) { dev_alert(DEV, "Online verify found %lu %dk block out of sync!\n", n_oos, Bit2KB(1)); @@ -712,7 +712,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) } else { D_ASSERT((n_oos - mdev->rs_failed) == 0); - if (os.conn == SyncTarget || os.conn == PausedSyncT) + if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) khelper_cmd = "after-resync-target"; if (mdev->csums_tfm && mdev->rs_total) { @@ -733,24 +733,24 @@ int drbd_resync_finished(struct drbd_conf *mdev) if (mdev->rs_failed) { dev_info(DEV, " %lu failed blocks\n", mdev->rs_failed); - if (os.conn == SyncTarget || os.conn == PausedSyncT) { - ns.disk = Inconsistent; - ns.pdsk = UpToDate; + if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) { + ns.disk = D_INCONSISTENT; + ns.pdsk = D_UP_TO_DATE; } else { - ns.disk = UpToDate; - ns.pdsk = Inconsistent; + ns.disk = D_UP_TO_DATE; + ns.pdsk = D_INCONSISTENT; } } else { - ns.disk = UpToDate; - ns.pdsk = UpToDate; + ns.disk = D_UP_TO_DATE; + ns.pdsk = D_UP_TO_DATE; - if (os.conn == SyncTarget || os.conn == PausedSyncT) { + if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) { if (mdev->p_uuid) { int i; - for (i = Bitmap ; i <= History_end ; i++) + for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++) _drbd_uuid_set(mdev, i, mdev->p_uuid[i]); - drbd_uuid_set(mdev, Bitmap, mdev->bc->md.uuid[Current]); - _drbd_uuid_set(mdev, Current, mdev->p_uuid[Current]); + drbd_uuid_set(mdev, UI_BITMAP, mdev->bc->md.uuid[UI_CURRENT]); + _drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]); } else { dev_err(DEV, "mdev->p_uuid is NULL! BUG\n"); } @@ -762,12 +762,12 @@ int drbd_resync_finished(struct drbd_conf *mdev) /* Now the two UUID sets are equal, update what we * know of the peer. */ int i; - for (i = Current ; i <= History_end ; i++) + for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++) mdev->p_uuid[i] = mdev->bc->md.uuid[i]; } } - _drbd_set_state(mdev, ns, ChgStateVerbose, NULL); + _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); out_unlock: spin_unlock_irq(&mdev->req_lock); dec_local(mdev); @@ -790,11 +790,11 @@ out: } /** - * w_e_end_data_req: Send the answer (DataReply) in response to a DataRequest. + * w_e_end_data_req: Send the answer (P_DATA_REPLY) in response to a DataRequest. 
*/ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; int ok; if (unlikely(cancel)) { @@ -804,13 +804,13 @@ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } if (likely(drbd_bio_uptodate(e->private_bio))) { - ok = drbd_send_block(mdev, DataReply, e); + ok = drbd_send_block(mdev, P_DATA_REPLY, e); } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Sending NegDReply. sector=%llus.\n", (unsigned long long)e->sector); - ok = drbd_send_ack(mdev, NegDReply, e); + ok = drbd_send_ack(mdev, P_NEG_DREPLY, e); drbd_io_error(mdev, FALSE); } @@ -832,11 +832,11 @@ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } /** - * w_e_end_rsdata_req: Send the answer (RSDataReply) to a RSDataRequest. + * w_e_end_rsdata_req: Send the answer (P_RS_DATA_REPLY) to a RSDataRequest. */ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; int ok; if (unlikely(cancel)) { @@ -845,15 +845,15 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return 1; } - if (inc_local_if_state(mdev, Failed)) { + if (inc_local_if_state(mdev, D_FAILED)) { drbd_rs_complete_io(mdev, e->sector); dec_local(mdev); } if (likely(drbd_bio_uptodate(e->private_bio))) { - if (likely(mdev->state.pdsk >= Inconsistent)) { + if (likely(mdev->state.pdsk >= D_INCONSISTENT)) { inc_rs_pending(mdev); - ok = drbd_send_block(mdev, RSDataReply, e); + ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e); } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Not sending RSDataReply, " @@ -865,7 +865,7 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) dev_err(DEV, "Sending NegRSDReply. sector %llus.\n", (unsigned long long)e->sector); - ok = drbd_send_ack(mdev, NegRSDReply, e); + ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e); drbd_io_error(mdev, FALSE); @@ -891,7 +891,7 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; struct digest_info *di; int digest_size; void *digest = NULL; @@ -925,14 +925,14 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) if (eq) { drbd_set_in_sync(mdev, e->sector, e->size); mdev->rs_same_csum++; - ok = drbd_send_ack(mdev, RSIsInSync, e); + ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e); } else { inc_rs_pending(mdev); e->block_id = ID_SYNCER; - ok = drbd_send_block(mdev, RSDataReply, e); + ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e); } } else { - ok = drbd_send_ack(mdev, NegRSDReply, e); + ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e); if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Sending NegDReply. 
I guess it gets messy.\n"); drbd_io_error(mdev, FALSE); @@ -958,7 +958,7 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; int digest_size; void *digest; int ok = 1; @@ -974,7 +974,7 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) if (digest) { drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest); ok = drbd_send_drequest_csum(mdev, e->sector, e->size, - digest, digest_size, OVReply); + digest, digest_size, P_OV_REPLY); if (ok) inc_rs_pending(mdev); kfree(digest); @@ -1004,7 +1004,7 @@ void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size) int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct Tl_epoch_entry *e = (struct Tl_epoch_entry *)w; + struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; struct digest_info *di; int digest_size; void *digest; @@ -1033,7 +1033,7 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) kfree(digest); } } else { - ok = drbd_send_ack(mdev, NegRSDReply, e); + ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e); if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n"); drbd_io_error(mdev, FALSE); @@ -1048,7 +1048,7 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) else ov_oos_print(mdev); - ok = drbd_send_ack_ex(mdev, OVResult, e->sector, e->size, + ok = drbd_send_ack_ex(mdev, P_OV_RESULT, e->sector, e->size, eq ? ID_IN_SYNC : ID_OUT_OF_SYNC); spin_lock_irq(&mdev->req_lock); @@ -1072,8 +1072,8 @@ int w_prev_work_done(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_barrier *b = (struct drbd_barrier *)w; - struct Drbd_Barrier_Packet *p = &mdev->data.sbuf.Barrier; + struct drbd_tl_epoch *b = (struct drbd_tl_epoch *)w; + struct p_barrier *p = &mdev->data.sbuf.barrier; int ok = 1; /* really avoid racing with tl_clear. w.cb may have been referenced @@ -1082,7 +1082,7 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * barrier packet here, and otherwise do nothing with the object. * but compare with the head of w_clear_epoch */ spin_lock_irq(&mdev->req_lock); - if (w->cb != w_send_barrier || mdev->state.conn < Connected) + if (w->cb != w_send_barrier || mdev->state.conn < C_CONNECTED) cancel = 1; spin_unlock_irq(&mdev->req_lock); if (cancel) @@ -1094,8 +1094,8 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) /* inc_ap_pending was done where this was queued. * dec_ap_pending will be done in got_BarrierAck * or (on connection loss) in w_clear_epoch. 
*/ - ok = _drbd_send_cmd(mdev, mdev->data.socket, Barrier, - (struct Drbd_Header *)p, sizeof(*p), 0); + ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BARRIER, + (struct p_header *)p, sizeof(*p), 0); drbd_put_data_sock(mdev); return ok; @@ -1105,7 +1105,7 @@ int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { if (cancel) return 1; - return drbd_send_short_cmd(mdev, UnplugRemote); + return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE); } /** @@ -1140,14 +1140,14 @@ int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return 1; } - ok = drbd_send_drequest(mdev, DataRequest, req->sector, req->size, + ok = drbd_send_drequest(mdev, P_DATA_REQUEST, req->sector, req->size, (unsigned long)req); if (!ok) { - /* ?? we set Timeout or BrokenPipe in drbd_send(); + /* ?? we set C_TIMEOUT or C_BROKEN_PIPE in drbd_send(); * so this is probably redundant */ - if (mdev->state.conn >= Connected) - drbd_force_state(mdev, NS(conn, NetworkFailure)); + if (mdev->state.conn >= C_CONNECTED) + drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); } req_mod(req, ok ? handed_over_to_network : send_failed, 0); @@ -1163,8 +1163,8 @@ STATIC int _drbd_may_sync_now(struct drbd_conf *mdev) return 1; odev = minor_to_mdev(odev->sync_conf.after); ERR_IF(!odev) return 1; - if ((odev->state.conn >= SyncSource && - odev->state.conn <= PausedSyncT) || + if ((odev->state.conn >= C_SYNC_SOURCE && + odev->state.conn <= C_PAUSED_SYNC_T) || odev->state.aftr_isp || odev->state.peer_isp || odev->state.user_isp) return 0; @@ -1186,11 +1186,11 @@ STATIC int _drbd_pause_after(struct drbd_conf *mdev) odev = minor_to_mdev(i); if (!odev) continue; - if (odev->state.conn == StandAlone && odev->state.disk == Diskless) + if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) continue; if (!_drbd_may_sync_now(odev)) - rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), ChgStateHard, NULL) - != SS_NothingToDo); + rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL) + != SS_NOTHING_TO_DO); } return rv; @@ -1211,13 +1211,13 @@ STATIC int _drbd_resume_next(struct drbd_conf *mdev) odev = minor_to_mdev(i); if (!odev) continue; - if (odev->state.conn == StandAlone && odev->state.disk == Diskless) + if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) continue; if (odev->state.aftr_isp) { if (_drbd_may_sync_now(odev)) rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0), - ChgStateHard, NULL) - != SS_NothingToDo) ; + CS_HARD, NULL) + != SS_NOTHING_TO_DO) ; } } return rv; @@ -1254,7 +1254,7 @@ void drbd_alter_sa(struct drbd_conf *mdev, int na) /** * drbd_start_resync: - * @side: Either SyncSource or SyncTarget + * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET * Start the resync process. Called from process context only, * either admin command or drbd_receiver. * Note, this function might bring you directly into one of the @@ -1262,50 +1262,50 @@ void drbd_alter_sa(struct drbd_conf *mdev, int na) */ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) { - union drbd_state_t ns; + union drbd_state ns; int r; - MTRACE(TraceTypeResync, TraceLvlSummary, + MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_SUMMARY, dev_info(DEV, "Resync starting: side=%s\n", - side == SyncTarget ? "SyncTarget" : "SyncSource"); - ); + side == C_SYNC_TARGET ? "SyncTarget" : "SyncSource"); + ); drbd_bm_recount_bits(mdev); /* In case a previous resync run was aborted by an IO error... 
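The sync-after logic above (_drbd_may_sync_now, _drbd_pause_after, _drbd_resume_next) serializes resyncs of stacked devices: a device may only resync once nothing it depends on, directly or transitively via sync_conf.after, is itself syncing. A stand-alone model of that walk; struct dev and may_sync_now() are hypothetical stand-ins, and the sketch assumes the ERR_SYNC_AFTER_CYCLE check done at configuration time already rules out cycles.

#include <stdbool.h>
#include <stdio.h>

struct dev {
	int sync_after;		/* index of device to sync after, or -1 */
	bool syncing;		/* in C_SYNC_SOURCE .. C_PAUSED_SYNC_T */
};

static bool may_sync_now(struct dev *devs, int i)
{
	while (devs[i].sync_after >= 0) {
		i = devs[i].sync_after;
		if (devs[i].syncing)
			return false;	/* dependency busy: stay paused */
	}
	return true;
}

int main(void)
{
	struct dev devs[3] = {
		{ .sync_after = -1, .syncing = true },	/* r0 */
		{ .sync_after = 0 },			/* r1 after r0 */
		{ .sync_after = 1 },			/* r2 after r1 */
	};

	printf("r2 may sync: %d\n", may_sync_now(devs, 2));	/* 0 */
	devs[0].syncing = false;	/* r0 done: resume_next kicks in */
	printf("r2 may sync: %d\n", may_sync_now(devs, 2));	/* 1 */
	return 0;
}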
*/ drbd_rs_cancel_all(mdev); - if (side == SyncTarget) { - /* Since application IO was locked out during WFBitMapT and - WFSyncUUID we are still unmodified. Before going to SyncTarget + if (side == C_SYNC_TARGET) { + /* Since application IO was locked out during C_WF_BITMAP_T and + C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET we check that we might make the data inconsistent. */ r = drbd_khelper(mdev, "before-resync-target"); r = (r >> 8) & 0xff; if (r > 0) { dev_info(DEV, "before-resync-target handler returned %d, " "dropping connection.\n", r); - drbd_force_state(mdev, NS(conn, Disconnecting)); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); return; } } drbd_state_lock(mdev); - if (!inc_local_if_state(mdev, Negotiating)) { + if (!inc_local_if_state(mdev, D_NEGOTIATING)) { drbd_state_unlock(mdev); return; } - if (side == SyncTarget) { + if (side == C_SYNC_TARGET) { mdev->bm_resync_fo = 0; - } else /* side == SyncSource */ { + } else /* side == C_SYNC_SOURCE */ { u64 uuid; get_random_bytes(&uuid, sizeof(u64)); - drbd_uuid_set(mdev, Bitmap, uuid); + drbd_uuid_set(mdev, UI_BITMAP, uuid); drbd_send_sync_uuid(mdev, uuid); - D_ASSERT(mdev->state.disk == UpToDate); + D_ASSERT(mdev->state.disk == D_UP_TO_DATE); } write_lock_irq(&global_state_lock); @@ -1315,18 +1315,18 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) ns.conn = side; - if (side == SyncTarget) - ns.disk = Inconsistent; - else /* side == SyncSource */ - ns.pdsk = Inconsistent; + if (side == C_SYNC_TARGET) + ns.disk = D_INCONSISTENT; + else /* side == C_SYNC_SOURCE */ + ns.pdsk = D_INCONSISTENT; - r = __drbd_set_state(mdev, ns, ChgStateVerbose, NULL); + r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL); ns = mdev->state; - if (ns.conn < Connected) - r = SS_UnknownError; + if (ns.conn < C_CONNECTED) + r = SS_UNKNOWN_ERROR; - if (r == SS_Success) { + if (r == SS_SUCCESS) { mdev->rs_total = mdev->rs_mark_left = drbd_bm_total_weight(mdev); mdev->rs_failed = 0; @@ -1340,7 +1340,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) drbd_state_unlock(mdev); dec_local(mdev); - if (r == SS_Success) { + if (r == SS_SUCCESS) { dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", conns_to_name(ns.conn), (unsigned long) mdev->rs_total << (BM_BLOCK_SIZE_B-10), @@ -1351,7 +1351,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) return; } - if (ns.conn == SyncTarget) { + if (ns.conn == C_SYNC_TARGET) { D_ASSERT(!test_bit(STOP_SYNC_TIMER, &mdev->flags)); mod_timer(&mdev->resync_timer, jiffies); } @@ -1360,7 +1360,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) } } -int drbd_worker(struct Drbd_thread *thi) +int drbd_worker(struct drbd_thread *thi) { struct drbd_conf *mdev = thi->mdev; struct drbd_work *w = NULL; @@ -1421,11 +1421,11 @@ int drbd_worker(struct Drbd_thread *thi) list_del_init(&w->list); spin_unlock_irq(&mdev->data.work.q_lock); - if (!w->cb(mdev, w, mdev->state.conn < Connected)) { + if (!w->cb(mdev, w, mdev->state.conn < C_CONNECTED)) { /* dev_warn(DEV, "worker: a callback failed! 
\n"); */ - if (mdev->state.conn >= Connected) + if (mdev->state.conn >= C_CONNECTED) drbd_force_state(mdev, - NS(conn, NetworkFailure)); + NS(conn, C_NETWORK_FAILURE)); } } @@ -1452,7 +1452,7 @@ int drbd_worker(struct Drbd_thread *thi) */ spin_unlock_irq(&mdev->data.work.q_lock); - D_ASSERT(mdev->state.disk == Diskless && mdev->state.conn == StandAlone); + D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); /* _drbd_set_state only uses stop_nowait. * wait here for the Exiting receiver. */ drbd_thread_stop(&mdev->receiver); diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 748d1cb20d42..250002101e4e 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,95 +53,88 @@ #endif -enum io_error_handler { - PassOn, /* FIXME should the better be named "Ignore"? */ - CallIOEHelper, - Detach +enum drbd_io_error_p { + EP_PASS_ON, /* FIXME should the better be named "Ignore"? */ + EP_CALL_HELPER, + EP_DETACH }; -enum fencing_policy { - DontCare, - Resource, - Stonith +enum drbd_fencing_p { + FP_DONT_CARE, + FP_RESOURCE, + FP_STONITH }; -enum disconnect_handler { - Reconnect, - DropNetConf, - FreezeIO +enum drbd_disconnect_p { + DP_RECONNECT, + DP_DROP_NET_CONF, + DP_FREEZE_IO }; -enum after_sb_handler { - Disconnect, - DiscardYoungerPri, - DiscardOlderPri, - DiscardZeroChg, - DiscardLeastChg, - DiscardLocal, - DiscardRemote, - Consensus, - DiscardSecondary, - CallHelper, - Violently +enum drbd_after_sb_p { + ASB_DISCONNECT, + ASB_DISCARD_YOUNGER_PRI, + ASB_DISCARD_OLDER_PRI, + ASB_DISCARD_ZERO_CHG, + ASB_DISCARD_LEAST_CHG, + ASB_DISCARD_LOCAL, + ASB_DISCARD_REMOTE, + ASB_CONSENSUS, + ASB_DISCARD_SECONDARY, + ASB_CALL_HELPER, + ASB_VIOLENTLY }; /* KEEP the order, do not delete or insert. Only append. */ -enum ret_codes { - RetCodeBase = 100, - NoError, /* 101 ... 
*/ - LAAlreadyInUse, - OAAlreadyInUse, - LDNameInvalid, - MDNameInvalid, - LDAlreadyInUse, - LDNoBlockDev, - MDNoBlockDev, - LDOpenFailed, - MDOpenFailed, - LDDeviceTooSmall, - MDDeviceTooSmall, - LDNoConfig, - LDMounted, - MDMounted, - LDMDInvalid, - LDDeviceTooLarge, - MDIOError, - MDInvalid, - CRAMAlgNotAvail, - CRAMAlgNotDigest, - KMallocFailed, - DiscardNotAllowed, - HaveDiskConfig, - HaveNetConfig, - UnknownMandatoryTag, - MinorNotKnown, - StateNotAllowed, - GotSignal, /* EINTR */ - NoResizeDuringResync, - APrimaryNodeNeeded, - SyncAfterInvalid, - SyncAfterCycle, - PauseFlagAlreadySet, - PauseFlagAlreadyClear, - DiskLowerThanOutdated, /* obsolete, now SS_LowerThanOutdated */ - UnknownNetLinkPacket, - HaveNoDiskConfig, - ProtocolCRequired, - VMallocFailed, - IntegrityAlgNotAvail, - IntegrityAlgNotDigest, - CPUMaskParseFailed, - CSUMSAlgNotAvail, - CSUMSAlgNotDigest, - VERIFYAlgNotAvail, - VERIFYAlgNotDigest, - CSUMSResyncRunning, - VERIFYIsRunning, - DataOfWrongCurrent, - MayNotBeConnected, +enum drbd_ret_codes { + ERR_CODE_BASE = 100, + NO_ERROR = 101, + ERR_LOCAL_ADDR = 102, + ERR_PEER_ADDR = 103, + ERR_OPEN_DISK = 104, + ERR_OPEN_MD_DISK = 105, + ERR_DISK_NOT_BDEV = 107, + ERR_MD_NOT_BDEV = 108, + ERR_DISK_TO_SMALL = 111, + ERR_MD_DISK_TO_SMALL = 112, + ERR_BDCLAIM_DISK = 114, + ERR_BDCLAIM_MD_DISK = 115, + ERR_MD_IDX_INVALID = 116, + ERR_IO_MD_DISK = 118, + ERR_MD_INVALID = 119, + ERR_AUTH_ALG = 120, + ERR_AUTH_ALG_ND = 121, + ERR_NOMEM = 122, + ERR_DISCARD = 123, + ERR_DISK_CONFIGURED = 124, + ERR_NET_CONFIGURED = 125, + ERR_MANDATORY_TAG = 126, + ERR_MINOR_INVALID = 127, + ERR_INTR = 129, /* EINTR */ + ERR_RESIZE_RESYNC = 130, + ERR_NO_PRIMARY = 131, + ERR_SYNC_AFTER = 132, + ERR_SYNC_AFTER_CYCLE = 133, + ERR_PAUSE_IS_SET = 134, + ERR_PAUSE_IS_CLEAR = 135, + ERR_PACKET_NR = 137, + ERR_NO_DISK = 138, + ERR_NOT_PROTO_C = 139, + ERR_NOMEM_BITMAP = 140, + ERR_INTEGRITY_ALG = 141, /* DRBD 8.2 only */ + ERR_INTEGRITY_ALG_ND = 142, /* DRBD 8.2 only */ + ERR_CPU_MASK_PARSE = 143, /* DRBD 8.2 only */ + ERR_CSUMS_ALG = 144, /* DRBD 8.2 only */ + ERR_CSUMS_ALG_ND = 145, /* DRBD 8.2 only */ + ERR_VERIFY_ALG = 146, /* DRBD 8.2 only */ + ERR_VERIFY_ALG_ND = 147, /* DRBD 8.2 only */ + ERR_CSUMS_RESYNC_RUNNING= 148, /* DRBD 8.2 only */ + ERR_VERIFY_RUNNING = 149, /* DRBD 8.2 only */ + ERR_DATA_NOT_CURRENT = 150, + ERR_CONNECTED = 151, /* DRBD 8.3 only */ /* insert new ones above this line */ - AfterLastRetCode, + AFTER_LAST_ERR_CODE }; #define DRBD_PROT_A 1 @@ -149,67 +142,67 @@ enum ret_codes { #define DRBD_PROT_C 3 enum drbd_role { - Unknown = 0, - Primary = 1, /* role */ - Secondary = 2, /* role */ - role_mask = 3, + R_UNKNOWN = 0, + R_PRIMARY = 1, /* role */ + R_SECONDARY = 2, /* role */ + R_MASK = 3, }; /* The order of these constants is important. - * The lower ones (=WFReportParams ==> There is a socket + * >=C_WF_REPORT_PARAMS ==> There is a socket */ enum drbd_conns { - StandAlone, - Disconnecting, /* Temporal state on the way to StandAlone. */ - Unconnected, /* >= Unconnected -> inc_net() succeeds */ + C_STANDALONE, + C_DISCONNECTING, /* Temporal state on the way to StandAlone. */ + C_UNCONNECTED, /* >= C_UNCONNECTED -> inc_net() succeeds */ /* These temporal states are all used on the way - * from >= Connected to Unconnected. + * from >= C_CONNECTED to Unconnected. * The 'disconnect reason' states * I do not allow to change beween them. 
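The return-code rename above switches to explicit initializers because these values cross the kernel/userspace boundary through the netlink interface; the gaps in the numbering (106, 109, 110, ...) are codes that were retired but whose numbers must not be reused. A small sketch of the tolerant lookup userspace tools need; err_name() is a hypothetical helper, the numeric values are taken from the hunk above.

#include <stdio.h>

enum {
	ERR_CODE_BASE = 100,
	NO_ERROR = 101,
	ERR_MANDATORY_TAG = 126,
	ERR_CONNECTED = 151,	/* DRBD 8.3 only */
};

static const char *err_name(int code)
{
	switch (code) {
	case NO_ERROR:          return "no error";
	case ERR_MANDATORY_TAG: return "mandatory tag missing";
	case ERR_CONNECTED:     return "may not be connected";
	default:                return "unknown or retired error code";
	}
}

int main(void)
{
	printf("%d: %s\n", ERR_MANDATORY_TAG, err_name(ERR_MANDATORY_TAG));
	printf("%d: %s\n", 106, err_name(106));	/* a retired number */
	return 0;
}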
*/ - Timeout, - BrokenPipe, - NetworkFailure, - ProtocolError, - TearDown, - - WFConnection, - WFReportParams, /* we have a socket */ - Connected, /* we have introduced each other */ - StartingSyncS, /* starting full sync by IOCTL. */ - StartingSyncT, /* stariing full sync by IOCTL. */ - WFBitMapS, - WFBitMapT, - WFSyncUUID, + C_TIMEOUT, + C_BROKEN_PIPE, + C_NETWORK_FAILURE, + C_PROTOCOL_ERROR, + C_TEAR_DOWN, + + C_WF_CONNECTION, + C_WF_REPORT_PARAMS, /* we have a socket */ + C_CONNECTED, /* we have introduced each other */ + C_STARTING_SYNC_S, /* starting full sync by IOCTL. */ + C_STARTING_SYNC_T, /* starting full sync by IOCTL. */ + C_WF_BITMAP_S, + C_WF_BITMAP_T, + C_WF_SYNC_UUID, /* All SyncStates are tested with this comparison - * xx >= SyncSource && xx <= PausedSyncT */ - SyncSource, - SyncTarget, - VerifyS, - VerifyT, - PausedSyncS, - PausedSyncT, - conn_mask = 31 + * xx >= C_SYNC_SOURCE && xx <= C_PAUSED_SYNC_T */ + C_SYNC_SOURCE, + C_SYNC_TARGET, + C_VERIFY_S, + C_VERIFY_T, + C_PAUSED_SYNC_S, + C_PAUSED_SYNC_T, + C_MASK = 31 }; enum drbd_disk_state { - Diskless, - Attaching, /* In the process of reading the meta-data */ - Failed, /* Becomes Diskless as soon as we told it the peer */ - /* when >= Failed it is legal to access mdev->bc */ - Negotiating, /* Late attaching state, we need to talk to the peer */ - Inconsistent, - Outdated, - DUnknown, /* Only used for the peer, never for myself */ - Consistent, /* Might be Outdated, might be UpToDate ... */ - UpToDate, /* Only this disk state allows applications' IO ! */ - disk_mask = 15 + D_DISKLESS, + D_ATTACHING, /* In the process of reading the meta-data */ + D_FAILED, /* Becomes D_DISKLESS as soon as we told it the peer */ + /* when >= D_FAILED it is legal to access mdev->bc */ + D_NEGOTIATING, /* Late attaching state, we need to talk to the peer */ + D_INCONSISTENT, + D_OUTDATED, + D_UNKNOWN, /* Only used for the peer, never for myself */ + D_CONSISTENT, /* Might be D_OUTDATED, might be D_UP_TO_DATE ... */ + D_UP_TO_DATE, /* Only this disk state allows applications' IO ! */ + D_MASK = 15 }; -union drbd_state_t { +union drbd_state { /* According to gcc's docs is the ... * The order of allocation of bit-fields within a unit (C90 6.5.2.1, C99 6.7.2.1). * Determined by ABI. @@ -223,8 +216,8 @@ union drbd_state_t { unsigned role:2 ; /* 3/4 primary/secondary/unknown */ unsigned peer:2 ; /* 3/4 primary/secondary/unknown */ unsigned conn:5 ; /* 17/32 cstates */ - unsigned disk:4 ; /* 8/16 from Diskless to UpToDate */ - unsigned pdsk:4 ; /* 8/16 from Diskless to UpToDate */ + unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ + unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ unsigned susp:1 ; /* 2/2 IO suspended no/yes */ unsigned aftr_isp:1 ; /* isp .. imposed sync pause */ unsigned peer_isp:1 ; @@ -236,8 +229,8 @@ union drbd_state_t { unsigned peer_isp:1 ; unsigned aftr_isp:1 ; /* isp .. 
imposed sync pause */ unsigned susp:1 ; /* 2/2 IO suspended no/yes */ - unsigned pdsk:4 ; /* 8/16 from Diskless to UpToDate */ - unsigned disk:4 ; /* 8/16 from Diskless to UpToDate */ + unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ + unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ unsigned conn:5 ; /* 17/32 cstates */ unsigned peer:2 ; /* 3/4 primary/secondary/unknown */ unsigned role:2 ; /* 3/4 primary/secondary/unknown */ @@ -248,72 +241,63 @@ union drbd_state_t { unsigned int i; }; -enum set_st_err { - SS_CW_NoNeed = 4, - SS_CW_Success = 3, - SS_NothingToDo = 2, - SS_Success = 1, - SS_UnknownError = 0, /* Used to sleep longer in _drbd_request_state */ - SS_TwoPrimaries = -1, - SS_NoUpToDateDisk = -2, - SS_BothInconsistent = -4, - SS_SyncingDiskless = -5, - SS_ConnectedOutdates = -6, - SS_PrimaryNOP = -7, - SS_ResyncRunning = -8, - SS_AlreadyStandAlone = -9, - SS_CW_FailedByPeer = -10, - SS_IsDiskLess = -11, - SS_DeviceInUse = -12, - SS_NoNetConfig = -13, - SS_NoVerifyAlg = -14, /* drbd-8.2 only */ - SS_NeedConnection = -15, /* drbd-8.2 only */ - SS_LowerThanOutdated = -16, - SS_NotSupported = -17, /* drbd-8.2 only */ - SS_InTransientState = -18, /* Retry after the next state change */ - SS_ConcurrentStChg = -19, /* Concurrent cluster side state change! */ - SS_AfterLastError = -20, /* Keep this at bottom */ +enum drbd_state_ret_codes { + SS_CW_NO_NEED = 4, + SS_CW_SUCCESS = 3, + SS_NOTHING_TO_DO = 2, + SS_SUCCESS = 1, + SS_UNKNOWN_ERROR = 0, /* Used to sleep longer in _drbd_request_state */ + SS_TWO_PRIMARIES = -1, + SS_NO_UP_TO_DATE_DISK = -2, + SS_BOTH_INCONSISTENT = -4, + SS_SYNCING_DISKLESS = -5, + SS_CONNECTED_OUTDATES = -6, + SS_PRIMARY_NOP = -7, + SS_RESYNC_RUNNING = -8, + SS_ALREADY_STANDALONE = -9, + SS_CW_FAILED_BY_PEER = -10, + SS_IS_DISKLESS = -11, + SS_DEVICE_IN_USE = -12, + SS_NO_NET_CONFIG = -13, + SS_NO_VERIFY_ALG = -14, /* drbd-8.2 only */ + SS_NEED_CONNECTION = -15, /* drbd-8.2 only */ + SS_LOWER_THAN_OUTDATED = -16, + SS_NOT_SUPPORTED = -17, /* drbd-8.2 only */ + SS_IN_TRANSIENT_STATE = -18, /* Retry after the next state change */ + SS_CONCURRENT_ST_CHG = -19, /* Concurrent cluster side state change! */ + SS_AFTER_LAST_ERROR = -20, /* Keep this at bottom */ }; /* from drbd_strings.c */ extern const char *conns_to_name(enum drbd_conns); extern const char *roles_to_name(enum drbd_role); extern const char *disks_to_name(enum drbd_disk_state); -extern const char *set_st_err_name(enum set_st_err); +extern const char *set_st_err_name(enum drbd_state_ret_codes); #define SHARED_SECRET_MAX 64 -enum MetaDataFlags { - __MDF_Consistent, - __MDF_PrimaryInd, - __MDF_ConnectedInd, - __MDF_FullSync, - __MDF_WasUpToDate, - __MDF_PeerOutDated, /* or worse (e.g. invalid). */ - __MDF_CrashedPrimary, -}; -#define MDF_Consistent (1<<__MDF_Consistent) -#define MDF_PrimaryInd (1<<__MDF_PrimaryInd) -#define MDF_ConnectedInd (1<<__MDF_ConnectedInd) -#define MDF_FullSync (1<<__MDF_FullSync) -#define MDF_WasUpToDate (1<<__MDF_WasUpToDate) -#define MDF_PeerOutDated (1<<__MDF_PeerOutDated) -#define MDF_CrashedPrimary (1<<__MDF_CrashedPrimary) - -enum UuidIndex { - Current, - Bitmap, - History_start, - History_end, - UUID_SIZE, /* nl-packet: number of dirty bits */ - UUID_FLAGS, /* nl-packet: flags */ - EXT_UUID_SIZE /* Everything. 
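The point of union drbd_state (above) is that the bit-fields give symbolic access to role/conn/disk/pdsk while the overlaid integer i allows a whole-state snapshot, compare, and assignment in one operation, which is what __drbd_set_state() relies on (mdev->state.i = ns.i). A userspace sketch of the idea; the field layout is ABI and byte-order dependent, as the two ordering variants above show, so this is illustrative only and requires C11 anonymous structs.

#include <stdio.h>

union state {
	struct {
		unsigned role:2;
		unsigned peer:2;
		unsigned conn:5;	/* C_STANDALONE .. C_PAUSED_SYNC_T */
		unsigned disk:4;	/* D_DISKLESS .. D_UP_TO_DATE */
		unsigned pdsk:4;
		unsigned susp:1;
	};
	unsigned int i;
};

int main(void)
{
	union state os = { .i = 0 };
	union state ns;

	os.role = 1;	/* R_PRIMARY */
	os.conn = 10;	/* C_CONNECTED */
	os.disk = 8;	/* D_UP_TO_DATE */

	ns = os;	/* whole-state copy via the union */
	ns.disk = 4;	/* D_INCONSISTENT, as on the resync target */

	if (ns.i != os.i)	/* one compare instead of per-field tests */
		printf("state changed: 0x%08x -> 0x%08x\n", os.i, ns.i);
	return 0;
}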
*/ +#define MDF_CONSISTENT (1 << 0) +#define MDF_PRIMARY_IND (1 << 1) +#define MDF_CONNECTED_IND (1 << 2) +#define MDF_FULL_SYNC (1 << 3) +#define MDF_WAS_UP_TO_DATE (1 << 4) +#define MDF_PEER_OUT_DATED (1 << 5) +#define MDF_CRASHED_PRIMARY (1 << 6) + +enum drbd_uuid_index { + UI_CURRENT, + UI_BITMAP, + UI_HISTORY_START, + UI_HISTORY_END, + UI_SIZE, /* nl-packet: number of dirty bits */ + UI_FLAGS, /* nl-packet: flags */ + UI_EXTENDED_SIZE /* Everything. */ }; -enum UseTimeout { - UT_Default = 0, - UT_Degraded = 1, - UT_PeerOutdated = 2, +enum drbd_timeout_flag { + UT_DEFAULT = 0, + UT_DEGRADED = 1, + UT_PEER_OUTDATED = 2, }; #define UUID_JUST_CREATED ((__u64)4) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 7390c354b0d2..2fafc2b9cdb1 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -47,7 +47,7 @@ #define DRBD_TIMEOUT_MAX 600 #define DRBD_TIMEOUT_DEF 60 /* 6 seconds */ - /* active connection retries when WFConnection */ + /* active connection retries when C_WF_CONNECTION */ #define DRBD_CONNECT_INT_MIN 1 #define DRBD_CONNECT_INT_MAX 120 #define DRBD_CONNECT_INT_DEF 10 /* seconds */ @@ -118,12 +118,12 @@ #define DRBD_DISK_SIZE_SECT_MAX (16 * (2LLU << 30)) #define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */ -#define DRBD_ON_IO_ERROR_DEF PassOn -#define DRBD_FENCING_DEF DontCare -#define DRBD_AFTER_SB_0P_DEF Disconnect -#define DRBD_AFTER_SB_1P_DEF Disconnect -#define DRBD_AFTER_SB_2P_DEF Disconnect -#define DRBD_RR_CONFLICT_DEF Disconnect +#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON +#define DRBD_FENCING_DEF FP_DONT_CARE +#define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT +#define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT +#define DRBD_AFTER_SB_2P_DEF ASB_DISCONNECT +#define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT #define DRBD_MAX_BIO_BVECS_MIN 0 #define DRBD_MAX_BIO_BVECS_MAX 128 diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index 6315ce8af647..cc99f3ecd8ca 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -93,7 +93,7 @@ NL_PACKET(get_state, 17, ) NL_PACKET(get_uuids, 18, - NL_STRING( 34, T_MAY_IGNORE, uuids, (UUID_SIZE*sizeof(__u64))) + NL_STRING( 34, T_MAY_IGNORE, uuids, (UI_SIZE*sizeof(__u64))) NL_INTEGER( 35, T_MAY_IGNORE, uuids_flags) ) -- cgit v1.2.3 From c733785235d1cf1e37e70446182a21c06e69d519 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 29 Apr 2009 17:36:31 +0200 Subject: Tracking DRBD mainline 2fcb72c00e0fb35120665943cbe7003739bd1009 get rid of unneccessary fsync_bdev 2721065e39000629e87296676fd077bb0970417b Merge branch 'drbd-8.2' into drbd-8.3 93edff9e37a122b5f2d23ed2baef0dfeef1c5a17 Merge branch 'drbd-8.0' into drbd-8.2 0fb44e5ef678346808a9ab06a259112af7814fc8 dead code removal 13bb2bb8dd3fddbae88a1fcba757b567208e3ae1 fix config/deconfig race 4c0505a621e8c06eeac9a7d831621ea949770fe2 need to hold req_lock for_drbd_set_state (recently introduced bogon) 590cb50f41337c39b7d8095991fa1d144dbc3fd7 remove unnecessary variable, fix comment formatting 73abcf4b8f53f26fa4a6d524916fba142342df33 make bm_lock message moreinformative; hunting configuration race condition 0efb38921eff65f7f05aa2e028833aea5ed5b8f4 adjust loglevel: some dev_dbg should rather be warn, or info at least 50ddc402f72ab5b4486e0efa912a16b6f3d3012a bitmap compression stats: output percentage saved, not 'factor' 1737021364ea6da4d08bd08e99d1083587d271d4 compressed bitmap: finally settle for _one_ encoding c63bd973e2d52baeceb425592af371bf8f4fe2cd introduce disable_sendpage module parameter 
a87458a4243968229b9db85e680d464aae3c9bd4 fix double send_cnt accounting of _drbd_no_send_page fallbacks Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 4 +- drivers/block/drbd/drbd_buildtag.c | 4 +- drivers/block/drbd/drbd_int.h | 18 +- drivers/block/drbd/drbd_main.c | 171 +++++------------ drivers/block/drbd/drbd_nl.c | 156 ++++++++-------- drivers/block/drbd/drbd_receiver.c | 100 +++------- drivers/block/drbd/drbd_vli.h | 369 +++++++++++++------------------------ drivers/block/drbd/drbd_worker.c | 6 + include/linux/drbd_config.h | 1 - 9 files changed, 295 insertions(+), 534 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e602c778e712..0614f2c96f15 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -115,10 +115,10 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why) trylock_failed = down_trylock(&b->bm_change); if (trylock_failed) { - dev_dbg(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", + dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", current == mdev->receiver.task ? "receiver" : current == mdev->asender.task ? "asender" : - current == mdev->worker.task ? "worker" : "?", + current == mdev->worker.task ? "worker" : current->comm, why, b->bm_why ?: "?", b->bm_task == mdev->receiver.task ? "receiver" : b->bm_task == mdev->asender.task ? "asender" : diff --git a/drivers/block/drbd/drbd_buildtag.c b/drivers/block/drbd/drbd_buildtag.c index 617078b3dc33..a58ad76078a5 100644 --- a/drivers/block/drbd/drbd_buildtag.c +++ b/drivers/block/drbd/drbd_buildtag.c @@ -2,6 +2,6 @@ #include const char *drbd_buildtag(void) { - return "GIT-hash: 1a59b007715215697968cfaed3f2f159d262c030 drbd/drbd_nl.c" - " build by phil@fat-tyre, 2009-04-22 11:36:29"; + return "GIT-hash: 29ef4c01e46b0a269d7bec39d5178be06097fead drbd/Kconfig drbd/Makefile drbd/Makefile-2.6 drbd/drbd_actlog.c drbd/drbd_bitmap.c drbd/drbd_int.h drbd/drbd_main.c drbd/drbd_nl.c drbd/drbd_proc.c drbd/drbd_receiver.c drbd/drbd_req.c drbd/drbd_req.h drbd/drbd_tracing.c drbd/drbd_tracing.h drbd/drbd_worker.c drbd/drbd_wrappers.h drbd/linux/drbd_config.h" + " build by phil@fat-tyre, 2009-04-29 15:43:41"; } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 94138cc08943..ba43fa57b750 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -58,6 +58,7 @@ /* module parameter, defined in drbd_main.c */ extern unsigned int minor_count; +extern int disable_sendpage; extern int allow_oos; extern unsigned int cn_idx; @@ -525,12 +526,10 @@ struct p_discard { /* Valid values for the encoding field. * Bump proto version when changing this. */ enum drbd_bitmap_code { - RLE_VLI_Bytes = 0, - RLE_VLI_BitsFibD_0_1 = 1, - RLE_VLI_BitsFibD_1_1 = 2, - RLE_VLI_BitsFibD_1_2 = 3, - RLE_VLI_BitsFibD_2_3 = 4, - RLE_VLI_BitsFibD_3_5 = 5, + /* RLE_VLI_Bytes = 0, + * and other bit variants had been defined during + * algorithm evaluation. */ + RLE_VLI_Bits = 2, }; struct p_compressed_bm { @@ -777,6 +776,13 @@ enum { BITMAP_IO_QUEUED, /* Started bitmap IO */ RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ NET_CONGESTED, /* The data socket is congested */ + + CONFIG_PENDING, /* serialization of (re)configuration requests. + * if set, also prevents the device from dying */ + DEVICE_DYING, /* device became unconfigured, + * but worker thread is still handling the cleanup. 
+ * reconfiguring (nl_disk_conf, nl_net_conf) is dissalowed, + * while this is set. */ }; struct drbd_bitmap; /* opaque for drbd_conf */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4c84365aeeef..79cb0183f817 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -92,6 +92,7 @@ MODULE_PARM_DESC(allow_oos, "DONT USE!"); /* thanks to these macros, if compiled into the kernel (not-module), * this becomes the boot parameter drbd.minor_count */ module_param(minor_count, uint, 0444); +module_param(disable_sendpage, bool, 0644); module_param(allow_oos, bool, 0); module_param(cn_idx, uint, 0444); @@ -112,6 +113,7 @@ module_param(fault_devs, int, 0644); /* module parameter, defined */ unsigned int minor_count = 32; +int disable_sendpage; int allow_oos; unsigned int cn_idx = CN_IDX_DRBD; @@ -931,6 +933,20 @@ int __drbd_set_state(struct drbd_conf *mdev, dev_info(DEV, "%s\n", pb); } + /* solve the race between becoming unconfigured, + * worker doing the cleanup, and + * admin reconfiguring us: + * on (re)configure, first set CONFIG_PENDING, + * then wait for a potentially exiting worker, + * start the worker, and schedule one no_op. + * then proceed with configuration. + */ + if (ns.disk == D_DISKLESS && + ns.conn == C_STANDALONE && + ns.role == R_SECONDARY && + !test_and_set_bit(CONFIG_PENDING, &mdev->flags)) + set_bit(DEVICE_DYING, &mdev->flags); + mdev->state.i = ns.i; wake_up(&mdev->misc_wait); wake_up(&mdev->state_wait); @@ -1192,9 +1208,9 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, mdev->resync = NULL; lc_free(mdev->act_log); mdev->act_log = NULL; - __no_warn(local, drbd_free_bc(mdev->bc);); - wmb(); /* see begin of drbd_nl_disk_conf() */ - __no_warn(local, mdev->bc = NULL;); + __no_warn(local, + drbd_free_bc(mdev->bc); + mdev->bc = NULL;); if (mdev->md_io_tmpp) __free_page(mdev->md_io_tmpp); @@ -1219,10 +1235,14 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Terminate worker thread if we are unconfigured - it will be restarted as needed... */ - if (ns.disk == D_DISKLESS && ns.conn == C_STANDALONE && ns.role == R_SECONDARY) { + if (ns.disk == D_DISKLESS && + ns.conn == C_STANDALONE && + ns.role == R_SECONDARY) { if (os.aftr_isp != ns.aftr_isp) resume_next_sg(mdev); - drbd_thread_stop_nowait(&mdev->worker); + /* set in __drbd_set_state, unless CONFIG_PENDING was set */ + if (test_bit(DEVICE_DYING, &mdev->flags)) + drbd_thread_stop_nowait(&mdev->worker); } drbd_md_sync(mdev); @@ -1327,6 +1347,7 @@ int drbd_thread_start(struct drbd_thread *thi) thi->t_state = Restarting; dev_info(DEV, "Restarting %s thread (from %s [%d])\n", me, current->comm, current->pid); + /* fall through */ case Running: case Restarting: default: @@ -1704,102 +1725,6 @@ int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode) (struct p_header *)&p, sizeof(p)); } -/* returns - * positive: number of payload bytes needed in this packet. - * zero: incompressible. */ -int fill_bitmap_rle_bytes(struct drbd_conf *mdev, - struct p_compressed_bm *p, - struct bm_xfer_ctx *c) -{ - unsigned long plain_bits; - unsigned long tmp; - unsigned long rl; - void *buffer; - unsigned n; - unsigned len; - unsigned toggle; - - /* may we use this feature? */ - if ((mdev->sync_conf.use_rle_encoding == 0) || - (mdev->agreed_pro_version < 90)) - return 0; - - if (c->bit_offset >= c->bm_bits) - return 0; /* nothing to do. 
*/ - - /* use at most thus many bytes */ - len = BM_PACKET_VLI_BYTES_MAX; - buffer = p->code; - /* plain bits covered in this code string */ - plain_bits = 0; - - /* p->encoding & 0x80 stores whether the first - * run length is set. - * bit offset is implicit. - * start with toggle == 2 to be able to tell the first iteration */ - toggle = 2; - - /* see how much plain bits we can stuff into one packet - * using RLE and VLI. */ - do { - tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset) - : _drbd_bm_find_next(mdev, c->bit_offset); - if (tmp == -1UL) - tmp = c->bm_bits; - rl = tmp - c->bit_offset; - - if (toggle == 2) { /* first iteration */ - if (rl == 0) { - /* the first checked bit was set, - * store start value, */ - DCBP_set_start(p, 1); - /* but skip encoding of zero run length */ - toggle = !toggle; - continue; - } - DCBP_set_start(p, 0); - } - - /* paranoia: catch zero runlength. - * can only happen if bitmap is modified while we scan it. */ - if (rl == 0) { - dev_err(DEV, "unexpected zero runlength while encoding bitmap " - "t:%u bo:%lu\n", toggle, c->bit_offset); - return -1; - } - - n = vli_encode_bytes(buffer, rl, len); - if (n == 0) /* buffer full */ - break; - - toggle = !toggle; - buffer += n; - len -= n; - plain_bits += rl; - c->bit_offset = tmp; - } while (len && c->bit_offset < c->bm_bits); - - len = BM_PACKET_VLI_BYTES_MAX - len; - - if (plain_bits < (len << 3)) { - /* incompressible with this method. - * we need to rewind both word and bit position. */ - c->bit_offset -= plain_bits; - bm_xfer_ctx_bit_to_word_offset(c); - c->bit_offset = c->word_offset * BITS_PER_LONG; - return 0; - } - - /* RLE + VLI was able to compress it just fine. - * update c->word_offset. */ - bm_xfer_ctx_bit_to_word_offset(c); - - /* store pad_bits */ - DCBP_set_pad_bits(p, 0); - - return len; -} - int fill_bitmap_rle_bits(struct drbd_conf *mdev, struct p_compressed_bm *p, struct bm_xfer_ctx *c) @@ -1826,8 +1751,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, /* plain bits covered in this code string */ plain_bits = 0; - /* p->encoding & 0x80 stores whether the first - * run length is set. + /* p->encoding & 0x80 stores whether the first run length is set. * bit offset is implicit. * start with toggle == 2 to be able to tell the first iteration */ toggle = 2; @@ -1904,15 +1828,13 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, int len; int ok; - if (0) - len = fill_bitmap_rle_bytes(mdev, p, c); - else - len = fill_bitmap_rle_bits(mdev, p, c); + len = fill_bitmap_rle_bits(mdev, p, c); if (len < 0) return FAILED; + if (len) { - DCBP_set_code(p, 0 ? RLE_VLI_Bytes : RLE_VLI_BitsFibD_3_5); + DCBP_set_code(p, RLE_VLI_Bits); ok = _drbd_send_cmd(mdev, mdev->data.socket, P_COMPRESSED_BITMAP, h, sizeof(*p) + len, 0); @@ -2191,7 +2113,7 @@ STATIC int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, kunmap(page); if (sent == size) mdev->send_cnt += size>>9; - return sent; + return sent == size; } int _drbd_send_page(struct drbd_conf *mdev, struct page *page, @@ -2201,21 +2123,14 @@ int _drbd_send_page(struct drbd_conf *mdev, struct page *page, int sent, ok; int len = size; - /* PARANOIA. if this ever triggers, - * something in the layers above us is really kaputt. - *one roundtrip later: - * doh. it triggered. so XFS _IS_ really kaputt ... - * oh well... - */ - if ((page_count(page) < 1) || PageSlab(page)) { - /* e.g. XFS meta- & log-data is in slab pages, which have a - * page_count of 0 and/or have PageSlab() set... 
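The bitmap compression above works on alternating run lengths: the packet stores only the polarity of the first run (DCBP_set_start), then the lengths of successive set/unset runs, and a zero run length is invalid except implicitly for the first run. A minimal sketch of that run extraction, using plain int arrays where fill_bitmap_rle_bits() VLI-encodes into the packet buffer; rle_encode() is a hypothetical stand-in.

#include <assert.h>
#include <stdio.h>

/* returns number of runs; runs[] gets the lengths, *first the polarity */
static int rle_encode(const unsigned char *bits, int nbits,
		      int *runs, int *first)
{
	int n = 0, len = 1;

	*first = bits[0];
	for (int i = 1; i < nbits; i++) {
		if (bits[i] == bits[i - 1]) {
			len++;
		} else {
			runs[n++] = len;	/* polarity toggles here */
			len = 1;
		}
	}
	runs[n++] = len;
	return n;
}

int main(void)
{
	/* 1110000011: runs 3,5,2 starting with a set run */
	unsigned char bits[] = { 1, 1, 1, 0, 0, 0, 0, 0, 1, 1 };
	int runs[10], first;
	int n = rle_encode(bits, 10, runs, &first);

	printf("first=%d runs:", first);
	for (int i = 0; i < n; i++)
		printf(" %d", runs[i]);
	printf("\n");
	assert(n == 3 && runs[0] == 3 && runs[1] == 5 && runs[2] == 2);
	return 0;
}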
- */ - sent = _drbd_no_send_page(mdev, page, offset, size); - if (likely(sent > 0)) - len -= sent; - goto out; - } + /* e.g. XFS meta- & log-data is in slab pages, which have a + * page_count of 0 and/or have PageSlab() set. + * we cannot use send_page for those, as that does get_page(); + * put_page(); and would cause either a VM_BUG directly, or + * __page_cache_release a page that would actually still be referenced + * by someone, leading to some obscure delayed Oops somewhere else. */ + if (disable_sendpage || (page_count(page) < 1) || PageSlab(page)) + return _drbd_no_send_page(mdev, page, offset, size); drbd_update_congested(mdev); set_fs(KERNEL_DS); @@ -2241,7 +2156,6 @@ int _drbd_send_page(struct drbd_conf *mdev, struct page *page, set_fs(oldfs); clear_bit(NET_CONGESTED, &mdev->flags); -out: ok = (len == 0); if (likely(ok)) mdev->send_cnt += size>>9; @@ -2643,8 +2557,11 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev) D_ASSERT(mdev->net_conf == NULL); drbd_set_my_capacity(mdev, 0); - drbd_bm_resize(mdev, 0); - drbd_bm_cleanup(mdev); + if (mdev->bitmap) { + /* maybe never allocated. */ + drbd_bm_resize(mdev, 0); + drbd_bm_cleanup(mdev); + } drbd_free_resources(mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 3b46a934c2d6..c72c5adbbfd6 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -27,7 +27,6 @@ #include #include #include -#include /* for fsync_bdev */ #include #include #include @@ -353,8 +352,6 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) if (forced) dev_warn(DEV, "Forced to consider local data as UpToDate!\n"); - fsync_bdev(mdev->this_bdev); - /* Wait until nothing is on the fly :) */ wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0); @@ -493,18 +490,15 @@ char *ppsize(char *buf, unsigned long long size) * waits for ap_bio_cnt == 0. -> deadlock. * but this cannot happen, actually, because: * R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable - * (not connected, * or bad/no disk on peer): + * (not connected, or bad/no disk on peer): * see drbd_fail_request_early, ap_bio_cnt is zero. * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET: * peer may not initiate a resize. */ void drbd_suspend_io(struct drbd_conf *mdev) { - int in_flight; set_bit(SUSPEND_IO, &mdev->flags); - in_flight = atomic_read(&mdev->ap_bio_cnt); - if (in_flight) - wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); + wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); } void drbd_resume_io(struct drbd_conf *mdev) @@ -761,6 +755,36 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu } } +/* serialize deconfig (worker exiting, doing cleanup) + * and reconfig (drbdsetup disk, drbdsetup net) + * + * wait for a potentially exiting worker, then restart it, + * or start a new one. + */ +static void drbd_reconfig_start(struct drbd_conf *mdev) +{ + wait_event(mdev->state_wait, test_and_set_bit(CONFIG_PENDING, &mdev->flags)); + wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags)); + drbd_thread_start(&mdev->worker); +} + +/* if still unconfigured, stops worker again. + * if configured now, clears CONFIG_PENDING. 
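The send-path change above converges on one rule: slab pages and pages with page_count 0 (e.g. XFS meta- and log-data) must never go through zero-copy sendpage, because sendpage takes and drops page references, and together with the new disable_sendpage module parameter such pages force the copying _drbd_no_send_page() fallback. A stand-alone model of that dispatch; struct page_model and must_copy() are hypothetical stand-ins for the real page flags.

#include <stdbool.h>
#include <stdio.h>

struct page_model {
	int refcount;
	bool is_slab;
};

static bool disable_sendpage;	/* mirrors the new module parameter */

static bool must_copy(const struct page_model *p)
{
	return disable_sendpage || p->refcount < 1 || p->is_slab;
}

int main(void)
{
	struct page_model xfs_log = { .refcount = 0, .is_slab = true };
	struct page_model data    = { .refcount = 2, .is_slab = false };

	printf("xfs log page: %s\n", must_copy(&xfs_log) ? "copy" : "sendpage");
	printf("data page:    %s\n", must_copy(&data) ? "copy" : "sendpage");
	disable_sendpage = true;	/* admin override */
	printf("data page:    %s\n", must_copy(&data) ? "copy" : "sendpage");
	return 0;
}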
+ * wakes potential waiters */ +static void drbd_reconfig_done(struct drbd_conf *mdev) +{ + spin_lock_irq(&mdev->req_lock); + if (mdev->state.disk == D_DISKLESS && + mdev->state.conn == C_STANDALONE && + mdev->state.role == R_SECONDARY) { + set_bit(DEVICE_DYING, &mdev->flags); + drbd_thread_stop_nowait(&mdev->worker); + } else + clear_bit(CONFIG_PENDING, &mdev->flags); + spin_unlock_irq(&mdev->req_lock); + wake_up(&mdev->state_wait); +} + /* does always return 0; * interesting return code is in reply->ret_code */ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, @@ -774,33 +798,18 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp struct inode *inode, *inode2; struct lru_cache *resync_lru = NULL; union drbd_state ns, os; - int rv, ntries = 0; + int rv; int cp_discovered = 0; int hardsect; + drbd_reconfig_start(mdev); + /* if you want to reconfigure, please tear down first */ if (mdev->state.disk > D_DISKLESS) { retcode = ERR_DISK_CONFIGURED; goto fail; } - /* - * We may have gotten here very quickly from a detach. Wait for a bit - * then fail. - */ - while (1) { - __no_warn(local, nbc = mdev->bc;); - if (nbc == NULL) - break; - if (ntries++ >= 5) { - dev_warn(DEV, "drbd_nl_disk_conf: mdev->bc not NULL.\n"); - retcode = ERR_DISK_CONFIGURED; - goto fail; - } - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ/10); - } - nbc = kmalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); if (!nbc) { retcode = ERR_NOMEM; @@ -808,17 +817,11 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } memset(&nbc->md, 0, sizeof(struct drbd_md)); - - if (!(nlp->flags & DRBD_NL_SET_DEFAULTS) && inc_local(mdev)) { - memcpy(&nbc->dc, &mdev->bc->dc, sizeof(struct disk_conf)); - dec_local(mdev); - } else { - memset(&nbc->dc, 0, sizeof(struct disk_conf)); - nbc->dc.disk_size = DRBD_DISK_SIZE_SECT_DEF; - nbc->dc.on_io_error = DRBD_ON_IO_ERROR_DEF; - nbc->dc.fencing = DRBD_FENCING_DEF; - nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF; - } + memset(&nbc->dc, 0, sizeof(struct disk_conf)); + nbc->dc.disk_size = DRBD_DISK_SIZE_SECT_DEF; + nbc->dc.on_io_error = DRBD_ON_IO_ERROR_DEF; + nbc->dc.fencing = DRBD_FENCING_DEF; + nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF; if (!disk_conf_from_tags(mdev, nlp->tag_list, &nbc->dc)) { retcode = ERR_MANDATORY_TAG; @@ -882,13 +885,6 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp goto release_bdev_fail; } - if (!mdev->bitmap) { - if (drbd_bm_init(mdev)) { - retcode = ERR_NOMEM; - goto release_bdev_fail; - } - } - nbc->md_bdev = inode2->i_bdev; if (bd_claim(nbc->md_bdev, (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL || @@ -949,7 +945,9 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp nbc->known_size = drbd_get_capacity(nbc->backing_bdev); drbd_suspend_io(mdev); + /* also wait for the last barrier ack. 
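drbd_reconfig_start()/drbd_reconfig_done() above serialize teardown and reconfiguration with two flags: CONFIG_PENDING claims the device for one configuration request, DEVICE_DYING marks a worker that is still cleaning up and must be waited for; the worker clears both on exit (see the drbd_worker() hunk below). A userspace model of the handshake, with pthreads standing in for wait_event()/wake_up(); illustrative only, not the in-kernel code.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t state_wait = PTHREAD_COND_INITIALIZER;
static bool config_pending, device_dying;

static void reconfig_start(void)
{
	pthread_mutex_lock(&lock);
	while (config_pending)		/* claim CONFIG_PENDING */
		pthread_cond_wait(&state_wait, &lock);
	config_pending = true;
	while (device_dying)		/* old worker still cleaning up */
		pthread_cond_wait(&state_wait, &lock);
	pthread_mutex_unlock(&lock);
}

static void reconfig_done(bool still_unconfigured)
{
	pthread_mutex_lock(&lock);
	if (still_unconfigured)
		device_dying = true;	/* stop worker; it clears the flags */
	else
		config_pending = false;
	pthread_cond_broadcast(&state_wait);
	pthread_mutex_unlock(&lock);
}

static void worker_exit(void)		/* mirrors the drbd_worker() tail */
{
	pthread_mutex_lock(&lock);
	device_dying = false;
	config_pending = false;
	pthread_cond_broadcast(&state_wait);
	pthread_mutex_unlock(&lock);
}

int main(void)				/* build with -lpthread */
{
	reconfig_start();		/* e.g. drbdsetup ... disk */
	reconfig_done(true);		/* attach failed: still unconfigured */
	worker_exit();			/* cleanup done, waiters woken */
	reconfig_start();		/* retry now proceeds */
	printf("configured\n");
	reconfig_done(false);
	return 0;
}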
*/ wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt)); + retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); drbd_resume_io(mdev); if (retcode < SS_SUCCESS) @@ -958,9 +956,15 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp if (!inc_local_if_state(mdev, D_ATTACHING)) goto force_diskless; - drbd_thread_start(&mdev->worker); drbd_md_set_sector_offsets(mdev, nbc); + if (!mdev->bitmap) { + if (drbd_bm_init(mdev)) { + retcode = ERR_NOMEM; + goto force_diskless_dec; + } + } + retcode = drbd_md_read(mdev, nbc); if (retcode != NO_ERROR) goto force_diskless_dec; @@ -1150,6 +1154,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); dec_local(mdev); reply->ret_code = retcode; + drbd_reconfig_done(mdev); return 0; force_diskless_dec: @@ -1175,18 +1180,14 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp lc_free(resync_lru); reply->ret_code = retcode; + drbd_reconfig_done(mdev); return 0; } STATIC int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { - fsync_bdev(mdev->this_bdev); reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS)); - - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ/20); /* 50ms; Time for worker to finally terminate */ - return 0; } @@ -1208,6 +1209,8 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, void *int_dig_vv = NULL; struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr; + drbd_reconfig_start(mdev); + if (mdev->state.conn > C_STANDALONE) { retcode = ERR_NET_CONFIGURED; goto fail; @@ -1219,28 +1222,23 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, goto fail; } - if (!(nlp->flags & DRBD_NL_SET_DEFAULTS) && inc_net(mdev)) { - memcpy(new_conf, mdev->net_conf, sizeof(struct net_conf)); - dec_net(mdev); - } else { - memset(new_conf, 0, sizeof(struct net_conf)); - new_conf->timeout = DRBD_TIMEOUT_DEF; - new_conf->try_connect_int = DRBD_CONNECT_INT_DEF; - new_conf->ping_int = DRBD_PING_INT_DEF; - new_conf->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF; - new_conf->max_buffers = DRBD_MAX_BUFFERS_DEF; - new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF; - new_conf->sndbuf_size = DRBD_SNDBUF_SIZE_DEF; - new_conf->ko_count = DRBD_KO_COUNT_DEF; - new_conf->after_sb_0p = DRBD_AFTER_SB_0P_DEF; - new_conf->after_sb_1p = DRBD_AFTER_SB_1P_DEF; - new_conf->after_sb_2p = DRBD_AFTER_SB_2P_DEF; - new_conf->want_lose = 0; - new_conf->two_primaries = 0; - new_conf->wire_protocol = DRBD_PROT_C; - new_conf->ping_timeo = DRBD_PING_TIMEO_DEF; - new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF; - } + memset(new_conf, 0, sizeof(struct net_conf)); + new_conf->timeout = DRBD_TIMEOUT_DEF; + new_conf->try_connect_int = DRBD_CONNECT_INT_DEF; + new_conf->ping_int = DRBD_PING_INT_DEF; + new_conf->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF; + new_conf->max_buffers = DRBD_MAX_BUFFERS_DEF; + new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF; + new_conf->sndbuf_size = DRBD_SNDBUF_SIZE_DEF; + new_conf->ko_count = DRBD_KO_COUNT_DEF; + new_conf->after_sb_0p = DRBD_AFTER_SB_0P_DEF; + new_conf->after_sb_1p = DRBD_AFTER_SB_1P_DEF; + new_conf->after_sb_2p = DRBD_AFTER_SB_2P_DEF; + new_conf->want_lose = 0; + new_conf->two_primaries = 0; + new_conf->wire_protocol = DRBD_PROT_C; + new_conf->ping_timeo = DRBD_PING_TIMEO_DEF; + new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF; if 
(!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) { retcode = ERR_MANDATORY_TAG; @@ -1367,7 +1365,12 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } } - D_ASSERT(mdev->net_conf == NULL); + spin_lock_irq(&mdev->req_lock); + if (mdev->net_conf != NULL) { + retcode = ERR_NET_CONFIGURED; + spin_unlock_irq(&mdev->req_lock); + goto fail; + } mdev->net_conf = new_conf; mdev->send_cnt = 0; @@ -1400,13 +1403,13 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, mdev->int_dig_out=int_dig_out; mdev->int_dig_in=int_dig_in; mdev->int_dig_vv=int_dig_vv; + spin_unlock_irq(&mdev->req_lock); retcode = _drbd_request_state(mdev, NS(conn, C_UNCONNECTED), CS_VERBOSE); - if (retcode >= SS_SUCCESS) - drbd_thread_start(&mdev->worker); kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); reply->ret_code = retcode; + drbd_reconfig_done(mdev); return 0; fail: @@ -1421,6 +1424,7 @@ fail: kfree(new_conf); reply->ret_code = retcode; + drbd_reconfig_done(mdev); return 0; } @@ -1938,8 +1942,10 @@ STATIC int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl if (skip_initial_sync) { drbd_send_uuids_skip_initial_sync(mdev); _drbd_uuid_set(mdev, UI_BITMAP, 0); + spin_lock_irq(&mdev->req_lock); _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), CS_VERBOSE, NULL); + spin_unlock_irq(&mdev->req_lock); } } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 26ac8fd0e1f4..078ce728a2fc 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3225,105 +3225,55 @@ recv_bm_rle_bits(struct drbd_conf *mdev, return (s == c->bm_bits) ? DONE : OK; } - -static enum receive_bitmap_ret -recv_bm_rle_bytes(struct drbd_conf *mdev, - struct p_compressed_bm *p, - struct bm_xfer_ctx *c) -{ - u64 rl; - unsigned char *buf = p->code; - unsigned long s; - unsigned long e; - int len = p->head.length - (p->code - p->head.payload); - int toggle; - int n; - - s = c->bit_offset; - - /* decoding. the payload of bitmap rle packets is VLI encoded - * runlength of set and unset bits, starting with set/unset as defined - * in p->encoding & 0x80. */ - for (toggle = DCBP_get_start(p); len; s += rl, toggle = !toggle) { - if (s >= c->bm_bits) { - dev_err(DEV, "bitmap overflow (s:%lu) while decoding bitmap RLE packet\n", s); - return FAILED; - } - - n = vli_decode_bytes(&rl, buf, len); - if (n == 0) /* incomplete buffer! */ - return FAILED; - buf += n; - len -= n; - - if (rl == 0) { - dev_err(DEV, "unexpected zero runlength while decoding bitmap RLE packet\n"); - return FAILED; - } - - /* unset bits: ignore, because of x | 0 == x. */ - if (!toggle) - continue; - - /* set bits: merge into bitmap. */ - e = s + rl -1; - if (e >= c->bm_bits) { - dev_err(DEV, "bitmap overflow (e:%lu) while decoding bitmap RLE packet\n", e); - return FAILED; - } - _drbd_bm_set_bits(mdev, s, e); - } - - c->bit_offset = s; - bm_xfer_ctx_bit_to_word_offset(c); - - return (s == c->bm_bits) ? DONE : OK; -} - static enum receive_bitmap_ret decode_bitmap_c(struct drbd_conf *mdev, struct p_compressed_bm *p, struct bm_xfer_ctx *c) { - switch (DCBP_get_code(p)) { - /* no default! I want the compiler to warn me! 
*/ - case RLE_VLI_BitsFibD_0_1: - case RLE_VLI_BitsFibD_1_1: - case RLE_VLI_BitsFibD_1_2: - case RLE_VLI_BitsFibD_2_3: - break; /* TODO */ - case RLE_VLI_BitsFibD_3_5: + if (DCBP_get_code(p) == RLE_VLI_Bits) return recv_bm_rle_bits(mdev, p, c); - case RLE_VLI_Bytes: - return recv_bm_rle_bytes(mdev, p, c); - } + + /* other variants had been implemented for evaluation, + * but have been dropped as this one turned out to be "best" + * during all our tests. */ + dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding); + drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); return FAILED; } void INFO_bm_xfer_stats(struct drbd_conf *mdev, const char *direction, struct bm_xfer_ctx *c) { - unsigned plain_would_take = sizeof(struct p_header) * + /* what would it take to transfer it "plaintext" */ + unsigned plain = sizeof(struct p_header) * ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1) + c->bm_words * sizeof(long); unsigned total = c->bytes[0] + c->bytes[1]; - unsigned q, r; + unsigned r; /* total can not be zero. but just in case: */ if (total == 0) return; - q = plain_would_take / total; - r = plain_would_take % total; - r = (r > UINT_MAX/100) ? (r / (total+99/100)) : (100 * r / total); + /* don't report if not compressed */ + if (total >= plain) + return; + + /* total < plain. check for overflow, still */ + r = (total > UINT_MAX/1000) ? (total / (plain/1000)) + : (1000 * total / plain); + + if (r > 1000) + r = 1000; + r = 1000 - r; dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), " - "total %u; compression factor: %u.%02u\n", + "total %u; compression: %u.%u%%\n", direction, c->bytes[1], c->packets[1], c->bytes[0], c->packets[0], - total, q, r); + total, r/10, r % 10); } /* Since we are processing the bitfield from lower addresses to higher, @@ -3689,9 +3639,9 @@ STATIC void drbd_disconnect(struct drbd_conf *mdev) * we already released the socket!? */ i = atomic_read(&mdev->pp_in_use); if (i) - dev_dbg(DEV, "pp_in_use = %u, expected 0\n", i); + dev_info(DEV, "pp_in_use = %u, expected 0\n", i); if (!list_empty(&mdev->net_ee)) - dev_dbg(DEV, "net_ee not empty!\n"); + dev_info(DEV, "net_ee not empty!\n"); D_ASSERT(list_empty(&mdev->read_ee)); D_ASSERT(list_empty(&mdev->active_ee)); diff --git a/drivers/block/drbd/drbd_vli.h b/drivers/block/drbd/drbd_vli.h index 8f5e8186852d..fc824006e721 100644 --- a/drivers/block/drbd/drbd_vli.h +++ b/drivers/block/drbd/drbd_vli.h @@ -37,15 +37,15 @@ * * We never actually need to encode a "zero" (runlengths are positive). * But then we have to store the value of the first bit. - * So we can as well have the "zero" be a valid runlength, - * and start encoding/decoding by "number of _set_ bits" by convention. + * The first bit of information thus shall encode if the first runlength + * gives the number of set or unset bits. * * We assume that large areas are either completely set or unset, * which gives good compression with any runlength method, * even when encoding the runlength as fixed size 32bit/64bit integers. * * Still, there may be areas where the polarity flips every few bits, - * and encoding the runlength sequence of those ares with fix size + * and encoding the runlength sequence of those areas with fix size * integers would be much worse than plaintext. * * We want to encode small runlength values with minimum code length, @@ -53,105 +53,147 @@ * * Thus we need a Variable Length Integer encoding, VLI. * - * For runlength < 8, we produce more code bits than plaintext input. 
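The reworked INFO_bm_xfer_stats() above reports how much was saved rather than a "factor": with total < plain, the savings in tenths of a percent is 1000 - 1000*total/plain, and the conditional guards the multiplication against unsigned overflow exactly as the hunk does. A stand-alone check of that arithmetic; savings_permille() is a hypothetical extraction.

#include <limits.h>
#include <stdio.h>

static unsigned savings_permille(unsigned total, unsigned plain)
{
	unsigned r;

	if (total == 0 || total >= plain)
		return 0;	/* nothing sent, or not compressed */
	r = (total > UINT_MAX / 1000) ? total / (plain / 1000)
				      : 1000 * total / plain;
	if (r > 1000)
		r = 1000;
	return 1000 - r;
}

int main(void)
{
	unsigned r = savings_permille(4200, 131072);

	printf("compression: %u.%u%%\n", r / 10, r % 10);	/* 96.8% */
	return 0;
}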
- * we need to send incompressible chunks as plaintext, skip over them + * For some cases, we produce more code bits than plaintext input. + * We need to send incompressible chunks as plaintext, skip over them * and then see if the next chunk compresses better. * - * We don't care too much about "excellent" compression ratio - * for large runlengths, 249 bit/24 bit still gives a factor of > 10. + * We don't care too much about "excellent" compression ratio for large + * runlengths (all set/all clear): whether we achieve a factor of 100 + * or 1000 is not that much of an issue. + * We do not want to waste too much on short runlengths in the "noisy" + * parts of the bitmap, though. * - * We care for cpu time needed to actually encode/decode - * into the transmitted byte stream. + * There are endless variants of VLI, we experimented with: + * * simple byte-based + * * various bit based with different code word length. * - * There are endless variants of VLI. - * For this special purpose, we just need something that is "good enough", - * and easy to understand and code, fast to encode and decode, - * and does not consume memory. + * To avoid yet an other configuration parameter (choice of bitmap compression + * algorithm) which was difficult to explain and tune, we just chose the one + * variant that turned out best in all test cases. + * Based on real world usage patterns, with device sizes ranging from a few GiB + * to several TiB, file server/mailserver/webserver/mysql/postgress, + * mostly idle to really busy, the all time winner (though sometimes only + * marginally better) is: */ /* - * buf points to the current position in the tranfered byte stream. - * stream is by definition little endian. - * *buf_len gives the remaining number of bytes at that position. - * *out will receive the decoded value. - * returns number of bytes consumed, - * or 0 if not enough bytes left in buffer (which would be invalid input). - */ -static inline int vli_decode_bytes(u64 *out, unsigned char *buf, unsigned buf_len) -{ - u64 tmp = 0; - unsigned bytes; /* extra bytes after code byte */ + * encoding is "visualised" as + * __little endian__ bitstream, least significant bit first (left most) + * + * this particular encoding is chosen so that the prefix code + * starts as unary encoding the level, then modified so that + * 10 levels can be described in 8bit, with minimal overhead + * for the smaller levels. + * + * Number of data bits follow fibonacci sequence, with the exception of the + * last level (+1 data bit, so it makes 64bit total). The only worse code when + * encoding bit polarity runlength is 1 plain bits => 2 code bits. +prefix data bits max val Nº data bits +0 x 0x2 1 +10 x 0x4 1 +110 xx 0x8 2 +1110 xxx 0x10 3 +11110 xxx xx 0x30 5 +111110 xx xxxxxx 0x130 8 +11111100 xxxxxxxx xxxxx 0x2130 13 +11111110 xxxxxxxx xxxxxxxx xxxxx 0x202130 21 +11111101 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xx 0x400202130 34 +11111111 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 56 + * maximum encodable value: 0x100000400202130 == 2**56 + some */ + +/* compression "table": + transmitted x 0.29 + as plaintext x ........................ + x ........................ + x ........................ + x 0.59 0.21........................ + x ........................................................ + x .. c ................................................... + x 0.44.. o ................................................... + x .......... d ................................................... + x .......... 
e ................................................... + X............. ................................................... + x.............. b ................................................... +2.0x............... i ................................................... + #X................ t ................................................... + #................. s ........................... plain bits .......... +-+----------------------------------------------------------------------- + 1 16 32 64 +*/ - if (buf_len == 0) - return 0; +/* LEVEL: (total bits, prefix bits, prefix value), + * sorted ascending by number of total bits. + * The rest of the code table is calculated at compiletime from this. */ - switch(*buf) { - case 0xff: bytes = 8; break; - case 0xfe: bytes = 7; break; - case 0xfd: bytes = 6; break; - case 0xfc: bytes = 5; break; - case 0xfb: bytes = 4; break; - case 0xfa: bytes = 3; break; - case 0xf9: bytes = 2; break; - default: - *out = *buf; - return 1; - } +/* fibonacci data 1, 1, ... */ +#define VLI_L_1_1() do { \ + LEVEL( 2, 1, 0x00); \ + LEVEL( 3, 2, 0x01); \ + LEVEL( 5, 3, 0x03); \ + LEVEL( 7, 4, 0x07); \ + LEVEL(10, 5, 0x0f); \ + LEVEL(14, 6, 0x1f); \ + LEVEL(21, 8, 0x3f); \ + LEVEL(29, 8, 0x7f); \ + LEVEL(42, 8, 0xbf); \ + LEVEL(64, 8, 0xff); \ + } while (0) - if (buf_len <= bytes) - return 0; +/* finds a suitable level to decode the least significant part of in. + * returns number of bits consumed. + * + * BUG() for bad input, as that would mean a buggy code table. */ +static inline int vli_decode_bits(u64 *out, const u64 in) +{ + u64 adj = 1; + +#define LEVEL(t,b,v) \ + do { \ + if ((in & ((1 << b) -1)) == v) { \ + *out = ((in & ((~0ULL) >> (64-t))) >> b) + adj; \ + return t; \ + } \ + adj += 1ULL << (t - b); \ + } while (0) + + VLI_L_1_1(); - /* no pointer cast assignment, there may be funny alignment - * requirements on certain architectures */ - memcpy(&tmp, buf+1, bytes); - *out = le64_to_cpu(tmp); - return bytes+1; + /* NOT REACHED, if VLI_LEVELS code table is defined properly */ + BUG(); +#undef LEVEL } -/* - * similarly, encode n into buf. - * returns consumed bytes, - * or zero if not enough room left in buffer - * (in which case the buf is left unchanged). - * - * encoding is little endian, first byte codes how much bytes follow. - * first byte <= 0xf8 means just this byte, value = code byte. - * first byte == 0xf9 .. 0xff: (code byte - 0xf7) data bytes follow. - */ -static inline int vli_encode_bytes(unsigned char *buf, u64 n, unsigned buf_len) +/* return number of code bits needed, + * or negative error number */ +static inline int __vli_encode_bits(u64 *out, const u64 in) { - unsigned bytes; /* _extra_ bytes after code byte */ - - if (buf_len == 0) - return 0; + u64 max = 0; + u64 adj = 1; - if (n <= 0xf8) { - *buf = (unsigned char)n; - return 1; - } + if (in == 0) + return -EINVAL; - bytes = (n < (1ULL << 32)) - ? (n < (1ULL << 16)) ? 2 - : (n < (1ULL << 24)) ? 3 : 4 - : (n < (1ULL << 48)) ? - (n < (1ULL << 40)) ? 5 : 6 - : (n < (1ULL << 56)) ? 7 : 8; +#define LEVEL(t,b,v) do { \ + max += 1ULL << (t - b); \ + if (in <= max) { \ + if (out) \ + *out = ((in - adj) << b) | v; \ + return t; \ + } \ + adj = max + 1; \ + } while (0) - if (buf_len <= bytes) - return 0; + VLI_L_1_1(); - /* no pointer cast assignment, there may be funny alignment - * requirements on certain architectures */ - *buf++ = 0xf7 + bytes; /* code, 0xf9 .. 
0xff */ - n = cpu_to_le64(n); - memcpy(buf, &n, bytes); /* plain */ - return bytes+1; + return -EOVERFLOW; +#undef LEVEL } -/* ================================================================== */ +#undef VLI_L_1_1 -/* And here the more involved variants of VLI. - * +/* code from here down is independend of actually used bit code */ + +/* * Code length is determined by some unique (e.g. unary) prefix. * This encodes arbitrary bit length, not whole bytes: we have a bit-stream, * not a byte stream. @@ -287,164 +329,6 @@ static inline int bitstream_get_bits(struct bitstream *bs, u64 *out, int bits) return bits; } -/* we still need to actually define the code. */ - -/* - * encoding is "visualised" as - * __little endian__ bitstream, least significant bit first (left most) - * - * this particular encoding is chosen so that the prefix code - * starts as unary encoding the level, then modified so that - * 11 levels can be described in 8bit, with minimal overhead - * for the smaller levels. - * - * Number of data bits follow fibonacci sequence, with the exception of the - * last level (+1 data bit, so it makes 64bit total). The only worse code when - * encoding bit polarity runlength is 2 plain bits => 3 code bits. -prefix data bits max val Nº data bits -0 0x1 0 -10 x 0x3 1 -110 x 0x5 1 -1110 xx 0x9 2 -11110 xxx 0x11 3 -1111100 x xxxx 0x31 5 -1111101 x xxxxxxx 0x131 8 -11111100 xxxxxxxx xxxxx 0x2131 13 -11111110 xxxxxxxx xxxxxxxx xxxxx 0x202131 21 -11111101 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xx 0x400202131 34 -11111111 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 56 - * maximum encodable value: 0x100000400202131 == 2**56 + some */ - -/* LEVEL: (total bits, prefix bits, prefix value), - * sorted ascending by number of total bits. - * The rest of the code table is calculated at compiletime from this. */ - -/* fibonacci data 0, 1, ... */ -#define VLI_L_0_1() do { \ - LEVEL( 1, 1, 0x00); \ - LEVEL( 3, 2, 0x01); \ - LEVEL( 4, 3, 0x03); \ - LEVEL( 6, 4, 0x07); \ - LEVEL( 8, 5, 0x0f); \ - LEVEL(12, 7, 0x1f); \ - LEVEL(15, 7, 0x5f); \ - LEVEL(21, 8, 0x3f); \ - LEVEL(29, 8, 0x7f); \ - LEVEL(42, 8, 0xbf); \ - LEVEL(64, 8, 0xff); \ - } while (0) - -/* Some variants, differeing in number of levels, prefix value, and number of - * databits in each level. I tried a lot of variants. Those where the number - * of data bits follows the fibonacci sequence (with a certain offset) simply - * "look best" ;-) - * All of these can encode at least "2 ** 56". */ - -/* fibonacci data 1, 1, ... */ -#define VLI_L_1_1() do { \ - LEVEL( 2, 1, 0x00); \ - LEVEL( 3, 2, 0x01); \ - LEVEL( 5, 3, 0x03); \ - LEVEL( 7, 4, 0x07); \ - LEVEL(10, 5, 0x0f); \ - LEVEL(14, 6, 0x1f); \ - LEVEL(21, 8, 0x3f); \ - LEVEL(29, 8, 0x7f); \ - LEVEL(42, 8, 0xbf); \ - LEVEL(64, 8, 0xff); \ - } while (0) - -/* fibonacci data 1, 2, ... */ -#define VLI_L_1_2() do { \ - LEVEL( 2, 1, 0x00); \ - LEVEL( 4, 2, 0x01); \ - LEVEL( 6, 3, 0x03); \ - LEVEL( 9, 4, 0x07); \ - LEVEL(13, 5, 0x0f); \ - LEVEL(19, 6, 0x1f); \ - LEVEL(28, 7, 0x3f); \ - LEVEL(42, 8, 0x7f); \ - LEVEL(64, 8, 0xff); \ - } while (0) - -/* fibonacci data 2, 3, ... */ -#define VLI_L_2_3() do { \ - LEVEL( 3, 1, 0x00); \ - LEVEL( 5, 2, 0x01); \ - LEVEL( 8, 3, 0x03); \ - LEVEL(12, 4, 0x07); \ - LEVEL(18, 5, 0x0f); \ - LEVEL(27, 6, 0x1f); \ - LEVEL(41, 7, 0x3f); \ - LEVEL(64, 7, 0x5f); \ - } while (0) - -/* fibonacci data 3, 5, ... 
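The one encoding that survived the evaluation is the VLI_L_1_1 level table quoted above. vli_decode_bits() and __vli_encode_bits() below are lifted from the hunk; the main() round-trip is a hypothetical self-test, not part of the driver.

#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

#define VLI_L_1_1() do { \
	LEVEL( 2, 1, 0x00); LEVEL( 3, 2, 0x01); LEVEL( 5, 3, 0x03); \
	LEVEL( 7, 4, 0x07); LEVEL(10, 5, 0x0f); LEVEL(14, 6, 0x1f); \
	LEVEL(21, 8, 0x3f); LEVEL(29, 8, 0x7f); LEVEL(42, 8, 0xbf); \
	LEVEL(64, 8, 0xff); \
} while (0)

static int vli_decode_bits(u64 *out, const u64 in)
{
	u64 adj = 1;

#define LEVEL(t, b, v) do { \
	if ((in & ((1 << b) - 1)) == v) { \
		*out = ((in & ((~0ULL) >> (64 - t))) >> b) + adj; \
		return t; \
	} \
	adj += 1ULL << (t - b); \
} while (0)
	VLI_L_1_1();
#undef LEVEL
	return -EINVAL;	/* unreachable with a correct code table */
}

static int __vli_encode_bits(u64 *out, const u64 in)
{
	u64 max = 0, adj = 1;

	if (in == 0)
		return -EINVAL;
#define LEVEL(t, b, v) do { \
	max += 1ULL << (t - b); \
	if (in <= max) { \
		if (out) \
			*out = ((in - adj) << b) | v; \
		return t; \
	} \
	adj = max + 1; \
} while (0)
	VLI_L_1_1();
#undef LEVEL
	return -EOVERFLOW;	/* larger than 2**56 + some: does not fit */
}

int main(void)
{
	u64 v, code, back;

	for (v = 1; v < 100000; v = v * 3 + 1) {
		int bits = __vli_encode_bits(&code, v);

		assert(bits > 0 && vli_decode_bits(&back, code) == bits);
		assert(back == v);
		printf("%llu -> %d code bits\n", (unsigned long long)v, bits);
	}
	return 0;
}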
-#define VLI_L_3_5() do { \
- LEVEL( 4, 1, 0x00); \
- LEVEL( 7, 2, 0x01); \
- LEVEL(11, 3, 0x03); \
- LEVEL(17, 4, 0x07); \
- LEVEL(26, 5, 0x0f); \
- LEVEL(40, 6, 0x1f); \
- LEVEL(64, 6, 0x3f); \
- } while (0)
-
-/* CONFIG */
-#ifndef VLI_LEVELS
-#define VLI_LEVELS() VLI_L_3_5()
-#endif
-
-/* finds a suitable level to decode the least significant part of in.
- * returns number of bits consumed.
- *
- * BUG() for bad input, as that would mean a buggy code table. */
-static inline int vli_decode_bits(u64 *out, const u64 in)
-{
- u64 adj = 1;
-
-#define LEVEL(t,b,v) \
- do { \
- if ((in & ((1 << b) -1)) == v) { \
- *out = ((in & ((~0ULL) >> (64-t))) >> b) + adj; \
- return t; \
- } \
- adj += 1ULL << (t - b); \
- } while (0)
-
- VLI_LEVELS();
-
- /* NOT REACHED, if VLI_LEVELS code table is defined properly */
- BUG();
-#undef LEVEL
-}
-
-/* return number of code bits needed,
- * or negative error number */
-static inline int __vli_encode_bits(u64 *out, const u64 in)
-{
- u64 max = 0;
- u64 adj = 1;
-
- if (in == 0)
- return -EINVAL;
-
-#define LEVEL(t,b,v) do { \
- max += 1ULL << (t - b); \
- if (in <= max) { \
- if (out) \
- *out = ((in - adj) << b) | v; \
- return t; \
- } \
- adj = max + 1; \
- } while (0)
-
- VLI_LEVELS();
-
- return -EOVERFLOW;
-#undef LEVEL
-}
-
 /* encodes @in as vli into @bs;
 * return values
@@ -464,11 +348,4 @@ static inline int vli_encode_bits(struct bitstream *bs, u64 in)
 return bitstream_put_bits(bs, code, bits);
 }
-#undef VLI_L_0_1
-#undef VLI_L_1_1
-#undef VLI_L_1_2
-#undef VLI_L_2_3
-#undef VLI_L_3_5
-
-#undef VLI_LEVELS
 #endif
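The LEVEL() walk retained above is compact but subtle: each level's data bits store an offset from a running base (adj), not the raw value. The following user-space sketch (a standalone illustration, not part of the patch; encode()/decode() stand in for __vli_encode_bits()/vli_decode_bits()) replays the same walk over the VLI_L_1_1 table and round-trips a spread of values:

/* vli_sketch.c - standalone illustration only */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

#define VLI_L_1_1() do { \
	LEVEL( 2, 1, 0x00); \
	LEVEL( 3, 2, 0x01); \
	LEVEL( 5, 3, 0x03); \
	LEVEL( 7, 4, 0x07); \
	LEVEL(10, 5, 0x0f); \
	LEVEL(14, 6, 0x1f); \
	LEVEL(21, 8, 0x3f); \
	LEVEL(29, 8, 0x7f); \
	LEVEL(42, 8, 0xbf); \
	LEVEL(64, 8, 0xff); \
	} while (0)

/* first level whose cumulative range covers "in" wins;
 * the data bits store (in - adj), the offset into that level */
static int encode(u64 *out, const u64 in)
{
	u64 max = 0;
	u64 adj = 1;

	if (in == 0)
		return -1; /* -EINVAL in the kernel version */
#define LEVEL(t, b, v) do { \
	max += 1ULL << (t - b); \
	if (in <= max) { \
		*out = ((in - adj) << b) | v; \
		return t; \
	} \
	adj = max + 1; \
	} while (0)
	VLI_L_1_1();
#undef LEVEL
	return -2; /* -EOVERFLOW in the kernel version */
}

/* match the prefix in the low bits, strip it, re-add the level base */
static int decode(u64 *out, const u64 in)
{
	u64 adj = 1;
#define LEVEL(t, b, v) do { \
	if ((in & ((1 << b) - 1)) == v) { \
		*out = ((in & ((~0ULL) >> (64 - t))) >> b) + adj; \
		return t; \
	} \
	adj += 1ULL << (t - b); \
	} while (0)
	VLI_L_1_1();
#undef LEVEL
	return -1; /* unreachable with a consistent table */
}

int main(void)
{
	u64 code, plain, in;

	/* runlength 1 is the most common input: shortest code, 2 bits */
	assert(encode(&code, 1) == 2 && code == 0x0);

	for (in = 1; in < (1ULL << 20); in = in * 3 + 1) {
		int bits = encode(&code, in);
		assert(bits > 0);
		assert(decode(&plain, code) == bits && plain == in);
		printf("%20llu -> %2d code bits\n",
		       (unsigned long long)in, bits);
	}
	return 0;
}

Run against the table above, this shows runlengths up to 2 encoded in 2 bits, 3..4 in 3 bits, and so on, mirroring the cumulative ranges that the fibonacci data widths produce.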
This enables ALL use of dynamic tracing including packet
 * and bio dumping, etc */
-- 
cgit v1.2.3


From 95eaef1cbc63f73df4ff8559dd66b1d29b462535 Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Wed, 29 Apr 2009 17:45:27 +0200
Subject: Replaced our own tracing stuff with Linux's tracepoints

Signed-off-by: Philipp Reisner
Signed-off-by: Lars Ellenberg
---
 drivers/block/drbd/Kconfig | 10 +
 drivers/block/drbd/Makefile | 3 +
 drivers/block/drbd/drbd_actlog.c | 111 +++---
 drivers/block/drbd/drbd_bitmap.c | 5 -
 drivers/block/drbd/drbd_int.h | 130 +------
 drivers/block/drbd/drbd_main.c | 483 ++---
 drivers/block/drbd/drbd_nl.c | 85 +----
 drivers/block/drbd/drbd_proc.c | 4 -
 drivers/block/drbd/drbd_receiver.c | 101 +----
 drivers/block/drbd/drbd_req.c | 89 +----
 drivers/block/drbd/drbd_req.h | 1 +
 drivers/block/drbd/drbd_tracing.c | 762 +++++++++++++++++++++++++++++++++++++
 drivers/block/drbd/drbd_tracing.h | 87 +++++
 drivers/block/drbd/drbd_worker.c | 25 +-
 drivers/block/drbd/drbd_wrappers.h | 5 +
 include/linux/drbd_config.h | 5 -
 16 files changed, 1006 insertions(+), 900 deletions(-)
 create mode 100644 drivers/block/drbd/drbd_tracing.c
 create mode 100644 drivers/block/drbd/drbd_tracing.h

diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig
index c5847ddb24b3..7ad8c2a3c2f3 100644
--- a/drivers/block/drbd/Kconfig
+++ b/drivers/block/drbd/Kconfig
@@ -35,3 +35,13 @@ config BLK_DEV_DRBD
 See also: http://www.drbd.org/, http://www.linux-ha.org
 If unsure, say N.
+
+config DRBD_TRACE
+ tristate "DRBD tracing"
+ depends on BLK_DEV_DRBD
+ select TRACEPOINTS
+ help
+
+ Say Y here if you want to be able to trace various events in DRBD.
+
+ If unsure, say N.
diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile
index 7237c339f7da..f0f805cd2051 100644
--- a/drivers/block/drbd/Makefile
+++ b/drivers/block/drbd/Makefile
@@ -2,4 +2,7 @@ drbd-y := drbd_buildtag.o drbd_bitmap.o drbd_proc.o
 drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
 drbd-y += lru_cache.o drbd_main.o drbd_strings.o drbd_nl.o
+drbd_trace-y := drbd_tracing.o drbd_strings.o
+
 obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o
+obj-$(CONFIG_DRBD_TRACE) += drbd_trace.o
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index fbbddd3d7f31..c894b4fa6af0 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -26,6 +26,7 @@
 #include
 #include
 #include "drbd_int.h"
+#include "drbd_tracing.h"
 #include "drbd_wrappers.h"
 /* I do not believe that all storage medias can guarantee atomic
@@ -65,6 +66,17 @@ struct drbd_atodb_wait {
 int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int);
+/* The actual tracepoint needs to have a constant number of known arguments...
+ */
+void trace_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ trace__drbd_resync(mdev, level, fmt, ap);
+ va_end(ap);
+}
+
 STATIC int _drbd_md_sync_page_io(struct drbd_conf *mdev,
 struct drbd_backing_dev *bdev,
 struct page *page, sector_t sector,
@@ -93,7 +105,7 @@ STATIC int _drbd_md_sync_page_io(struct drbd_conf *mdev,
 bio->bi_end_io = drbd_md_io_complete;
 bio->bi_rw = rw;
- dump_internal_bio("Md", mdev, bio, 0);
+ trace_drbd_bio(mdev, "Md", bio, 0, NULL);
 if (FAULT_ACTIVE(mdev, (rw & WRITE) ?
DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) bio_endio(bio, -EIO); @@ -223,11 +235,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) D_ASSERT(atomic_read(&mdev->local_cnt) > 0); - MTRACE(TRACE_TYPE_AL_EXTS, TRACE_LVL_METRICS, - dev_info(DEV, "al_begin_io( sec=%llus (al_enr=%u) (rs_enr=%d) )\n", - (unsigned long long) sector, enr, - (int)BM_SECT_TO_EXT(sector)); - ); + trace_drbd_actlog(mdev, sector, "al_begin_io"); wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr))); @@ -260,11 +268,7 @@ void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector) struct lc_element *extent; unsigned long flags; - MTRACE(TRACE_TYPE_AL_EXTS, TRACE_LVL_METRICS, - dev_info(DEV, "al_complete_io( sec=%llus (al_enr=%u) (rs_enr=%d) )\n", - (unsigned long long) sector, enr, - (int)BM_SECT_TO_EXT(sector)); - ); + trace_drbd_actlog(mdev, sector, "al_complete_io"); spin_lock_irqsave(&mdev->al_lock, flags); @@ -763,11 +767,6 @@ static inline int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext) lc_del(mdev->act_log, al_ext); spin_unlock_irq(&mdev->al_lock); - MTRACE(TRACE_TYPE_AL_EXTS, TRACE_LVL_METRICS, - if (unlikely(!rv)) - dev_info(DEV, "Waiting for extent in drbd_al_shrink()\n"); - ); - return rv; } @@ -953,10 +952,9 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); - MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_METRICS, - dev_info(DEV, "drbd_set_in_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n", - (unsigned long long)sector, size, sbnr, ebnr); - ); + trace_drbd_resync(mdev, TRACE_LVL_METRICS, + "drbd_set_in_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n", + (unsigned long long)sector, size, sbnr, ebnr); if (sbnr > ebnr) return; @@ -1032,11 +1030,9 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, sbnr = BM_SECT_TO_BIT(sector); ebnr = BM_SECT_TO_BIT(esector); - MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_METRICS, - dev_info(DEV, "drbd_set_out_of_sync: sector=%llus size=%u " - "sbnr=%lu ebnr=%lu\n", - (unsigned long long)sector, size, sbnr, ebnr); - ); + trace_drbd_resync(mdev, TRACE_LVL_METRICS, + "drbd_set_out_of_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n", + (unsigned long long)sector, size, sbnr, ebnr); /* ok, (capacity & 7) != 0 sometimes, but who cares... * we count rs_{total,left} in bits, not sectors. */ @@ -1133,10 +1129,9 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector) struct bm_extent *bm_ext; int i, sig; - MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_ALL, - dev_info(DEV, "drbd_rs_begin_io: sector=%llus (rs_end=%d)\n", - (unsigned long long)sector, enr); - ); + trace_drbd_resync(mdev, TRACE_LVL_ALL, + "drbd_rs_begin_io: sector=%llus (rs_end=%d)\n", + (unsigned long long)sector, enr); sig = wait_event_interruptible(mdev->al_wait, (bm_ext = _bme_get(mdev, enr))); @@ -1183,10 +1178,8 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) struct bm_extent *bm_ext; int i; - MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_ALL, - dev_info(DEV, "drbd_try_rs_begin_io: sector=%llus\n", - (unsigned long long)sector); - ); + trace_drbd_resync(mdev, TRACE_LVL_ALL, "drbd_try_rs_begin_io: sector=%llus\n", + (unsigned long long)sector); spin_lock_irq(&mdev->al_lock); if (mdev->resync_wenr != LC_FREE && mdev->resync_wenr != enr) { @@ -1203,10 +1196,11 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) * the lc_put here... 
* we also have to wake_up */
- MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_ALL,
- dev_info(DEV, "dropping %u, aparently got 'synced' "
- "by application io\n", mdev->resync_wenr);
- );
+
+ trace_drbd_resync(mdev, TRACE_LVL_ALL,
+ "dropping %u, apparently got 'synced' by application io\n",
+ mdev->resync_wenr);
+
 bm_ext = (struct bm_extent *)
 lc_find(mdev->resync, mdev->resync_wenr);
 if (bm_ext) {
@@ -1232,18 +1226,18 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
 * but then could not set BME_LOCKED,
 * so we tried again.
 * drop the extra reference. */
- MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_ALL,
- dev_info(DEV, "dropping extra reference on %u\n", enr);
- );
+ trace_drbd_resync(mdev, TRACE_LVL_ALL,
+ "dropping extra reference on %u\n", enr);
+
 bm_ext->lce.refcnt--;
 D_ASSERT(bm_ext->lce.refcnt > 0);
 }
 goto check_al;
 } else {
 if (mdev->resync_locked > mdev->resync->nr_elements-3) {
- MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_ALL,
- dev_info(DEV, "resync_locked = %u!\n", mdev->resync_locked);
- );
+ trace_drbd_resync(mdev, TRACE_LVL_ALL,
+ "resync_locked = %u!\n", mdev->resync_locked);
+
 goto try_again;
 }
 bm_ext = (struct bm_extent *)lc_get(mdev->resync, enr);
@@ -1268,9 +1262,8 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
 goto check_al;
 }
check_al:
- MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_ALL,
- dev_info(DEV, "checking al for %u\n", enr);
- );
+ trace_drbd_resync(mdev, TRACE_LVL_ALL, "checking al for %u\n", enr);
+
 for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
 if (unlikely(al_enr+i == mdev->act_log->new_number))
 goto try_again;
@@ -1284,9 +1277,7 @@ proceed:
 return 0;
try_again:
- MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_ALL,
- dev_info(DEV, "need to try again for %u\n", enr);
- );
+ trace_drbd_resync(mdev, TRACE_LVL_ALL, "need to try again for %u\n", enr);
 if (bm_ext)
 mdev->resync_wenr = enr;
 spin_unlock_irq(&mdev->al_lock);
@@ -1299,10 +1290,9 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
 struct bm_extent *bm_ext;
 unsigned long flags;
- MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_ALL,
- dev_info(DEV, "drbd_rs_complete_io: sector=%llus (rs_enr=%d)\n",
- (long long)sector, enr);
- );
+ trace_drbd_resync(mdev, TRACE_LVL_ALL,
+ "drbd_rs_complete_io: sector=%llus (rs_enr=%d)\n",
+ (long long)sector, enr);
 spin_lock_irqsave(&mdev->al_lock, flags);
 bm_ext = (struct bm_extent *) lc_find(mdev->resync, enr);
@@ -1336,9 +1326,7 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
 */
void drbd_rs_cancel_all(struct drbd_conf *mdev)
{
- MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_METRICS,
- dev_info(DEV, "drbd_rs_cancel_all\n");
- );
+ trace_drbd_resync(mdev, TRACE_LVL_METRICS, "drbd_rs_cancel_all\n");
 spin_lock_irq(&mdev->al_lock);
@@ -1363,9 +1351,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev)
 struct bm_extent *bm_ext;
 int i;
- MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_METRICS,
- dev_info(DEV, "drbd_rs_del_all\n");
- );
+ trace_drbd_resync(mdev, TRACE_LVL_METRICS, "drbd_rs_del_all\n");
 spin_lock_irq(&mdev->al_lock);
@@ -1417,10 +1403,9 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size)
 sector_t esector, nr_sectors;
 int wake_up = 0;
- MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_SUMMARY,
- dev_info(DEV, "drbd_rs_failed_io: sector=%llus, size=%u\n",
- (unsigned long long)sector, size);
- );
+ trace_drbd_resync(mdev, TRACE_LVL_SUMMARY,
+ "drbd_rs_failed_io: sector=%llus, size=%u\n",
+ (unsigned long long)sector, size);
 if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
 dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d
nonsense!\n", diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 0614f2c96f15..c160f7ab9e01 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -842,11 +842,6 @@ STATIC int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) drbd_blk_run_queue(bdev_get_queue(mdev->bc->md_bdev)); wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0); - MTRACE(TRACE_TYPE_MD_IO, TRACE_LVL_SUMMARY, - dev_info(DEV, "%s of bitmap took %lu jiffies\n", - rw == READ ? "reading" : "writing", jiffies - now); - ); - if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) { dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); drbd_chk_io_error(mdev, 1, TRUE); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ba43fa57b750..175de11d31e4 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -151,6 +151,8 @@ enum { DRBD_FAULT_MAX, }; +extern void trace_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, ...); + #ifdef DRBD_ENABLE_FAULTS extern unsigned int _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type); @@ -713,6 +715,19 @@ enum { DE_IS_FINISHING, }; +enum epoch_event { + EV_PUT, + EV_GOT_BARRIER_NR, + EV_BARRIER_DONE, + EV_BECAME_LAST, + EV_TRACE_FLUSH, /* TRACE_ are not real events, only used for tracing */ + EV_TRACE_ADD_BARRIER, /* Doing the first write as a barrier write */ + EV_TRACE_SETTING_BI, /* Barrier is expressed with the first write of the next epoch */ + EV_TRACE_ALLOC, + EV_TRACE_FREE, + EV_CLEANUP = 32, /* used as flag */ +}; + struct drbd_epoch_entry { struct drbd_work w; struct drbd_conf *mdev; @@ -1350,117 +1365,7 @@ extern rwlock_t global_state_lock; extern struct drbd_conf *drbd_new_device(unsigned int minor); extern void drbd_free_mdev(struct drbd_conf *mdev); -/* Dynamic tracing framework */ -#ifdef ENABLE_DYNAMIC_TRACE - extern int proc_details; -extern int trace_type; -extern int trace_devs; -extern int trace_level; - -enum { - TRACE_LVL_ALWAYS = 0, - TRACE_LVL_SUMMARY, - TRACE_LVL_METRICS, - TRACE_LVL_ALL, - TRACE_LVL_MAX -}; - -enum { - TRACE_TYPE_PACKET = 0x00000001, - TRACE_TYPE_RQ = 0x00000002, - TRACE_TYPE_UUID = 0x00000004, - TRACE_TYPE_RESYNC = 0x00000008, - TRACE_TYPE_EE = 0x00000010, - TRACE_TYPE_UNPLUG = 0x00000020, - TRACE_TYPE_NL = 0x00000040, - TRACE_TYPE_AL_EXTS = 0x00000080, - TRACE_TYPE_INT_RQ = 0x00000100, - TRACE_TYPE_MD_IO = 0x00000200, - TRACE_TYPE_EPOCHS = 0x00000400, -}; - -static inline int -is_trace(unsigned int type, unsigned int level) { - return (trace_level >= level) && (type & trace_type); -} -static inline int -is_mdev_trace(struct drbd_conf *mdev, unsigned int type, unsigned int level) { - return is_trace(type, level) && - ((1 << mdev_to_minor(mdev)) & trace_devs); -} - -#define MTRACE(type, lvl, code...) \ -do { \ - if (unlikely(is_mdev_trace(mdev, type, lvl))) { \ - code \ - } \ -} while (0) - -#define TRACE(type, lvl, code...) \ -do { \ - if (unlikely(is_trace(type, lvl))) { \ - code \ - } \ -} while (0) - -/* Buffer printing support - * dbg_print_flags: used for Flags arg to drbd_print_buffer - * - DBGPRINT_BUFFADDR; if set, each line starts with the - * virtual address of the line being output. If clear, - * each line starts with the offset from the beginning - * of the buffer. 
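Before the rest of the old flags disappear below, a note on what replaces them. Under CONFIG_DRBD_TRACE the hooks become plain kernel tracepoints: declared once, defined once, and active only while a probe module is registered. Roughly, the pattern looks like this (a sketch modeled on samples/tracepoints of that kernel generation; the exact TP_PROTO/TP_ARGS spellings shifted between 2.6.2x releases, so treat the macro names as assumptions rather than quotes from the patch):

/* tracepoint_sketch.c - minimal probe-module pattern, illustration only */
#include <linux/module.h>
#include <linux/tracepoint.h>

struct drbd_conf; /* opaque here */

/* in a shared header (drbd_tracing.h plays this role): declare the hook */
DECLARE_TRACE(drbd_unplug,
	TP_PROTO(struct drbd_conf *mdev, char *msg),
	TP_ARGS(mdev, msg));

/* exactly once in the instrumented module (drbd_main.c in this patch) */
DEFINE_TRACE(drbd_unplug);

/* the call site, trace_drbd_unplug(mdev, "got unplugged"), costs only a
 * predicted-not-taken branch until a probe like this one is attached: */
static void probe_drbd_unplug(struct drbd_conf *mdev, char *msg)
{
	printk(KERN_INFO "drbd unplug: %s\n", msg);
}

static int __init sketch_init(void)
{
	return register_trace_drbd_unplug(probe_drbd_unplug);
}

static void __exit sketch_exit(void)
{
	unregister_trace_drbd_unplug(probe_drbd_unplug);
	tracepoint_synchronize_unregister(); /* let in-flight probes finish */
}

module_init(sketch_init);
module_exit(sketch_exit);
MODULE_LICENSE("GPL");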
*/ -enum dbg_print_flags { - DBGPRINT_BUFFADDR = 0x0001, -}; - -extern void drbd_print_uuid(struct drbd_conf *mdev, unsigned int idx); - -extern void drbd_print_buffer(const char *prefix, unsigned int flags, int size, - const void *buffer, const void *buffer_va, - unsigned int length); - -/* Bio printing support */ -extern void _dump_bio(const char *pfx, struct drbd_conf *mdev, struct bio *bio, int complete, struct drbd_request *r); - -static inline void dump_bio(struct drbd_conf *mdev, - struct bio *bio, int complete, struct drbd_request *r) -{ - MTRACE(TRACE_TYPE_RQ, TRACE_LVL_SUMMARY, - _dump_bio("Rq", mdev, bio, complete, r); - ); -} - -static inline void dump_internal_bio(const char *pfx, struct drbd_conf *mdev, struct bio *bio, int complete) -{ - MTRACE(TRACE_TYPE_INT_RQ, TRACE_LVL_SUMMARY, - _dump_bio(pfx, mdev, bio, complete, NULL); - ); -} - -/* Packet dumping support */ -extern void _dump_packet(struct drbd_conf *mdev, struct socket *sock, - int recv, union p_polymorph *p, - char *file, int line); - -static inline void -dump_packet(struct drbd_conf *mdev, struct socket *sock, - int recv, union p_polymorph *p, char *file, int line) -{ - MTRACE(TRACE_TYPE_PACKET, TRACE_LVL_SUMMARY, - _dump_packet(mdev, sock, recv, p, file, line); - ); -} - -#else - -#define MTRACE(ignored...) ((void)0) -#define TRACE(ignored...) ((void)0) - -#define dump_bio(ignored...) ((void)0) -#define dump_internal_bio(ignored...) ((void)0) -#define dump_packet(ignored...) ((void)0) -#endif /* drbd_req */ extern int drbd_make_request_26(struct request_queue *q, struct bio *bio); @@ -2237,11 +2142,6 @@ static inline void dec_ap_bio(struct drbd_conf *mdev) static inline void drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) { mdev->ed_uuid = val; - - MTRACE(TRACE_TYPE_UUID, TRACE_LVL_METRICS, - dev_info(DEV, " exposed data uuid now %016llX\n", - (unsigned long long)val); - ); } static inline int seq_cmp(u32 a, u32 b) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 79cb0183f817..4a2593ce1d37 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -53,6 +53,7 @@ #include #include #include "drbd_int.h" +#include "drbd_tracing.h" #include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */ #include "drbd_vli.h" @@ -79,6 +80,18 @@ STATIC int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); STATIC void md_sync_timer_fn(unsigned long data); STATIC int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); +DEFINE_TRACE(drbd_unplug); +DEFINE_TRACE(drbd_uuid); +DEFINE_TRACE(drbd_ee); +DEFINE_TRACE(drbd_packet); +DEFINE_TRACE(drbd_md_io); +DEFINE_TRACE(drbd_epoch); +DEFINE_TRACE(drbd_netlink); +DEFINE_TRACE(drbd_actlog); +DEFINE_TRACE(drbd_bio); +DEFINE_TRACE(_drbd_resync); +DEFINE_TRACE(drbd_req); + MODULE_AUTHOR("Philipp Reisner , " "Lars Ellenberg "); MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION); @@ -95,6 +108,7 @@ module_param(minor_count, uint, 0444); module_param(disable_sendpage, bool, 0644); module_param(allow_oos, bool, 0); module_param(cn_idx, uint, 0444); +module_param(proc_details, int, 0644); #ifdef DRBD_ENABLE_FAULTS int enable_faults; @@ -116,19 +130,8 @@ unsigned int minor_count = 32; int disable_sendpage; int allow_oos; unsigned int cn_idx = CN_IDX_DRBD; - -#ifdef ENABLE_DYNAMIC_TRACE -int trace_type; /* UI_BITMAP of trace types to enable */ -int trace_level; /* UI_CURRENT trace level */ -int trace_devs; /* UI_BITMAP of devices to trace */ int proc_details; /* 
Detail level in proc drbd*/ -module_param(trace_level, int, 0644); -module_param(trace_type, int, 0644); -module_param(trace_devs, int, 0644); -module_param(proc_details, int, 0644); -#endif - /* Module parameter for setting the user mode helper program * to run. Default is /sbin/drbdadm */ char usermode_helper[80] = "/sbin/drbdadm"; @@ -1458,7 +1461,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, h->command = cpu_to_be16(cmd); h->length = cpu_to_be16(size-sizeof(struct p_header)); - dump_packet(mdev, sock, 0, (void *)h, __FILE__, __LINE__); + trace_drbd_packet(mdev, sock, 0, (void *)h, __FILE__, __LINE__); sent = drbd_send(mdev, sock, h, size, msg_flags); ok = (sent == size); @@ -1510,7 +1513,7 @@ int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, if (!drbd_get_data_sock(mdev)) return 0; - dump_packet(mdev, mdev->data.socket, 0, (void *)&h, __FILE__, __LINE__); + trace_drbd_packet(mdev, mdev->data.socket, 0, (void *)&h, __FILE__, __LINE__); ok = (sizeof(h) == drbd_send(mdev, mdev->data.socket, &h, sizeof(h), 0)); @@ -2227,7 +2230,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) dp_flags |= DP_MAY_SET_IN_SYNC; p.dp_flags = cpu_to_be32(dp_flags); - dump_packet(mdev, mdev->data.socket, 0, (void *)&p, __FILE__, __LINE__); + trace_drbd_packet(mdev, mdev->data.socket, 0, (void *)&p, __FILE__, __LINE__); set_bit(UNPLUG_REMOTE, &mdev->flags); ok = (sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE)); @@ -2278,7 +2281,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, if (!drbd_get_data_sock(mdev)) return 0; - dump_packet(mdev, mdev->data.socket, 0, (void *)&p, __FILE__, __LINE__); + trace_drbd_packet(mdev, mdev->data.socket, 0, (void *)&p, __FILE__, __LINE__); ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE); if (ok && dgs) { @@ -2414,10 +2417,7 @@ STATIC void drbd_unplug_fn(struct request_queue *q) { struct drbd_conf *mdev = q->queuedata; - MTRACE(TRACE_TYPE_UNPLUG, TRACE_LVL_SUMMARY, - dev_info(DEV, "got unplugged ap_bio_count=%d\n", - atomic_read(&mdev->ap_bio_cnt)); - ); + trace_drbd_unplug(mdev, "got unplugged"); /* unplug FIRST */ spin_lock_irq(q->queue_lock); @@ -3098,9 +3098,7 @@ void drbd_md_sync(struct drbd_conf *mdev) if (!inc_local_if_state(mdev, D_FAILED)) return; - MTRACE(TRACE_TYPE_MD_IO, TRACE_LVL_SUMMARY, - dev_info(DEV, "Writing meta data super block now.\n"); - ); + trace_drbd_md_io(mdev, WRITE, mdev->bc); mutex_lock(&mdev->md_io_mutex); buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); @@ -3156,6 +3154,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!inc_local_if_state(mdev, D_ATTACHING)) return ERR_IO_MD_DISK; + trace_drbd_md_io(mdev, READ, bdev); + mutex_lock(&mdev->md_io_mutex); buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); @@ -3235,9 +3235,7 @@ STATIC void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) { mdev->bc->md.uuid[i+1] = mdev->bc->md.uuid[i]; - MTRACE(TRACE_TYPE_UUID, TRACE_LVL_ALL, - drbd_print_uuid(mdev, i+1); - ); + trace_drbd_uuid(mdev, i+1); } } @@ -3253,11 +3251,7 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) } mdev->bc->md.uuid[idx] = val; - - MTRACE(TRACE_TYPE_UUID, TRACE_LVL_SUMMARY, - drbd_print_uuid(mdev, idx); - ); - + trace_drbd_uuid(mdev, idx); drbd_md_mark_dirty(mdev); } @@ -3267,9 +3261,7 @@ void drbd_uuid_set(struct 
drbd_conf *mdev, int idx, u64 val) __must_hold(local) if (mdev->bc->md.uuid[idx]) { drbd_uuid_move_history(mdev); mdev->bc->md.uuid[UI_HISTORY_START] = mdev->bc->md.uuid[idx]; - MTRACE(TRACE_TYPE_UUID, TRACE_LVL_METRICS, - drbd_print_uuid(mdev, UI_HISTORY_START); - ); + trace_drbd_uuid(mdev, UI_HISTORY_START); } _drbd_uuid_set(mdev, idx, val); } @@ -3286,9 +3278,7 @@ void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) dev_info(DEV, "Creating new current UUID\n"); D_ASSERT(mdev->bc->md.uuid[UI_BITMAP] == 0); mdev->bc->md.uuid[UI_BITMAP] = mdev->bc->md.uuid[UI_CURRENT]; - MTRACE(TRACE_TYPE_UUID, TRACE_LVL_METRICS, - drbd_print_uuid(mdev, UI_BITMAP); - ); + trace_drbd_uuid(mdev, UI_BITMAP); get_random_bytes(&val, sizeof(u64)); _drbd_uuid_set(mdev, UI_CURRENT, val); @@ -3303,11 +3293,8 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) drbd_uuid_move_history(mdev); mdev->bc->md.uuid[UI_HISTORY_START] = mdev->bc->md.uuid[UI_BITMAP]; mdev->bc->md.uuid[UI_BITMAP] = 0; - - MTRACE(TRACE_TYPE_UUID, TRACE_LVL_METRICS, - drbd_print_uuid(mdev, UI_HISTORY_START); - drbd_print_uuid(mdev, UI_BITMAP); - ); + trace_drbd_uuid(mdev, UI_HISTORY_START); + trace_drbd_uuid(mdev, UI_BITMAP); } else { if (mdev->bc->md.uuid[UI_BITMAP]) dev_warn(DEV, "bm UUID already set"); @@ -3315,9 +3302,7 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) mdev->bc->md.uuid[UI_BITMAP] = val; mdev->bc->md.uuid[UI_BITMAP] &= ~((u64)1); - MTRACE(TRACE_TYPE_UUID, TRACE_LVL_METRICS, - drbd_print_uuid(mdev, UI_BITMAP); - ); + trace_drbd_uuid(mdev, UI_BITMAP); } drbd_md_mark_dirty(mdev); } @@ -3553,415 +3538,5 @@ _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) } #endif -#ifdef ENABLE_DYNAMIC_TRACE - -STATIC char *_drbd_uuid_str(unsigned int idx) -{ - static char *uuid_str[] = { - "Current", - "Bitmap", - "History_start", - "History_end", - "UUID_SIZE", - "UUID_FLAGS", - }; - - return (idx < UI_EXTENDED_SIZE) ? uuid_str[idx] : "*Unknown UUID index*"; -} - -/* Pretty print a UUID value */ -void drbd_print_uuid(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) -{ - dev_info(DEV, " uuid[%s] now %016llX\n", - _drbd_uuid_str(idx), (unsigned long long)mdev->bc->md.uuid[idx]); -} - - -/* - * - * drbd_print_buffer - * - * This routine dumps binary data to the debugging output. Can be - * called at interrupt level. - * - * Arguments: - * - * prefix - String is output at the beginning of each line output - * flags - Control operation of the routine. Currently defined - * Flags are: - * DBGPRINT_BUFFADDR; if set, each line starts with the - * virtual address of the line being outupt. If clear, - * each line starts with the offset from the beginning - * of the buffer. - * size - Indicates the size of each entry in the buffer. 
Supported - * values are sizeof(char), sizeof(short) and sizeof(int) - * buffer - Start address of buffer - * buffer_va - Virtual address of start of buffer (normally the same - * as Buffer, but having it separate allows it to hold - * file address for example) - * length - length of buffer - * - */ -void -drbd_print_buffer(const char *prefix, unsigned int flags, int size, - const void *buffer, const void *buffer_va, - unsigned int length) - -#define LINE_SIZE 16 -#define LINE_ENTRIES (int)(LINE_SIZE/size) -{ - const unsigned char *pstart; - const unsigned char *pstart_va; - const unsigned char *pend; - char bytes_str[LINE_SIZE*3+8], ascii_str[LINE_SIZE+8]; - char *pbytes = bytes_str, *pascii = ascii_str; - int offset = 0; - long sizemask; - int field_width; - int index; - const unsigned char *pend_str; - const unsigned char *p; - int count; - - /* verify size parameter */ - if (size != sizeof(char) && - size != sizeof(short) && - size != sizeof(int)) { - printk(KERN_DEBUG "drbd_print_buffer: " - "ERROR invalid size %d\n", size); - return; - } - - sizemask = size-1; - field_width = size*2; - - /* Adjust start/end to be on appropriate boundary for size */ - buffer = (const char *)((long)buffer & ~sizemask); - pend = (const unsigned char *) - (((long)buffer + length + sizemask) & ~sizemask); - - if (flags & DBGPRINT_BUFFADDR) { - /* Move start back to nearest multiple of line size, - * if printing address. This results in nicely formatted output - * with addresses being on line size (16) byte boundaries */ - pstart = (const unsigned char *)((long)buffer & ~(LINE_SIZE-1)); - } else { - pstart = (const unsigned char *)buffer; - } - - /* Set value of start VA to print if addresses asked for */ - pstart_va = (const unsigned char *)buffer_va - - ((const unsigned char *)buffer-pstart); - - /* Calculate end position to nicely align right hand side */ - pend_str = pstart + (((pend-pstart) + LINE_SIZE-1) & ~(LINE_SIZE-1)); - - /* Init strings */ - *pbytes = *pascii = '\0'; - - /* Start at beginning of first line */ - p = pstart; - count = 0; - - while (p < pend_str) { - if (p < (const unsigned char *)buffer || p >= pend) { - /* Before start of buffer or after end- print spaces */ - pbytes += sprintf(pbytes, "%*c ", field_width, ' '); - pascii += sprintf(pascii, "%*c", size, ' '); - p += size; - } else { - /* Add hex and ascii to strings */ - int val; - switch (size) { - default: - case 1: - val = *(unsigned char *)p; - break; - case 2: - val = *(unsigned short *)p; - break; - case 4: - val = *(unsigned int *)p; - break; - } - - pbytes += sprintf(pbytes, "%0*x ", field_width, val); - - for (index = size; index; index--) { - *pascii++ = isprint(*p) ? *p : '.'; - p++; - } - } - - count++; - - if (count == LINE_ENTRIES || p >= pend_str) { - /* Null terminate and print record */ - *pascii = '\0'; - printk(KERN_DEBUG "%s%8.8lx: %*s|%*s|\n", - prefix, - (flags & DBGPRINT_BUFFADDR) - ? 
(long)pstart_va:(long)offset, - LINE_ENTRIES*(field_width+1), bytes_str, - LINE_SIZE, ascii_str); - - /* Move onto next line */ - pstart_va += (p-pstart); - pstart = p; - count = 0; - offset += LINE_SIZE; - - /* Re-init strings */ - pbytes = bytes_str; - pascii = ascii_str; - *pbytes = *pascii = '\0'; - } - } -} - -#define PSM(A) \ -do { \ - if (mask.A) { \ - int i = snprintf(p, len, " " #A "( %s )", \ - A##s_to_name(val.A)); \ - if (i >= len) \ - return op; \ - p += i; \ - len -= i; \ - } \ -} while (0) - -STATIC char *dump_st(char *p, int len, union drbd_state mask, union drbd_state val) -{ - char *op = p; - *p = '\0'; - PSM(role); - PSM(peer); - PSM(conn); - PSM(disk); - PSM(pdsk); - - return op; -} - -#define INFOP(fmt, args...) \ -do { \ - if (trace_level >= TRACE_LVL_ALL) { \ - dev_info(DEV, "%s:%d: %s [%d] %s %s " fmt , \ - file, line, current->comm, current->pid, \ - sockname, recv ? "<<<" : ">>>" , \ - ## args); \ - } else { \ - dev_info(DEV, "%s %s " fmt, sockname, \ - recv ? "<<<" : ">>>" , \ - ## args); \ - } \ -} while (0) - -STATIC char *_dump_block_id(u64 block_id, char *buff) -{ - if (is_syncer_block_id(block_id)) - strcpy(buff, "SyncerId"); - else - sprintf(buff, "%llx", (unsigned long long)block_id); - - return buff; -} - -void -_dump_packet(struct drbd_conf *mdev, struct socket *sock, - int recv, union p_polymorph *p, char *file, int line) -{ - char *sockname = sock == mdev->meta.socket ? "meta" : "data"; - int cmd = (recv == 2) ? p->header.command : be16_to_cpu(p->header.command); - char tmp[300]; - union drbd_state m, v; - - switch (cmd) { - case P_HAND_SHAKE: - INFOP("%s (protocol %u-%u)\n", cmdname(cmd), - be32_to_cpu(p->handshake.protocol_min), - be32_to_cpu(p->handshake.protocol_max)); - break; - - case P_BITMAP: /* don't report this */ - case P_COMPRESSED_BITMAP: /* don't report this */ - break; - - case P_DATA: - INFOP("%s (sector %llus, id %s, seq %u, f %x)\n", cmdname(cmd), - (unsigned long long)be64_to_cpu(p->data.sector), - _dump_block_id(p->data.block_id, tmp), - be32_to_cpu(p->data.seq_num), - be32_to_cpu(p->data.dp_flags) - ); - break; - - case P_DATA_REPLY: - case P_RS_DATA_REPLY: - INFOP("%s (sector %llus, id %s)\n", cmdname(cmd), - (unsigned long long)be64_to_cpu(p->data.sector), - _dump_block_id(p->data.block_id, tmp) - ); - break; - - case P_RECV_ACK: - case P_WRITE_ACK: - case P_RS_WRITE_ACK: - case P_DISCARD_ACK: - case P_NEG_ACK: - case P_NEG_RS_DREPLY: - INFOP("%s (sector %llus, size %u, id %s, seq %u)\n", - cmdname(cmd), - (long long)be64_to_cpu(p->block_ack.sector), - be32_to_cpu(p->block_ack.blksize), - _dump_block_id(p->block_ack.block_id, tmp), - be32_to_cpu(p->block_ack.seq_num) - ); - break; - - case P_DATA_REQUEST: - case P_RS_DATA_REQUEST: - INFOP("%s (sector %llus, size %u, id %s)\n", cmdname(cmd), - (long long)be64_to_cpu(p->block_req.sector), - be32_to_cpu(p->block_req.blksize), - _dump_block_id(p->block_req.block_id, tmp) - ); - break; - - case P_BARRIER: - case P_BARRIER_ACK: - INFOP("%s (barrier %u)\n", cmdname(cmd), p->barrier.barrier); - break; - - case P_SYNC_PARAM: - case P_SYNC_PARAM89: - INFOP("%s (rate %u, verify-alg \"%.64s\", csums-alg \"%.64s\")\n", - cmdname(cmd), be32_to_cpu(p->rs_param_89.rate), - p->rs_param_89.verify_alg, p->rs_param_89.csums_alg); - break; - - case P_UUIDS: - INFOP("%s Curr:%016llX, Bitmap:%016llX, " - "HisSt:%016llX, HisEnd:%016llX\n", - cmdname(cmd), - (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_CURRENT]), - (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_BITMAP]), - (unsigned long 
long)be64_to_cpu(p->uuids.uuid[UI_HISTORY_START]), - (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_HISTORY_END])); - break; - - case P_SIZES: - INFOP("%s (d %lluMiB, u %lluMiB, c %lldMiB, " - "max bio %x, q order %x)\n", - cmdname(cmd), - (long long)(be64_to_cpu(p->sizes.d_size)>>(20-9)), - (long long)(be64_to_cpu(p->sizes.u_size)>>(20-9)), - (long long)(be64_to_cpu(p->sizes.c_size)>>(20-9)), - be32_to_cpu(p->sizes.max_segment_size), - be32_to_cpu(p->sizes.queue_order_type)); - break; - - case P_STATE: - v.i = be32_to_cpu(p->state.state); - m.i = 0xffffffff; - dump_st(tmp, sizeof(tmp), m, v); - INFOP("%s (s %x {%s})\n", cmdname(cmd), v.i, tmp); - break; - - case P_STATE_CHG_REQ: - m.i = be32_to_cpu(p->req_state.mask); - v.i = be32_to_cpu(p->req_state.val); - dump_st(tmp, sizeof(tmp), m, v); - INFOP("%s (m %x v %x {%s})\n", cmdname(cmd), m.i, v.i, tmp); - break; - - case P_STATE_CHG_REPLY: - INFOP("%s (ret %x)\n", cmdname(cmd), - be32_to_cpu(p->req_state_reply.retcode)); - break; - - case P_PING: - case P_PING_ACK: - /* - * Dont trace pings at summary level - */ - if (trace_level < TRACE_LVL_ALL) - break; - /* fall through... */ - default: - INFOP("%s (%u)\n", cmdname(cmd), cmd); - break; - } -} - -/* Debug routine to dump info about bio */ - -void _dump_bio(const char *pfx, struct drbd_conf *mdev, struct bio *bio, int complete, struct drbd_request *r) -{ -#ifdef CONFIG_LBD -#define SECTOR_FORMAT "%Lx" -#else -#define SECTOR_FORMAT "%lx" -#endif -#define SECTOR_SHIFT 9 - - unsigned long lowaddr = (unsigned long)(bio->bi_sector << SECTOR_SHIFT); - char *faddr = (char *)(lowaddr); - char rb[sizeof(void *)*2+6] = { 0, }; - struct bio_vec *bvec; - int segno; - - const int rw = bio->bi_rw; - const int biorw = (rw & (RW_MASK|RWA_MASK)); - const int biobarrier = (rw & (1<>>", - pfx, - biorw == WRITE ? "Write" : "Read", - biobarrier ? " : B" : "", - biosync ? " : S" : "", - bio, - rb, - complete ? (drbd_bio_uptodate(bio) ? "Success, " : "Failed, ") : "", - bio->bi_sector << SECTOR_SHIFT, - bio->bi_size); - - if (trace_level >= TRACE_LVL_METRICS && - ((biorw == WRITE) ^ complete)) { - printk(KERN_DEBUG " ind page offset length\n"); - __bio_for_each_segment(bvec, bio, segno, 0) { - printk(KERN_DEBUG " [%d] %p %8.8x %8.8x\n", segno, - bvec->bv_page, bvec->bv_offset, bvec->bv_len); - - if (trace_level >= TRACE_LVL_ALL) { - char *bvec_buf; - unsigned long flags; - - bvec_buf = bvec_kmap_irq(bvec, &flags); - - drbd_print_buffer(" ", DBGPRINT_BUFFADDR, 1, - bvec_buf, - faddr, - (bvec->bv_len <= 0x80) - ? bvec->bv_len : 0x80); - - bvec_kunmap_irq(bvec_buf, &flags); - - if (bvec->bv_len > 0x40) - printk(KERN_DEBUG " ....\n"); - - faddr += bvec->bv_len; - } - } - } -} -#endif - module_init(drbd_init) module_exit(drbd_cleanup) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index c72c5adbbfd6..c388478a0188 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -34,8 +34,8 @@ #include #include #include - #include "drbd_int.h" +#include "drbd_tracing.h" #include "drbd_wrappers.h" #include #include @@ -124,51 +124,6 @@ name ## _to_tags(struct drbd_conf *mdev, \ void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name); void drbd_nl_send_reply(struct cn_msg *, int); -STATIC char *nl_packet_name(int packet_type) -{ -/* Generate packet type strings */ -#define NL_PACKET(name, number, fields) \ - [P_ ## name] = # name, -#define NL_INTEGER Argh! -#define NL_BIT Argh! -#define NL_INT64 Argh! -#define NL_STRING Argh! 
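A word on the Argh! defines just above, since the same construct moves verbatim into drbd_tracing.c later in this patch: they poison every field macro so that including linux/drbd_nl.h under a redefined NL_PACKET() can only ever expand to the name table, and any stray field expansion breaks the build. A standalone sketch of the trick, with an invented three-packet list standing in for the real header:

/* xmacro_sketch.c - illustration only; the packet list is invented */
#include <stdio.h>

/* stand-in for linux/drbd_nl.h: one NL_PACKET() entry per packet type */
#define PACKET_LIST \
	NL_PACKET(primary, 1, fields) \
	NL_PACKET(secondary, 2, fields) \
	NL_PACKET(disk_conf, 3, fields)

/* first expansion: the enum of packet numbers */
#define NL_PACKET(name, number, fields) P_ ## name = number,
enum packets { PACKET_LIST P_nl_after_last_packet };
#undef NL_PACKET

/* second expansion of the same list: the name table */
#define NL_PACKET(name, number, fields) [P_ ## name] = #name,
static const char *nl_tag_name[P_nl_after_last_packet] = { PACKET_LIST };
#undef NL_PACKET

/* bounds-checked lookup, as in nl_packet_name() */
static const char *packet_name(unsigned int type)
{
	return (type < P_nl_after_last_packet && nl_tag_name[type]) ?
		nl_tag_name[type] : "*Unknown*";
}

int main(void)
{
	printf("%d -> %s\n", P_disk_conf, packet_name(P_disk_conf));
	printf("%d -> %s\n", 42, packet_name(42));
	return 0;
}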
- - static char *nl_tag_name[P_nl_after_last_packet] = { -#include "linux/drbd_nl.h" - }; - - return (packet_type < sizeof(nl_tag_name)/sizeof(nl_tag_name[0])) ? - nl_tag_name[packet_type] : "*Unknown*"; -} - -STATIC void nl_trace_packet(void *data) -{ - struct cn_msg *req = data; - struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data; - - printk(KERN_INFO "drbd%d: " - "Netlink: << %s (%d) - seq: %x, ack: %x, len: %x\n", - nlp->drbd_minor, - nl_packet_name(nlp->packet_type), - nlp->packet_type, - req->seq, req->ack, req->len); -} - -STATIC void nl_trace_reply(void *data) -{ - struct cn_msg *req = data; - struct drbd_nl_cfg_reply *nlp = (struct drbd_nl_cfg_reply *)req->data; - - printk(KERN_INFO "drbd%d: " - "Netlink: >> %s (%d) - seq: %x, ack: %x, len: %x\n", - nlp->minor, - nlp->packet_type == P_nl_after_last_packet ? - "Empty-Reply" : nl_packet_name(nlp->packet_type), - nlp->packet_type, - req->seq, req->ack, req->len); -} - int drbd_khelper(struct drbd_conf *mdev, char *cmd) { char mb[12]; @@ -711,15 +666,6 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu max_seg_s = min(b->max_sectors * b->hardsect_size, max_seg_s); - MTRACE(TRACE_TYPE_RQ, TRACE_LVL_SUMMARY, - DUMPI(b->max_sectors); - DUMPI(b->max_phys_segments); - DUMPI(b->max_hw_segments); - DUMPI(b->max_segment_size); - DUMPI(b->hardsect_size); - DUMPI(b->seg_boundary_mask); - ); - q->max_sectors = max_seg_s >> 9; if (max_segments) { q->max_phys_segments = max_segments; @@ -733,15 +679,6 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu q->seg_boundary_mask = PAGE_SIZE-1; blk_queue_stack_limits(q, b); - MTRACE(TRACE_TYPE_RQ, TRACE_LVL_SUMMARY, - DUMPI(q->max_sectors); - DUMPI(q->max_phys_segments); - DUMPI(q->max_hw_segments); - DUMPI(q->max_segment_size); - DUMPI(q->hardsect_size); - DUMPI(q->seg_boundary_mask); - ); - if (b->merge_bvec_fn) dev_warn(DEV, "Backing device's merge_bvec_fn() = %p\n", b->merge_bvec_fn); @@ -2055,7 +1992,7 @@ STATIC void drbd_connector_callback(void *data) goto fail; } - TRACE(TRACE_TYPE_NL, TRACE_LVL_SUMMARY, nl_trace_packet(data);); + trace_drbd_netlink(data, 1); if (nlp->packet_type >= P_nl_after_last_packet) { retcode = ERR_PACKET_NR; @@ -2093,8 +2030,7 @@ STATIC void drbd_connector_callback(void *data) cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr; cn_reply->flags = 0; - TRACE(TRACE_TYPE_NL, TRACE_LVL_SUMMARY, nl_trace_reply(cn_reply);); - + trace_drbd_netlink(cn_reply, 0); rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); if (rr && rr != -ESRCH) printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr); @@ -2193,8 +2129,7 @@ void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state) reply->minor = mdev_to_minor(mdev); reply->ret_code = NO_ERROR; - TRACE(TRACE_TYPE_NL, TRACE_LVL_SUMMARY, nl_trace_reply(cn_reply);); - + trace_drbd_netlink(cn_reply, 0); cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); } @@ -2232,8 +2167,7 @@ void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name) reply->minor = mdev_to_minor(mdev); reply->ret_code = NO_ERROR; - TRACE(TRACE_TYPE_NL, TRACE_LVL_SUMMARY, nl_trace_reply(cn_reply);); - + trace_drbd_netlink(cn_reply, 0); cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); } @@ -2302,8 +2236,7 @@ void drbd_bcast_ee(struct drbd_conf *mdev, reply->minor = mdev_to_minor(mdev); reply->ret_code = NO_ERROR; - TRACE(TRACE_TYPE_NL, TRACE_LVL_SUMMARY, nl_trace_reply(cn_reply);); - + trace_drbd_netlink(cn_reply, 0); cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); 
kfree(cn_reply); } @@ -2346,8 +2279,7 @@ void drbd_bcast_sync_progress(struct drbd_conf *mdev) reply->minor = mdev_to_minor(mdev); reply->ret_code = NO_ERROR; - TRACE(TRACE_TYPE_NL, TRACE_LVL_SUMMARY, nl_trace_reply(cn_reply);); - + trace_drbd_netlink(cn_reply, 0); cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); } @@ -2401,8 +2333,7 @@ void drbd_nl_send_reply(struct cn_msg *req, int ret_code) reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor; reply->ret_code = ret_code; - TRACE(TRACE_TYPE_NL, TRACE_LVL_SUMMARY, nl_trace_reply(cn_reply);); - + trace_drbd_netlink(cn_reply, 0); rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); if (rr && rr != -ESRCH) printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr); diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 76b512180606..7de68d9d6aba 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -135,7 +135,6 @@ STATIC void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) seq_printf(seq, " K/sec\n"); } -#ifdef ENABLE_DYNAMIC_TRACE STATIC void resync_dump_detail(struct seq_file *seq, struct lc_element *e) { struct bm_extent *bme = (struct bm_extent *)e; @@ -145,7 +144,6 @@ STATIC void resync_dump_detail(struct seq_file *seq, struct lc_element *e) bme->flags & BME_LOCKED ? "LOCKED" : "------" ); } -#endif STATIC int drbd_seq_show(struct seq_file *seq, void *v) { @@ -245,7 +243,6 @@ STATIC int drbd_seq_show(struct seq_file *seq, void *v) mdev->rs_total - mdev->ov_left, mdev->rs_total); -#ifdef ENABLE_DYNAMIC_TRACE if (proc_details >= 1 && inc_local_if_state(mdev, D_FAILED)) { lc_printf_stats(seq, mdev->resync); lc_printf_stats(seq, mdev->act_log); @@ -258,7 +255,6 @@ STATIC int drbd_seq_show(struct seq_file *seq, void *v) resync_dump_detail); } } -#endif } return 0; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 078ce728a2fc..077480fe6923 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -49,6 +49,7 @@ #include #include #include "drbd_int.h" +#include "drbd_tracing.h" #include "drbd_req.h" #include "drbd_vli.h" @@ -58,14 +59,6 @@ struct flush_work { struct drbd_epoch *epoch; }; -enum epoch_event { - EV_PUT, - EV_GOT_BARRIER_NR, - EV_BARRIER_DONE, - EV_BECAME_LAST, - EV_CLEANUP = 32, /* used as flag */ -}; - enum finish_epoch { FE_STILL_LIVE, FE_DESTROYED, @@ -284,10 +277,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, e->epoch = NULL; e->flags = 0; - MTRACE(TRACE_TYPE_EE, TRACE_LVL_ALL, - dev_info(DEV, "allocated EE sec=%llus size=%u ee=%p\n", - (unsigned long long)sector, data_size, e); - ); + trace_drbd_ee(mdev, e, "allocated"); return e; @@ -308,10 +298,7 @@ void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) struct bio_vec *bvec; int i; - MTRACE(TRACE_TYPE_EE, TRACE_LVL_ALL, - dev_info(DEV, "Free EE sec=%llus size=%u ee=%p\n", - (unsigned long long)e->sector, e->size, e); - ); + trace_drbd_ee(mdev, e, "freed"); __bio_for_each_segment(bvec, bio, i, 0) { drbd_pp_free(mdev, bvec->bv_page); @@ -390,10 +377,7 @@ STATIC int drbd_process_done_ee(struct drbd_conf *mdev) * all ignore the last argument. 
*/ list_for_each_entry_safe(e, t, &work_list, w.list) { - MTRACE(TRACE_TYPE_EE, TRACE_LVL_ALL, - dev_info(DEV, "Process EE on done_ee sec=%llus size=%u ee=%p\n", - (unsigned long long)e->sector, e->size, e); - ); + trace_drbd_ee(mdev, e, "process_done_ee"); /* list_del not necessary, next/prev members not touched */ if (e->w.cb(mdev, &e->w, 0) == 0) ok = 0; @@ -998,13 +982,6 @@ STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, int schedule_flush = 0; enum finish_epoch rv = FE_STILL_LIVE; - static char *epoch_event_str[] = { - [EV_PUT] = "put", - [EV_GOT_BARRIER_NR] = "got_barrier_nr", - [EV_BARRIER_DONE] = "barrier_done", - [EV_BECAME_LAST] = "became_last", - }; - spin_lock(&mdev->epoch_lock); do { next_epoch = NULL; @@ -1034,15 +1011,7 @@ STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, break; } - MTRACE(TRACE_TYPE_EPOCHS, TRACE_LVL_ALL, - dev_info(DEV, "Update epoch %p/%d { size=%d active=%d %c%c n%c%c } ev=%s\n", - epoch, epoch->barrier_nr, epoch_size, atomic_read(&epoch->active), - test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) ? 'n' : '-', - test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) ? 'b' : '-', - test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) ? 'i' : '-', - test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) ? 'd' : '-', - epoch_event_str[ev]); - ); + trace_drbd_epoch(mdev, epoch, ev); if (epoch_size != 0 && atomic_read(&epoch->active) == 0 && @@ -1075,10 +1044,7 @@ STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, list_del(&epoch->list); ev = EV_BECAME_LAST | (ev & EV_CLEANUP); mdev->epochs--; - MTRACE(TRACE_TYPE_EPOCHS, TRACE_LVL_SUMMARY, - dev_info(DEV, "Freeing epoch %p/%d { size=%d } nr_epochs=%d\n", - epoch, epoch->barrier_nr, epoch_size, mdev->epochs); - ); + trace_drbd_epoch(mdev, epoch, EV_TRACE_FREE); kfree(epoch); if (rv == FE_STILL_LIVE) @@ -1104,10 +1070,7 @@ STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, struct flush_work *fw; fw = kmalloc(sizeof(*fw), GFP_ATOMIC); if (fw) { - MTRACE(TRACE_TYPE_EPOCHS, TRACE_LVL_METRICS, - dev_info(DEV, "Schedul flush %p/%d { size=%d } nr_epochs=%d\n", - epoch, epoch->barrier_nr, epoch_size, mdev->epochs); - ); + trace_drbd_epoch(mdev, epoch, EV_TRACE_FLUSH); fw->w.cb = w_flush; fw->epoch = epoch; drbd_queue_work(&mdev->data.work, &fw->w); @@ -1273,9 +1236,7 @@ STATIC int receive_Barrier(struct drbd_conf *mdev, struct p_header *h) list_add(&epoch->list, &mdev->current_epoch->list); mdev->current_epoch = epoch; mdev->epochs++; - MTRACE(TRACE_TYPE_EPOCHS, TRACE_LVL_METRICS, - dev_info(DEV, "Allocat epoch %p/xxxx { } nr_epochs=%d\n", epoch, mdev->epochs); - ); + trace_drbd_epoch(mdev, epoch, EV_TRACE_ALLOC); } else { /* The current_epoch got recycled while we allocated this one... */ kfree(epoch); @@ -1485,11 +1446,8 @@ STATIC int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si list_add(&e->w.list, &mdev->sync_ee); spin_unlock_irq(&mdev->req_lock); - MTRACE(TRACE_TYPE_EE, TRACE_LVL_ALL, - dev_info(DEV, "submit EE (RS)WRITE sec=%llus size=%u ee=%p\n", - (unsigned long long)e->sector, e->size, e); - ); - dump_internal_bio("Sec", mdev, e->private_bio, 0); + trace_drbd_ee(mdev, e, "submitting for (rs)write"); + trace_drbd_bio(mdev, "Sec", e->private_bio, 0, NULL); drbd_generic_make_request(mdev, DRBD_FAULT_RS_WR, e->private_bio); /* accounting done in endio */ @@ -1752,23 +1710,17 @@ STATIC int receive_Data(struct drbd_conf *mdev, struct p_header *h) a Barrier. 
*/ epoch = list_entry(e->epoch->list.prev, struct drbd_epoch, list); if (epoch == e->epoch) { - MTRACE(TRACE_TYPE_EPOCHS, TRACE_LVL_METRICS, - dev_info(DEV, "Add barrier %p/%d\n", - epoch, epoch->barrier_nr); - ); set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags); + trace_drbd_epoch(mdev, e->epoch, EV_TRACE_ADD_BARRIER); rw |= (1<flags |= EE_IS_BARRIER; } else { if (atomic_read(&epoch->epoch_size) > 1 || !test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) { - MTRACE(TRACE_TYPE_EPOCHS, TRACE_LVL_METRICS, - dev_info(DEV, "Add barrier %p/%d, setting bi in %p/%d\n", - e->epoch, e->epoch->barrier_nr, - epoch, epoch->barrier_nr); - ); set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags); + trace_drbd_epoch(mdev, epoch, EV_TRACE_SETTING_BI); set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags); + trace_drbd_epoch(mdev, e->epoch, EV_TRACE_ADD_BARRIER); rw |= (1<flags |= EE_IS_BARRIER; } @@ -1940,13 +1892,9 @@ STATIC int receive_Data(struct drbd_conf *mdev, struct p_header *h) drbd_al_begin_io(mdev, e->sector); } - MTRACE(TRACE_TYPE_EE, TRACE_LVL_ALL, - dev_info(DEV, "submit EE (DATA)WRITE sec=%llus size=%u ee=%p\n", - (unsigned long long)e->sector, e->size, e); - ); - e->private_bio->bi_rw = rw; - dump_internal_bio("Sec", mdev, e->private_bio, 0); + trace_drbd_ee(mdev, e, "submitting for (data)write"); + trace_drbd_bio(mdev, "Sec", e->private_bio, 0, NULL); drbd_generic_make_request(mdev, DRBD_FAULT_DT_WR, e->private_bio); /* accounting done in endio */ @@ -2104,12 +2052,8 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) inc_unacked(mdev); - MTRACE(TRACE_TYPE_EE, TRACE_LVL_ALL, - dev_info(DEV, "submit EE READ sec=%llus size=%u ee=%p\n", - (unsigned long long)e->sector, e->size, e); - ); - - dump_internal_bio("Sec", mdev, e->private_bio, 0); + trace_drbd_ee(mdev, e, "submitting for read"); + trace_drbd_bio(mdev, "Sec", e->private_bio, 0, NULL); drbd_generic_make_request(mdev, fault_type, e->private_bio); maybe_kick_lo(mdev); @@ -2331,15 +2275,12 @@ STATIC int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l /* lowest bit is set when we were primary, * next bit (weight 2) is set when peer was primary */ - MTRACE(TRACE_TYPE_UUID, TRACE_LVL_METRICS, DUMPI(rct);); - switch (rct) { case 0: /* !self_pri && !peer_pri */ return 0; case 1: /* self_pri && !peer_pri */ return 1; case 2: /* !self_pri && peer_pri */ return -1; case 3: /* self_pri && peer_pri */ dc = test_bit(DISCARD_CONCURRENT, &mdev->flags); - MTRACE(TRACE_TYPE_UUID, TRACE_LVL_METRICS, DUMPI(dc);); return dc ? 
-1 : 1; } } @@ -3467,7 +3408,7 @@ STATIC void drbdd(struct drbd_conf *mdev) break; } - dump_packet(mdev, mdev->data.socket, 2, &mdev->data.rbuf, + trace_drbd_packet(mdev, mdev->data.socket, 2, &mdev->data.rbuf, __FILE__, __LINE__); } } @@ -3729,7 +3670,7 @@ int drbd_do_handshake(struct drbd_conf *mdev) return 0; } - dump_packet(mdev, mdev->data.socket, 2, &mdev->data.rbuf, + trace_drbd_packet(mdev, mdev->data.socket, 2, &mdev->data.rbuf, __FILE__, __LINE__); p->protocol_min = be32_to_cpu(p->protocol_min); @@ -4309,14 +4250,14 @@ STATIC int drbd_asender(struct drbd_thread *thi) } expect = cmd->pkt_size; ERR_IF(len != expect-sizeof(struct p_header)) { - dump_packet(mdev, mdev->meta.socket, 1, (void *)h, __FILE__, __LINE__); + trace_drbd_packet(mdev, mdev->meta.socket, 1, (void *)h, __FILE__, __LINE__); DUMPI(expect); goto reconnect; } } if (received == expect) { D_ASSERT(cmd != NULL); - dump_packet(mdev, mdev->meta.socket, 1, (void *)h, __FILE__, __LINE__); + trace_drbd_packet(mdev, mdev->meta.socket, 1, (void *)h, __FILE__, __LINE__); if (!cmd->process(mdev, h)) goto reconnect; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index cbfcb6b8b4d4..dcf642563c77 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -29,82 +29,9 @@ #include #include #include "drbd_int.h" +#include "drbd_tracing.h" #include "drbd_req.h" -/* outside of the ifdef - * because of the _print_rq_state(,FIXME) in barrier_acked */ -STATIC void _print_rq_state(struct drbd_request *req, const char *txt) -{ - const unsigned long s = req->rq_state; - struct drbd_conf *mdev = req->mdev; - const int rw = (req->master_bio == NULL || - bio_data_dir(req->master_bio) == WRITE) ? - 'W' : 'R'; - - dev_info(DEV, "%s %p %c L%c%c%cN%c%c%c%c%c %u (%llus +%u) %s\n", - txt, req, rw, - s & RQ_LOCAL_PENDING ? 'p' : '-', - s & RQ_LOCAL_COMPLETED ? 'c' : '-', - s & RQ_LOCAL_OK ? 'o' : '-', - s & RQ_NET_PENDING ? 'p' : '-', - s & RQ_NET_QUEUED ? 'q' : '-', - s & RQ_NET_SENT ? 's' : '-', - s & RQ_NET_DONE ? 'd' : '-', - s & RQ_NET_OK ? 'o' : '-', - req->epoch, - (unsigned long long)req->sector, - req->size, - conns_to_name(mdev->state.conn)); -} - -/* #define VERBOSE_REQUEST_CODE */ -#if defined(VERBOSE_REQUEST_CODE) || defined(ENABLE_DYNAMIC_TRACE) -STATIC void _print_req_mod(struct drbd_request *req, enum drbd_req_event what) -{ - struct drbd_conf *mdev = req->mdev; - const int rw = (req->master_bio == NULL || - bio_data_dir(req->master_bio) == WRITE) ? 
- 'W' : 'R'; - - static const char *rq_event_names[] = { - [created] = "created", - [to_be_send] = "to_be_send", - [to_be_submitted] = "to_be_submitted", - [queue_for_net_write] = "queue_for_net_write", - [queue_for_net_read] = "queue_for_net_read", - [send_canceled] = "send_canceled", - [send_failed] = "send_failed", - [handed_over_to_network] = "handed_over_to_network", - [connection_lost_while_pending] = - "connection_lost_while_pending", - [recv_acked_by_peer] = "recv_acked_by_peer", - [write_acked_by_peer] = "write_acked_by_peer", - [neg_acked] = "neg_acked", - [conflict_discarded_by_peer] = "conflict_discarded_by_peer", - [barrier_acked] = "barrier_acked", - [data_received] = "data_received", - [read_completed_with_error] = "read_completed_with_error", - [write_completed_with_error] = "write_completed_with_error", - [completed_ok] = "completed_ok", - }; - - dev_info(DEV, "_req_mod(%p %c ,%s)\n", req, rw, rq_event_names[what]); -} - -# ifdef ENABLE_DYNAMIC_TRACE -# define print_rq_state(R, T) \ - MTRACE(TRACE_TYPE_RQ, TRACE_LVL_METRICS, _print_rq_state(R, T);) -# define print_req_mod(T, W) \ - MTRACE(TRACE_TYPE_RQ, TRACE_LVL_METRICS, _print_req_mod(T, W);) -# else -# define print_rq_state(R, T) _print_rq_state(R, T) -# define print_req_mod(T, W) _print_req_mod(T, W) -# endif - -#else -#define print_rq_state(R, T) -#define print_req_mod(T, W) -#endif /* Update disk stats at start of I/O request */ static inline void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req, struct bio *bio) @@ -292,7 +219,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, static void _complete_master_bio(struct drbd_conf *mdev, struct drbd_request *req, int error) { - dump_bio(mdev, req->master_bio, 1, req); + trace_drbd_bio(mdev, "Rq", req->master_bio, 1, req); bio_endio(req->master_bio, error); req->master_bio = NULL; dec_ap_bio(mdev); @@ -304,7 +231,7 @@ void _req_may_be_done(struct drbd_request *req, int error) struct drbd_conf *mdev = req->mdev; int rw; - print_rq_state(req, "_req_may_be_done"); + trace_drbd_req(req, nothing, "_req_may_be_done"); /* we must not complete the master bio, while it is * still being processed by _drbd_send_zc_bio (drbd_send_dblock) @@ -486,7 +413,7 @@ void _req_mod(struct drbd_request *req, enum drbd_req_event what, int error) if (error && (bio_rw(req->master_bio) != READA)) dev_err(DEV, "got an _req_mod() errno of %d\n", error); - print_req_mod(req, what); + trace_drbd_req(req, what, NULL); switch (what) { default: @@ -739,8 +666,8 @@ void _req_mod(struct drbd_request *req, enum drbd_req_event what, int error) /* barrier came in before all requests have been acked. * this is bad, because if the connection is lost now, * we won't be able to clean them up... */ - _print_rq_state(req, - "FIXME (barrier_acked but pending)"); + dev_err(DEV, "FIXME (barrier_acked but pending)\n"); + trace_drbd_req(req, nothing, "FIXME (barrier_acked but pending)"); list_move(&req->tl_requests, &mdev->out_of_sequence_requests); } D_ASSERT(req->rq_state & RQ_NET_SENT); @@ -810,7 +737,7 @@ STATIC int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) return 0; } - dump_bio(mdev, bio, 0, req); + trace_drbd_bio(mdev, "Rq", bio, 0, req); local = inc_local(mdev); if (!local) { @@ -1003,7 +930,7 @@ allocate_barrier: if (local) { req->private_bio->bi_bdev = mdev->bc->backing_bdev; - dump_internal_bio("Pri", mdev, req->private_bio, 0); + trace_drbd_bio(mdev, "Pri", req->private_bio, 0, NULL); if (FAULT_ACTIVE(mdev, rw == WRITE ? 
DRBD_FAULT_DT_WR
 : rw == READ ? DRBD_FAULT_DT_RD
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 8866ea62f431..a63a1e9ae5a8 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -103,6 +103,7 @@ enum drbd_req_event {
 read_completed_with_error,
 write_completed_with_error,
 completed_ok,
+ nothing, /* for tracing only */
 };
 /* encoding of request states for now. we don't actually need that many bits.
diff --git a/drivers/block/drbd/drbd_tracing.c b/drivers/block/drbd/drbd_tracing.c
new file mode 100644
index 000000000000..2eff178fbb0f
--- /dev/null
+++ b/drivers/block/drbd/drbd_tracing.c
@@ -0,0 +1,762 @@
+/*
+ drbd_tracing.c
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 2003-2008, Philipp Reisner .
+ Copyright (C) 2003-2008, Lars Ellenberg .
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include
+#include
+#include
+#include
+#include "drbd_int.h"
+#include "drbd_tracing.h"
+#include
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Philipp Reisner, Lars Ellenberg");
+MODULE_DESCRIPTION("DRBD tracepoint probes");
+MODULE_PARM_DESC(trace_mask, "Bitmap of events to trace; see drbd_tracing.c");
+MODULE_PARM_DESC(trace_level, "Current tracing level (changeable in /sys)");
+MODULE_PARM_DESC(trace_devs, "Bitmap of devices to trace (changeable in /sys)");
+
+unsigned int trace_mask = 0; /* Bitmap of events to trace */
+int trace_level; /* Current trace level */
+int trace_devs; /* Bitmap of devices to trace */
+
+module_param(trace_mask, uint, 0444);
+module_param(trace_level, int, 0644);
+module_param(trace_devs, int, 0644);
+
+enum {
+ TRACE_PACKET = 0x0001,
+ TRACE_RQ = 0x0002,
+ TRACE_UUID = 0x0004,
+ TRACE_RESYNC = 0x0008,
+ TRACE_EE = 0x0010,
+ TRACE_UNPLUG = 0x0020,
+ TRACE_NL = 0x0040,
+ TRACE_AL_EXT = 0x0080,
+ TRACE_INT_RQ = 0x0100,
+ TRACE_MD_IO = 0x0200,
+ TRACE_EPOCH = 0x0400,
+};
+
+/* Buffer printing support
+ * dbg_print_flags: used for Flags arg to drbd_print_buffer
+ * - DBGPRINT_BUFFADDR; if set, each line starts with the
+ * virtual address of the line being output. If clear,
+ * each line starts with the offset from the beginning
+ * of the buffer. */
+enum dbg_print_flags {
+ DBGPRINT_BUFFADDR = 0x0001,
+};
+
+/* Macro stuff */
+STATIC char *nl_packet_name(int packet_type)
+{
+/* Generate packet type strings */
+#define NL_PACKET(name, number, fields) \
+ [P_ ## name] = # name,
+#define NL_INTEGER Argh!
+#define NL_BIT Argh!
+#define NL_INT64 Argh!
+#define NL_STRING Argh!
+
+ static char *nl_tag_name[P_nl_after_last_packet] = {
+#include "linux/drbd_nl.h"
+ };
+
+ return (packet_type < sizeof(nl_tag_name)/sizeof(nl_tag_name[0])) ?
+ nl_tag_name[packet_type] : "*Unknown*"; +} +/* /Macro stuff */ + +static inline int is_mdev_trace(struct drbd_conf *mdev, unsigned int level) +{ + return trace_level >= level && ((1 << mdev_to_minor(mdev)) & trace_devs); +} + +static void probe_drbd_unplug(struct drbd_conf *mdev, char *msg) +{ + if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) + return; + + dev_info(DEV, "%s, ap_bio_count=%d\n", msg, atomic_read(&mdev->ap_bio_cnt)); +} + +static void probe_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index) +{ + static char *uuid_str[UI_EXTENDED_SIZE] = { + [UI_CURRENT] = "CURRENT", + [UI_BITMAP] = "BITMAP", + [UI_HISTORY_START] = "HISTORY_START", + [UI_HISTORY_END] = "HISTORY_END", + [UI_SIZE] = "SIZE", + [UI_FLAGS] = "FLAGS", + }; + + if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) + return; + + if (index >= UI_EXTENDED_SIZE) { + dev_warn(DEV, " uuid_index >= EXTENDED_SIZE\n"); + return; + } + + dev_info(DEV, " uuid[%s] now %016llX\n", + uuid_str[index], + (unsigned long long)mdev->bc->md.uuid[index]); +} + +static void probe_drbd_md_io(struct drbd_conf *mdev, int rw, + struct drbd_backing_dev *bdev) +{ + if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) + return; + + dev_info(DEV, " %s metadata superblock now\n", + rw == READ ? "Reading" : "Writing"); +} + +static void probe_drbd_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, char* msg) +{ + if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) + return; + + dev_info(DEV, "EE %s sec=%llus size=%u e=%p\n", + msg, (unsigned long long)e->sector, e->size, e); +} + +static void probe_drbd_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch, + enum epoch_event ev) +{ + static char *epoch_event_str[] = { + [EV_PUT] = "put", + [EV_GOT_BARRIER_NR] = "got_barrier_nr", + [EV_BARRIER_DONE] = "barrier_done", + [EV_BECAME_LAST] = "became_last", + [EV_TRACE_FLUSH] = "issuing_flush", + [EV_TRACE_ADD_BARRIER] = "added_barrier", + [EV_TRACE_SETTING_BI] = "just set barrier_in_next_epoch", + }; + + if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) + return; + + ev &= ~EV_CLEANUP; + + switch (ev) { + case EV_TRACE_ALLOC: + dev_info(DEV, "Allocat epoch %p/xxxx { } nr_epochs=%d\n", epoch, mdev->epochs); + break; + case EV_TRACE_FREE: + dev_info(DEV, "Freeing epoch %p/%d { size=%d } nr_epochs=%d\n", + epoch, epoch->barrier_nr, atomic_read(&epoch->epoch_size), + mdev->epochs); + break; + default: + dev_info(DEV, "Update epoch %p/%d { size=%d active=%d %c%c n%c%c } ev=%s\n", + epoch, epoch->barrier_nr, atomic_read(&epoch->epoch_size), + atomic_read(&epoch->active), + test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) ? 'n' : '-', + test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) ? 'b' : '-', + test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) ? 'i' : '-', + test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) ? 'd' : '-', + epoch_event_str[ev]); + } +} + +static void probe_drbd_netlink(void *data, int is_req) +{ + struct cn_msg *msg = data; + + if (is_req) { + struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)msg->data; + + printk(KERN_INFO "drbd%d: " + "Netlink: << %s (%d) - seq: %x, ack: %x, len: %x\n", + nlp->drbd_minor, + nl_packet_name(nlp->packet_type), + nlp->packet_type, + msg->seq, msg->ack, msg->len); + } else { + struct drbd_nl_cfg_reply *nlp = (struct drbd_nl_cfg_reply *)msg->data; + + printk(KERN_INFO "drbd%d: " + "Netlink: >> %s (%d) - seq: %x, ack: %x, len: %x\n", + nlp->minor, + nlp->packet_type == P_nl_after_last_packet ? 
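
nl_packet_name() above gets its string table for free: it redefines NL_PACKET to emit a designated initializer and re-includes drbd_nl.h, so the same packet list produces both the enum and the name array. A self-contained sketch of this X-macro technique (the list and names here are illustrative, not DRBD's):

/* cc -o xmacro xmacro.c && ./xmacro */
#include <stdio.h>

/* the list is written once (stand-in for drbd_nl.h's NL_PACKET entries) */
#define PACKET_LIST \
	X(HAND_SHAKE) \
	X(DATA) \
	X(BARRIER_ACK)

/* expanded once to build the enum... */
enum packet_type {
#define X(name) P_ ## name,
	PACKET_LIST
#undef X
	P_nl_after_last_packet
};

/* ...and once more to build the matching string table */
static const char *packet_name[P_nl_after_last_packet] = {
#define X(name) [P_ ## name] = #name,
	PACKET_LIST
#undef X
};

int main(void)
{
	printf("%s\n", packet_name[P_DATA]);	/* prints "DATA" */
	return 0;
}
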
+ "Empty-Reply" : nl_packet_name(nlp->packet_type), + nlp->packet_type, + msg->seq, msg->ack, msg->len); + } +} + +static void probe_drbd_actlog(struct drbd_conf *mdev, sector_t sector, char* msg) +{ + unsigned int enr = (sector >> (AL_EXTENT_SIZE_B-9)); + + if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) + return; + + dev_info(DEV, "%s (sec=%llus, al_enr=%u, rs_enr=%d)\n", + msg, (unsigned long long) sector, enr, + (int)BM_SECT_TO_EXT(sector)); +} + +/* + * + * drbd_print_buffer + * + * This routine dumps binary data to the debugging output. Can be + * called at interrupt level. + * + * Arguments: + * + * prefix - String is output at the beginning of each line output + * flags - Control operation of the routine. Currently defined + * Flags are: + * DBGPRINT_BUFFADDR; if set, each line starts with the + * virtual address of the line being outupt. If clear, + * each line starts with the offset from the beginning + * of the buffer. + * size - Indicates the size of each entry in the buffer. Supported + * values are sizeof(char), sizeof(short) and sizeof(int) + * buffer - Start address of buffer + * buffer_va - Virtual address of start of buffer (normally the same + * as Buffer, but having it separate allows it to hold + * file address for example) + * length - length of buffer + * + */ +static void drbd_print_buffer(const char *prefix, unsigned int flags, int size, + const void *buffer, const void *buffer_va, + unsigned int length) + +#define LINE_SIZE 16 +#define LINE_ENTRIES (int)(LINE_SIZE/size) +{ + const unsigned char *pstart; + const unsigned char *pstart_va; + const unsigned char *pend; + char bytes_str[LINE_SIZE*3+8], ascii_str[LINE_SIZE+8]; + char *pbytes = bytes_str, *pascii = ascii_str; + int offset = 0; + long sizemask; + int field_width; + int index; + const unsigned char *pend_str; + const unsigned char *p; + int count; + + /* verify size parameter */ + if (size != sizeof(char) && + size != sizeof(short) && + size != sizeof(int)) { + printk(KERN_DEBUG "drbd_print_buffer: " + "ERROR invalid size %d\n", size); + return; + } + + sizemask = size-1; + field_width = size*2; + + /* Adjust start/end to be on appropriate boundary for size */ + buffer = (const char *)((long)buffer & ~sizemask); + pend = (const unsigned char *) + (((long)buffer + length + sizemask) & ~sizemask); + + if (flags & DBGPRINT_BUFFADDR) { + /* Move start back to nearest multiple of line size, + * if printing address. 
This results in nicely formatted output + * with addresses being on line size (16) byte boundaries */ + pstart = (const unsigned char *)((long)buffer & ~(LINE_SIZE-1)); + } else { + pstart = (const unsigned char *)buffer; + } + + /* Set value of start VA to print if addresses asked for */ + pstart_va = (const unsigned char *)buffer_va + - ((const unsigned char *)buffer-pstart); + + /* Calculate end position to nicely align right hand side */ + pend_str = pstart + (((pend-pstart) + LINE_SIZE-1) & ~(LINE_SIZE-1)); + + /* Init strings */ + *pbytes = *pascii = '\0'; + + /* Start at beginning of first line */ + p = pstart; + count = 0; + + while (p < pend_str) { + if (p < (const unsigned char *)buffer || p >= pend) { + /* Before start of buffer or after end- print spaces */ + pbytes += sprintf(pbytes, "%*c ", field_width, ' '); + pascii += sprintf(pascii, "%*c", size, ' '); + p += size; + } else { + /* Add hex and ascii to strings */ + int val; + switch (size) { + default: + case 1: + val = *(unsigned char *)p; + break; + case 2: + val = *(unsigned short *)p; + break; + case 4: + val = *(unsigned int *)p; + break; + } + + pbytes += sprintf(pbytes, "%0*x ", field_width, val); + + for (index = size; index; index--) { + *pascii++ = isprint(*p) ? *p : '.'; + p++; + } + } + + count++; + + if (count == LINE_ENTRIES || p >= pend_str) { + /* Null terminate and print record */ + *pascii = '\0'; + printk(KERN_DEBUG "%s%8.8lx: %*s|%*s|\n", + prefix, + (flags & DBGPRINT_BUFFADDR) + ? (long)pstart_va:(long)offset, + LINE_ENTRIES*(field_width+1), bytes_str, + LINE_SIZE, ascii_str); + + /* Move onto next line */ + pstart_va += (p-pstart); + pstart = p; + count = 0; + offset += LINE_SIZE; + + /* Re-init strings */ + pbytes = bytes_str; + pascii = ascii_str; + *pbytes = *pascii = '\0'; + } + } +} + +static void probe_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, va_list args) +{ + char str[256]; + + if (!is_mdev_trace(mdev, level)) + return; + + if (vsnprintf(str, 256, fmt, args) >= 256) + str[255] = 0; + + printk(KERN_INFO "%s %s: %s", dev_driver_string(disk_to_dev(mdev->vdisk)), + dev_name(disk_to_dev(mdev->vdisk)), str); +} + +static void probe_drbd_bio(struct drbd_conf *mdev, const char *pfx, struct bio *bio, int complete, + struct drbd_request *r) +{ +#ifdef CONFIG_LBD +#define SECTOR_FORMAT "%Lx" +#else +#define SECTOR_FORMAT "%lx" +#endif +#define SECTOR_SHIFT 9 + + unsigned long lowaddr = (unsigned long)(bio->bi_sector << SECTOR_SHIFT); + char *faddr = (char *)(lowaddr); + char rb[sizeof(void *)*2+6] = { 0, }; + struct bio_vec *bvec; + int segno; + + const int rw = bio->bi_rw; + const int biorw = (rw & (RW_MASK|RWA_MASK)); + const int biobarrier = (rw & (1<>>", + pfx, + biorw == WRITE ? "Write" : "Read", + biobarrier ? " : B" : "", + biosync ? " : S" : "", + bio, + rb, + complete ? (bio_flagged(bio, BIO_UPTODATE) ? "Success, " : "Failed, ") : "", + bio->bi_sector << SECTOR_SHIFT, + bio->bi_size); + + if (trace_level >= TRACE_LVL_METRICS && + ((biorw == WRITE) ^ complete)) { + printk(KERN_DEBUG " ind page offset length\n"); + __bio_for_each_segment(bvec, bio, segno, 0) { + printk(KERN_DEBUG " [%d] %p %8.8x %8.8x\n", segno, + bvec->bv_page, bvec->bv_offset, bvec->bv_len); + + if (trace_level >= TRACE_LVL_ALL) { + char *bvec_buf; + unsigned long flags; + + bvec_buf = bvec_kmap_irq(bvec, &flags); + + drbd_print_buffer(" ", DBGPRINT_BUFFADDR, 1, + bvec_buf, + faddr, + (bvec->bv_len <= 0x80) + ? 
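
drbd_print_buffer() above emits the classic 16-bytes-per-line hex-plus-ASCII dump, optionally keyed by virtual address instead of buffer offset. A much-reduced userspace sketch of the same output layout (entry size fixed at one byte, offsets only):

/* cc -o hexdump hexdump.c && ./hexdump */
#include <ctype.h>
#include <stdio.h>
#include <string.h>

#define LINE_SIZE 16

static void hexdump(const char *prefix, const unsigned char *buf, unsigned int len)
{
	unsigned int i, j;

	for (i = 0; i < len; i += LINE_SIZE) {
		printf("%s%8.8x: ", prefix, i);
		for (j = i; j < i + LINE_SIZE; j++) {
			if (j < len)
				printf("%2.2x ", buf[j]);
			else
				printf("   ");	/* pad the last line */
		}
		printf("|");
		for (j = i; j < i + LINE_SIZE && j < len; j++)
			putchar(isprint(buf[j]) ? buf[j] : '.');
		printf("|\n");
	}
}

int main(void)
{
	const char *s = "drbd_print_buffer layout demo";

	hexdump("    ", (const unsigned char *)s, strlen(s) + 1);
	return 0;
}
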
bvec->bv_len : 0x80); + + bvec_kunmap_irq(bvec_buf, &flags); + + if (bvec->bv_len > 0x40) + printk(KERN_DEBUG " ....\n"); + + faddr += bvec->bv_len; + } + } + } +} + +static void probe_drbd_req(struct drbd_request *req, enum drbd_req_event what, char *msg) +{ + static const char *rq_event_names[] = { + [created] = "created", + [to_be_send] = "to_be_send", + [to_be_submitted] = "to_be_submitted", + [queue_for_net_write] = "queue_for_net_write", + [queue_for_net_read] = "queue_for_net_read", + [send_canceled] = "send_canceled", + [send_failed] = "send_failed", + [handed_over_to_network] = "handed_over_to_network", + [connection_lost_while_pending] = + "connection_lost_while_pending", + [recv_acked_by_peer] = "recv_acked_by_peer", + [write_acked_by_peer] = "write_acked_by_peer", + [neg_acked] = "neg_acked", + [conflict_discarded_by_peer] = "conflict_discarded_by_peer", + [barrier_acked] = "barrier_acked", + [data_received] = "data_received", + [read_completed_with_error] = "read_completed_with_error", + [write_completed_with_error] = "write_completed_with_error", + [completed_ok] = "completed_ok", + }; + + struct drbd_conf *mdev = req->mdev; + + const int rw = (req->master_bio == NULL || + bio_data_dir(req->master_bio) == WRITE) ? + 'W' : 'R'; + const unsigned long s = req->rq_state; + + if (what != nothing) { + dev_info(DEV, "_req_mod(%p %c ,%s)\n", req, rw, rq_event_names[what]); + } else { + dev_info(DEV, "%s %p %c L%c%c%cN%c%c%c%c%c %u (%llus +%u) %s\n", + msg, req, rw, + s & RQ_LOCAL_PENDING ? 'p' : '-', + s & RQ_LOCAL_COMPLETED ? 'c' : '-', + s & RQ_LOCAL_OK ? 'o' : '-', + s & RQ_NET_PENDING ? 'p' : '-', + s & RQ_NET_QUEUED ? 'q' : '-', + s & RQ_NET_SENT ? 's' : '-', + s & RQ_NET_DONE ? 'd' : '-', + s & RQ_NET_OK ? 'o' : '-', + req->epoch, + (unsigned long long)req->sector, + req->size, + conns_to_name(mdev->state.conn)); + } +} + + +#define peers_to_name roles_to_name +#define pdsks_to_name disks_to_name + +#define PSM(A) \ +do { \ + if (mask.A) { \ + int i = snprintf(p, len, " " #A "( %s )", \ + A##s_to_name(val.A)); \ + if (i >= len) \ + return op; \ + p += i; \ + len -= i; \ + } \ +} while (0) + +STATIC char *dump_st(char *p, int len, union drbd_state mask, union drbd_state val) +{ + char *op = p; + *p = '\0'; + PSM(role); + PSM(peer); + PSM(conn); + PSM(disk); + PSM(pdsk); + + return op; +} + +#define INFOP(fmt, args...) \ +do { \ + if (trace_level >= TRACE_LVL_ALL) { \ + dev_info(DEV, "%s:%d: %s [%d] %s %s " fmt , \ + file, line, current->comm, current->pid, \ + sockname, recv ? "<<<" : ">>>" , \ + ## args); \ + } else { \ + dev_info(DEV, "%s %s " fmt, sockname, \ + recv ? "<<<" : ">>>" , \ + ## args); \ + } \ +} while (0) + +STATIC char *_dump_block_id(u64 block_id, char *buff) +{ + if (is_syncer_block_id(block_id)) + strcpy(buff, "SyncerId"); + else + sprintf(buff, "%llx", (unsigned long long)block_id); + + return buff; +} + +static void probe_drbd_packet(struct drbd_conf *mdev, struct socket *sock, + int recv, union p_polymorph *p, char *file, int line) +{ + char *sockname = sock == mdev->meta.socket ? "meta" : "data"; + int cmd = (recv == 2) ? 
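
probe_drbd_req() above compacts the request state word into a fixed-width flag string ("L" for the local bits, "N" for the network bits, one letter per flag). The decoding as a standalone sketch; the bit positions here are illustrative, the real RQ_* encoding lives in drbd_req.h:

/* cc -o rqstate rqstate.c && ./rqstate */
#include <stdio.h>

enum {	/* made-up bit positions */
	RQ_LOCAL_PENDING   = 1 << 0,
	RQ_LOCAL_COMPLETED = 1 << 1,
	RQ_LOCAL_OK        = 1 << 2,
	RQ_NET_PENDING     = 1 << 3,
	RQ_NET_QUEUED      = 1 << 4,
	RQ_NET_SENT        = 1 << 5,
	RQ_NET_DONE        = 1 << 6,
	RQ_NET_OK          = 1 << 7,
};

static void fmt_rq_state(unsigned long s, char *buf)
{
	sprintf(buf, "L%c%c%cN%c%c%c%c%c",
		s & RQ_LOCAL_PENDING   ? 'p' : '-',
		s & RQ_LOCAL_COMPLETED ? 'c' : '-',
		s & RQ_LOCAL_OK        ? 'o' : '-',
		s & RQ_NET_PENDING     ? 'p' : '-',
		s & RQ_NET_QUEUED      ? 'q' : '-',
		s & RQ_NET_SENT        ? 's' : '-',
		s & RQ_NET_DONE        ? 'd' : '-',
		s & RQ_NET_OK          ? 'o' : '-');
}

int main(void)
{
	char buf[16];

	fmt_rq_state(RQ_LOCAL_PENDING | RQ_NET_QUEUED, buf);
	printf("%s\n", buf);	/* prints "Lp--N-q---" */
	return 0;
}
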
p->header.command : be16_to_cpu(p->header.command); + char tmp[300]; + union drbd_state m, v; + + switch (cmd) { + case P_HAND_SHAKE: + INFOP("%s (protocol %u-%u)\n", cmdname(cmd), + be32_to_cpu(p->handshake.protocol_min), + be32_to_cpu(p->handshake.protocol_max)); + break; + + case P_BITMAP: /* don't report this */ + case P_COMPRESSED_BITMAP: /* don't report this */ + break; + + case P_DATA: + INFOP("%s (sector %llus, id %s, seq %u, f %x)\n", cmdname(cmd), + (unsigned long long)be64_to_cpu(p->data.sector), + _dump_block_id(p->data.block_id, tmp), + be32_to_cpu(p->data.seq_num), + be32_to_cpu(p->data.dp_flags) + ); + break; + + case P_DATA_REPLY: + case P_RS_DATA_REPLY: + INFOP("%s (sector %llus, id %s)\n", cmdname(cmd), + (unsigned long long)be64_to_cpu(p->data.sector), + _dump_block_id(p->data.block_id, tmp) + ); + break; + + case P_RECV_ACK: + case P_WRITE_ACK: + case P_RS_WRITE_ACK: + case P_DISCARD_ACK: + case P_NEG_ACK: + case P_NEG_RS_DREPLY: + INFOP("%s (sector %llus, size %u, id %s, seq %u)\n", + cmdname(cmd), + (long long)be64_to_cpu(p->block_ack.sector), + be32_to_cpu(p->block_ack.blksize), + _dump_block_id(p->block_ack.block_id, tmp), + be32_to_cpu(p->block_ack.seq_num) + ); + break; + + case P_DATA_REQUEST: + case P_RS_DATA_REQUEST: + INFOP("%s (sector %llus, size %u, id %s)\n", cmdname(cmd), + (long long)be64_to_cpu(p->block_req.sector), + be32_to_cpu(p->block_req.blksize), + _dump_block_id(p->block_req.block_id, tmp) + ); + break; + + case P_BARRIER: + case P_BARRIER_ACK: + INFOP("%s (barrier %u)\n", cmdname(cmd), p->barrier.barrier); + break; + + case P_SYNC_PARAM: + case P_SYNC_PARAM89: + INFOP("%s (rate %u, verify-alg \"%.64s\", csums-alg \"%.64s\")\n", + cmdname(cmd), be32_to_cpu(p->rs_param_89.rate), + p->rs_param_89.verify_alg, p->rs_param_89.csums_alg); + break; + + case P_UUIDS: + INFOP("%s Curr:%016llX, Bitmap:%016llX, " + "HisSt:%016llX, HisEnd:%016llX\n", + cmdname(cmd), + (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_CURRENT]), + (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_BITMAP]), + (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_HISTORY_START]), + (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_HISTORY_END])); + break; + + case P_SIZES: + INFOP("%s (d %lluMiB, u %lluMiB, c %lldMiB, " + "max bio %x, q order %x)\n", + cmdname(cmd), + (long long)(be64_to_cpu(p->sizes.d_size)>>(20-9)), + (long long)(be64_to_cpu(p->sizes.u_size)>>(20-9)), + (long long)(be64_to_cpu(p->sizes.c_size)>>(20-9)), + be32_to_cpu(p->sizes.max_segment_size), + be32_to_cpu(p->sizes.queue_order_type)); + break; + + case P_STATE: + v.i = be32_to_cpu(p->state.state); + m.i = 0xffffffff; + dump_st(tmp, sizeof(tmp), m, v); + INFOP("%s (s %x {%s})\n", cmdname(cmd), v.i, tmp); + break; + + case P_STATE_CHG_REQ: + m.i = be32_to_cpu(p->req_state.mask); + v.i = be32_to_cpu(p->req_state.val); + dump_st(tmp, sizeof(tmp), m, v); + INFOP("%s (m %x v %x {%s})\n", cmdname(cmd), m.i, v.i, tmp); + break; + + case P_STATE_CHG_REPLY: + INFOP("%s (ret %x)\n", cmdname(cmd), + be32_to_cpu(p->req_state_reply.retcode)); + break; + + case P_PING: + case P_PING_ACK: + /* + * Dont trace pings at summary level + */ + if (trace_level < TRACE_LVL_ALL) + break; + /* fall through... 
*/ + default: + INFOP("%s (%u)\n", cmdname(cmd), cmd); + break; + } +} + + +static int __init drbd_trace_init(void) +{ + int ret; + + if (trace_mask & TRACE_UNPLUG) { + ret = register_trace_drbd_unplug(probe_drbd_unplug); + WARN_ON(ret); + } + if (trace_mask & TRACE_UUID) { + ret = register_trace_drbd_uuid(probe_drbd_uuid); + WARN_ON(ret); + } + if (trace_mask & TRACE_EE) { + ret = register_trace_drbd_ee(probe_drbd_ee); + WARN_ON(ret); + } + if (trace_mask & TRACE_PACKET) { + ret = register_trace_drbd_packet(probe_drbd_packet); + WARN_ON(ret); + } + if (trace_mask & TRACE_MD_IO) { + ret = register_trace_drbd_md_io(probe_drbd_md_io); + WARN_ON(ret); + } + if (trace_mask & TRACE_EPOCH) { + ret = register_trace_drbd_epoch(probe_drbd_epoch); + WARN_ON(ret); + } + if (trace_mask & TRACE_NL) { + ret = register_trace_drbd_netlink(probe_drbd_netlink); + WARN_ON(ret); + } + if (trace_mask & TRACE_AL_EXT) { + ret = register_trace_drbd_actlog(probe_drbd_actlog); + WARN_ON(ret); + } + if (trace_mask & TRACE_RQ) { + ret = register_trace_drbd_bio(probe_drbd_bio); + WARN_ON(ret); + } + if (trace_mask & TRACE_INT_RQ) { + ret = register_trace_drbd_req(probe_drbd_req); + WARN_ON(ret); + } + if (trace_mask & TRACE_RESYNC) { + ret = register_trace__drbd_resync(probe_drbd_resync); + WARN_ON(ret); + } + return 0; +} + +module_init(drbd_trace_init); + +static void __exit drbd_trace_exit(void) +{ + if (trace_mask & TRACE_UNPLUG) + unregister_trace_drbd_unplug(probe_drbd_unplug); + if (trace_mask & TRACE_UUID) + unregister_trace_drbd_uuid(probe_drbd_uuid); + if (trace_mask & TRACE_EE) + unregister_trace_drbd_ee(probe_drbd_ee); + if (trace_mask & TRACE_PACKET) + unregister_trace_drbd_packet(probe_drbd_packet); + if (trace_mask & TRACE_MD_IO) + unregister_trace_drbd_md_io(probe_drbd_md_io); + if (trace_mask & TRACE_EPOCH) + unregister_trace_drbd_epoch(probe_drbd_epoch); + if (trace_mask & TRACE_NL) + unregister_trace_drbd_netlink(probe_drbd_netlink); + if (trace_mask & TRACE_AL_EXT) + unregister_trace_drbd_actlog(probe_drbd_actlog); + if (trace_mask & TRACE_RQ) + unregister_trace_drbd_bio(probe_drbd_bio); + if (trace_mask & TRACE_INT_RQ) + unregister_trace_drbd_req(probe_drbd_req); + if (trace_mask & TRACE_RESYNC) + unregister_trace__drbd_resync(probe_drbd_resync); + + tracepoint_synchronize_unregister(); +} + +module_exit(drbd_trace_exit); diff --git a/drivers/block/drbd/drbd_tracing.h b/drivers/block/drbd/drbd_tracing.h new file mode 100644 index 000000000000..c4531a137f65 --- /dev/null +++ b/drivers/block/drbd/drbd_tracing.h @@ -0,0 +1,87 @@ +/* + drbd_tracing.h + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2003-2008, Philipp Reisner . + Copyright (C) 2003-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
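
drbd_trace_init() and drbd_trace_exit() above repeat the same mask-test plus (un)register pair once per tracepoint. Since every register_trace_* helper takes a differently-typed probe, the kernel code cannot easily be table-driven; with uniform signatures the pattern would collapse to a table, as in this userspace sketch (all names made up, shown only as a design alternative):

/* cc -o hooks hooks.c && ./hooks */
#include <stdio.h>

enum { TRACE_FOO = 0x1, TRACE_BAR = 0x2 };	/* illustrative masks */

static int reg_foo(void)    { puts("foo on");  return 0; }
static void unreg_foo(void) { puts("foo off"); }
static int reg_bar(void)    { puts("bar on");  return 0; }
static void unreg_bar(void) { puts("bar off"); }

static const struct probe_hook {
	unsigned int mask;
	int (*reg)(void);
	void (*unreg)(void);
} hooks[] = {
	{ TRACE_FOO, reg_foo, unreg_foo },
	{ TRACE_BAR, reg_bar, unreg_bar },
};

static unsigned int trace_mask = TRACE_BAR;

int main(void)
{
	unsigned int i;

	for (i = 0; i < sizeof(hooks)/sizeof(hooks[0]); i++)
		if ((trace_mask & hooks[i].mask) && hooks[i].reg())
			fprintf(stderr, "hook %u failed\n", i);
	for (i = 0; i < sizeof(hooks)/sizeof(hooks[0]); i++)
		if (trace_mask & hooks[i].mask)
			hooks[i].unreg();
	return 0;
}
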
+ + */ + +#ifndef DRBD_TRACING_H +#define DRBD_TRACING_H + +#include +#include "drbd_int.h" +#include "drbd_req.h" + +enum { + TRACE_LVL_ALWAYS = 0, + TRACE_LVL_SUMMARY, + TRACE_LVL_METRICS, + TRACE_LVL_ALL, + TRACE_LVL_MAX +}; + +DECLARE_TRACE(drbd_unplug, + TP_PROTO(struct drbd_conf *mdev, char* msg), + TP_ARGS(mdev, msg)); + +DECLARE_TRACE(drbd_uuid, + TP_PROTO(struct drbd_conf *mdev, enum drbd_uuid_index index), + TP_ARGS(mdev, index)); + +DECLARE_TRACE(drbd_ee, + TP_PROTO(struct drbd_conf *mdev, struct drbd_epoch_entry *e, char* msg), + TP_ARGS(mdev, e, msg)); + +DECLARE_TRACE(drbd_md_io, + TP_PROTO(struct drbd_conf *mdev, int rw, struct drbd_backing_dev *bdev), + TP_ARGS(mdev, rw, bdev)); + +DECLARE_TRACE(drbd_epoch, + TP_PROTO(struct drbd_conf *mdev, struct drbd_epoch *epoch, enum epoch_event ev), + TP_ARGS(mdev, epoch, ev)); + +DECLARE_TRACE(drbd_netlink, + TP_PROTO(void *data, int is_req), + TP_ARGS(data, is_req)); + +DECLARE_TRACE(drbd_actlog, + TP_PROTO(struct drbd_conf *mdev, sector_t sector, char* msg), + TP_ARGS(mdev, sector, msg)); + +DECLARE_TRACE(drbd_bio, + TP_PROTO(struct drbd_conf *mdev, const char *pfx, struct bio *bio, int complete, + struct drbd_request *r), + TP_ARGS(mdev, pfx, bio, complete, r)); + +DECLARE_TRACE(drbd_req, + TP_PROTO(struct drbd_request *req, enum drbd_req_event what, char *msg), + TP_ARGS(req, what, msg)); + +DECLARE_TRACE(drbd_packet, + TP_PROTO(struct drbd_conf *mdev, struct socket *sock, + int recv, union p_polymorph *p, char *file, int line), + TP_ARGS(mdev, sock, recv, p, file, line)); + +DECLARE_TRACE(_drbd_resync, + TP_PROTO(struct drbd_conf *mdev, int level, const char *fmt, va_list args), + TP_ARGS(mdev, level, fmt, args)); + +#endif diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 4be8814fd50d..81f3a4e8ea49 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -43,6 +43,7 @@ #include #include "drbd_int.h" #include "drbd_req.h" +#include "drbd_tracing.h" #define SLEEP_TIME (HZ/10) @@ -88,7 +89,7 @@ void drbd_md_io_complete(struct bio *bio, int error) md_io->error = error; - dump_internal_bio("Md", md_io->mdev, bio, 1); + trace_drbd_bio(md_io->mdev, "Md", bio, 1, NULL); complete(&md_io->event); } @@ -116,7 +117,7 @@ void drbd_endio_read_sec(struct bio *bio, int error) __releases(local) D_ASSERT(e->block_id != ID_VACANT); - dump_internal_bio("Sec", mdev, bio, 1); + trace_drbd_bio(mdev, "Sec", bio, 1, NULL); spin_lock_irqsave(&mdev->req_lock, flags); mdev->read_cnt += e->size >> 9; @@ -129,10 +130,7 @@ void drbd_endio_read_sec(struct bio *bio, int error) __releases(local) drbd_queue_work(&mdev->data.work, &e->w); dec_local(mdev); - MTRACE(TRACE_TYPE_EE, TRACE_LVL_ALL, - dev_info(DEV, "Moved EE (READ) to worker sec=%llus size=%u ee=%p\n", - (unsigned long long)e->sector, e->size, e); - ); + trace_drbd_ee(mdev, e, "read completed"); } /* writes on behalf of the partner, or resync writes, @@ -175,7 +173,7 @@ void drbd_endio_write_sec(struct bio *bio, int error) __releases(local) D_ASSERT(e->block_id != ID_VACANT); - dump_internal_bio("Sec", mdev, bio, 1); + trace_drbd_bio(mdev, "Sec", bio, 1, NULL); spin_lock_irqsave(&mdev->req_lock, flags); mdev->writ_cnt += e->size >> 9; @@ -191,10 +189,7 @@ void drbd_endio_write_sec(struct bio *bio, int error) __releases(local) list_del(&e->w.list); /* has been on active_ee or sync_ee */ list_add_tail(&e->w.list, &mdev->done_ee); - MTRACE(TRACE_TYPE_EE, TRACE_LVL_ALL, - dev_info(DEV, "Moved EE (WRITE) to done_ee sec=%llus size=%u 
ee=%p\n", - (unsigned long long)e->sector, e->size, e); - ); + trace_drbd_ee(mdev, e, "write completed"); /* No hlist_del_init(&e->colision) here, we did not send the Ack yet, * neither did we wake possibly waiting conflicting requests. @@ -241,7 +236,7 @@ void drbd_endio_pri(struct bio *bio, int error) error = -EIO; } - dump_internal_bio("Pri", mdev, bio, 1); + trace_drbd_bio(mdev, "Pri", bio, 1, NULL); /* to avoid recursion in _req_mod */ what = error @@ -1265,10 +1260,8 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) union drbd_state ns; int r; - MTRACE(TRACE_TYPE_RESYNC, TRACE_LVL_SUMMARY, - dev_info(DEV, "Resync starting: side=%s\n", - side == C_SYNC_TARGET ? "SyncTarget" : "SyncSource"); - ); + trace_drbd_resync(mdev, TRACE_LVL_SUMMARY, "Resync starting: side=%s\n", + side == C_SYNC_TARGET ? "SyncTarget" : "SyncSource"); drbd_bm_recount_bits(mdev); diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h index 501ca2ed31f7..b7ce5acca0bb 100644 --- a/drivers/block/drbd/drbd_wrappers.h +++ b/drivers/block/drbd/drbd_wrappers.h @@ -1,3 +1,6 @@ +#ifndef _DRBD_WRAPPERS_H +#define _DRBD_WRAPPERS_H + #include #include @@ -90,3 +93,5 @@ static inline int drbd_crypto_is_hash(struct crypto_tfm *tfm) # undef __cond_lock # define __cond_lock(x,c) (c) #endif + +#endif diff --git a/include/linux/drbd_config.h b/include/linux/drbd_config.h index 8ba12c10591b..06a750ed58bf 100644 --- a/include/linux/drbd_config.h +++ b/include/linux/drbd_config.h @@ -31,11 +31,6 @@ extern const char *drbd_buildtag(void); #define DBG_ALL_SYMBOLS /* no static functs, improves quality of OOPS traces */ #endif -/* Define this to enable dynamic tracing controlled by module parameters - * at run time. This enables ALL use of dynamic tracing including packet - * and bio dumping, etc */ -#define ENABLE_DYNAMIC_TRACE - /* Enable fault insertion code */ #define DRBD_ENABLE_FAULTS -- cgit v1.2.3 From 80c5b619dce23d1d1b55f203b5538cbe5edf001d Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 30 Apr 2009 12:40:37 +0200 Subject: Need to have lines < 1000 characters in the .svg files Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- .../blockdev/drbd/DRBD-8.3-data-packets.svg | 590 ++++++++++++++++++++- Documentation/blockdev/drbd/DRBD-data-packets.svg | 461 +++++++++++++++- 2 files changed, 1047 insertions(+), 4 deletions(-) diff --git a/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg b/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg index cbc68b144b5b..f87cfa0dc2fb 100644 --- a/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg +++ b/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg @@ -1,2 +1,588 @@ - -Master slideSlideDrawingDrawingDrawingDrawingRSDataReplyDrawingDrawingCsumRSRequestDrawingw_make_resync_request()Drawingreceive_DataRequest()Drawingdrbd_endio_read_sec()Drawingw_e_end_csum_rs_req()Drawingreceive_RSDataReply()Drawingdrbd_endio_write_sec()Drawinge_end_resync_block()DrawingDrawingWriteAckDrawinggot_BlockAck()DrawingChecksum based Resync, case not in syncDrawingDRBD-8.3 data flowDrawingw_e_send_csum()DrawingDrawingDrawingRSIsInSyncDrawingDrawingCsumRSRequestDrawingreceive_DataRequest()Drawingdrbd_endio_read_sec()Drawingw_e_end_csum_rs_req()Drawinggot_IsInSync()DrawingChecksum based Resync, case in 
syncDrawingDrawingDrawingDrawingOVReplyDrawingDrawingOVRequestDrawingreceive_OVRequest()Drawingdrbd_endio_read_sec()Drawingw_e_end_ov_req()Drawingreceive_OVReply()Drawingdrbd_endio_read_sec()Drawingw_e_end_ov_reply()DrawingDrawingOVResultDrawinggot_OVResult()DrawingOnline verifyDrawingw_make_ov_request()DrawingDrawingDrawingDrawingDrawingdrbd_endio_read_sec()Drawingw_make_resync_request()Drawingw_e_send_csum()DrawingDrawingdrbd_endio_read_sec()DrawingDrawingDrawingDrawingrs_begin_io()Drawingrs_begin_io()Drawingrs_begin_io()Drawingrs_complete_io()Drawingrs_complete_io()Drawingrs_complete_io()Drawingrs_begin_io()Drawingrs_begin_io()Drawingrs_begin_io()Drawingrs_complete_io()Drawingrs_complete_io()Drawingrs_complete_io() \ No newline at end of file + + + + + + Master slide + + + + + + + + + + RSDataReply + + + + + + + CsumRSRequest + + + + w_make_resync_request() + + + receive_DataRequest() + + + drbd_endio_read_sec() + + + w_e_end_csum_rs_req() + + + receive_RSDataReply() + + + drbd_endio_write_sec() + + + e_end_resync_block() + + + + + + WriteAck + + + + got_BlockAck() + + + Checksum based Resync, case not in sync + + + DRBD-8.3 data flow + + + w_e_send_csum() + + + + + + + + RSIsInSync + + + + + + + CsumRSRequest + + + + receive_DataRequest() + + + drbd_endio_read_sec() + + + w_e_end_csum_rs_req() + + + got_IsInSync() + + + Checksum based Resync, case in sync + + + + + + + + + + OVReply + + + + + + + OVRequest + + + + receive_OVRequest() + + + drbd_endio_read_sec() + + + w_e_end_ov_req() + + + receive_OVReply() + + + drbd_endio_read_sec() + + + w_e_end_ov_reply() + + + + + + OVResult + + + + got_OVResult() + + + Online verify + + + w_make_ov_request() + + + + + + + + drbd_endio_read_sec() + + + w_make_resync_request() + + + w_e_send_csum() + + + + + drbd_endio_read_sec() + + + + + + rs_begin_io() + + + rs_begin_io() + + + rs_begin_io() + + + rs_complete_io() + + + rs_complete_io() + + + rs_complete_io() + + + rs_begin_io() + + + rs_begin_io() + + + rs_begin_io() + + + rs_complete_io() + + + rs_complete_io() + + + rs_complete_io() + + diff --git a/Documentation/blockdev/drbd/DRBD-data-packets.svg b/Documentation/blockdev/drbd/DRBD-data-packets.svg index e8ba30e85a95..48a1e2165fec 100644 --- a/Documentation/blockdev/drbd/DRBD-data-packets.svg +++ b/Documentation/blockdev/drbd/DRBD-data-packets.svg @@ -1,2 +1,459 @@ - -Master slideSlideDrawingDrawingDrawingDrawingRSDataReplyDrawingDrawingRSDataRequestDrawingw_make_resync_request()Drawingreceive_DataRequest()Drawingdrbd_endio_read_sec()Drawingw_e_end_rsdata_req()Drawingreceive_RSDataReply()Drawingdrbd_endio_write_sec()Drawinge_end_resync_block()DrawingDrawingWriteAckDrawinggot_BlockAck()DrawingResync blocks, 4-32KDrawingDrawingDrawingWriteAckDrawingDrawingDataDrawingdrbd_make_request()Drawingreceive_Data()Drawingdrbd_endio_write_sec()Drawinge_end_block()Drawinggot_BlockAck()DrawingRegular mirrored write, 512-32KDrawingw_send_dblock()DrawingDrawingdrbd_endio_write_pri()DrawingDrawingDrawingDataReplyDrawingDrawingDataRequestDrawingdrbd_make_request()Drawingreceive_DataRequest()Drawingdrbd_endio_read_sec()Drawingw_e_end_data_req()Drawingreceive_DataReply()DrawingDiskless read, 512-32KDrawingw_send_read_req()DrawingDRBD 8 data flowDrawingDrawingDrawingDrawingal_begin_io()Drawingal_complete_io()Drawingrs_begin_io()Drawingrs_complete_io()Drawingrs_begin_io()Drawingrs_complete_io() \ No newline at end of file + + + + + + Master slide + + + + + + + + + RSDataReply + + + + + RSDataRequest + + + w_make_resync_request() + + + receive_DataRequest() + + + 
drbd_endio_read_sec() + + + w_e_end_rsdata_req() + + + receive_RSDataReply() + + + drbd_endio_write_sec() + + + e_end_resync_block() + + + + + WriteAck + + + got_BlockAck() + + + Resync blocks, 4-32K + + + + + + + WriteAck + + + + + Data + + + drbd_make_request() + + + receive_Data() + + + drbd_endio_write_sec() + + + e_end_block() + + + got_BlockAck() + + + Regular mirrored write, 512-32K + + + w_send_dblock() + + + + + drbd_endio_write_pri() + + + + + + + DataReply + + + + + DataRequest + + + drbd_make_request() + + + receive_DataRequest() + + + drbd_endio_read_sec() + + + w_e_end_data_req() + + + Drawing + + receive_DataReply() + + + + Diskless read, 512-32K + + + w_send_read_req() + + + DRBD 8 data flow + + + + + + al_begin_io() + + + al_complete_io() + + + rs_begin_io() + + + rs_complete_io() + + + rs_begin_io() + + + rs_complete_io() + + -- cgit v1.2.3 From 38544ea16e86b0bac2a00e76de6cc15c826d7a10 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 5 May 2009 17:27:51 +0200 Subject: Tracking DRBD mainline (all cleanups done there) Improving a comment Call drbd_rs_cancel_all() and reset rs_pending when aborting resync due to detach. (Bugz 223) Allow detach of a SyncTarget node. (Bugz 221) rename inc_local -> get_ldev; inc_net -> get_net_conf; and corresponding dec_* -> put_* replace AL with activity log in comments drbd_md_sync_page_io: prepare for rw being more than just r/w direction remove outdated comment add comment: page_address allowed, preallocated with GFP_KERNEL renaming a few constants: _SECT -> _SECTOR_SIZE, _SIZE_B -> _SHIFT ... remove quite a few 'inline's from .c files replacing __attribute__((packed)) with __packed micro: comment spelling fix Allow pass-through options to drbdsetup also for the syncer subcommand dmsetup: ERR_PACKET_NR error message was missing Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 134 ++++++++++++++++++------------------ drivers/block/drbd/drbd_bitmap.c | 20 +++--- drivers/block/drbd/drbd_buildtag.c | 4 +- drivers/block/drbd/drbd_int.h | 122 ++++++++++++++++----------------- drivers/block/drbd/drbd_main.c | 137 +++++++++++++++++++++---------------- drivers/block/drbd/drbd_nl.c | 88 ++++++++++++------------ drivers/block/drbd/drbd_proc.c | 4 +- drivers/block/drbd/drbd_receiver.c | 117 +++++++++++++++++++------------ drivers/block/drbd/drbd_req.c | 39 +++++------ drivers/block/drbd/drbd_req.h | 26 ------- drivers/block/drbd/drbd_tracing.c | 2 +- drivers/block/drbd/drbd_worker.c | 44 ++++++------ drivers/block/drbd/drbd_wrappers.h | 3 +- drivers/block/drbd/lru_cache.c | 4 +- 14 files changed, 378 insertions(+), 366 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index c894b4fa6af0..f1318e57f375 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -29,14 +29,14 @@ #include "drbd_tracing.h" #include "drbd_wrappers.h" -/* I do not believe that all storage medias can guarantee atomic - * 512 byte write operations. When the journal is read, only - * transactions with correct xor_sums are considered. - * sizeof() = 512 byte */ -struct __attribute__((packed)) al_transaction { +/* We maintain a trivial check sum in our on disk activity log. + * With that we can ensure correct operation even when the storage + * device might do a partial (last) sector write while loosing power. 
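
The replacement comment above spells out why the activity log survives torn metadata writes: every 512-byte transaction carries a trivial xor checksum, and records whose xor_sum does not match are ignored when the log is read back. A userspace sketch of that validation; the struct layout follows the hunk below, but AL_EXTENTS_PT, the magic value, and exactly which fields feed the xor are simplifications:

/* cc -o altr altr.c && ./altr */
#include <stdint.h>
#include <stdio.h>

#define AL_EXTENTS_PT 6	/* stand-in; DRBD derives the real value */

struct al_transaction {
	uint32_t magic;
	uint32_t tr_number;
	struct { uint32_t pos; uint32_t extent; } updates[1 + AL_EXTENTS_PT];
	uint32_t xor_sum;
};

static uint32_t al_xor(const struct al_transaction *t)
{
	uint32_t x = 0;
	int i;

	for (i = 0; i < 1 + AL_EXTENTS_PT; i++)
		x ^= t->updates[i].extent;
	return x;
}

int main(void)
{
	struct al_transaction t = { .magic = 0x12345678 /* stand-in */ };
	int i;

	for (i = 0; i < 1 + AL_EXTENTS_PT; i++) {
		t.updates[i].pos = i;
		t.updates[i].extent = 100 + i;
	}
	t.xor_sum = al_xor(&t);

	t.updates[3].extent ^= 0xdead;	/* simulate a torn write */
	printf("valid: %s\n", al_xor(&t) == t.xor_sum ? "yes" : "no");	/* no */
	return 0;
}
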
+ */ +struct __packed al_transaction { u32 magic; u32 tr_number; - struct __attribute__((packed)) { + struct __packed { u32 pos; u32 extent; } updates[1 + AL_EXTENTS_PT]; u32 xor_sum; @@ -133,7 +133,7 @@ STATIC int _drbd_md_sync_page_io(struct drbd_conf *mdev, int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw) { - int hardsect, mask, ok; + int hardsect_size, mask, ok; int offset = 0; struct page *iop = mdev->md_io_page; @@ -141,34 +141,36 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, BUG_ON(!bdev->md_bdev); - hardsect = drbd_get_hardsect(bdev->md_bdev); - if (hardsect == 0) - hardsect = MD_HARDSECT; + hardsect_size = drbd_get_hardsect_size(bdev->md_bdev); + if (hardsect_size == 0) + hardsect_size = MD_SECTOR_SIZE; - /* in case hardsect != 512 [ s390 only? ] */ - if (hardsect != MD_HARDSECT) { - mask = (hardsect / MD_HARDSECT) - 1; + /* in case hardsect_size != 512 [ s390 only? ] */ + if (hardsect_size != MD_SECTOR_SIZE) { + mask = (hardsect_size / MD_SECTOR_SIZE) - 1; D_ASSERT(mask == 1 || mask == 3 || mask == 7); - D_ASSERT(hardsect == (mask+1) * MD_HARDSECT); + D_ASSERT(hardsect_size == (mask+1) * MD_SECTOR_SIZE); offset = sector & mask; sector = sector & ~mask; iop = mdev->md_io_tmpp; - if (rw == WRITE) { + if (rw & WRITE) { + /* these are GFP_KERNEL pages, preallocated + * on device initialization */ void *p = page_address(mdev->md_io_page); void *hp = page_address(mdev->md_io_tmpp); - ok = _drbd_md_sync_page_io(mdev, bdev, iop, - sector, READ, hardsect); + ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, + READ, hardsect_size); if (unlikely(!ok)) { dev_err(DEV, "drbd_md_sync_page_io(,%llus," - "READ [hardsect!=512]) failed!\n", + "READ [hardsect_size!=512]) failed!\n", (unsigned long long)sector); return 0; } - memcpy(hp + offset*MD_HARDSECT , p, MD_HARDSECT); + memcpy(hp + offset*MD_SECTOR_SIZE, p, MD_SECTOR_SIZE); } } @@ -176,27 +178,26 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector > drbd_md_last_sector(bdev)) dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n", current->comm, current->pid, __func__, - (unsigned long long)sector, rw ? "WRITE" : "READ"); + (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); - ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, hardsect); + ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, hardsect_size); if (unlikely(!ok)) { dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n", - (unsigned long long)sector, rw ? "WRITE" : "READ"); + (unsigned long long)sector, (rw & WRITE) ? 
"WRITE" : "READ"); return 0; } - if (hardsect != MD_HARDSECT && rw == READ) { + if (hardsect_size != MD_SECTOR_SIZE && !(rw & WRITE)) { void *p = page_address(mdev->md_io_page); void *hp = page_address(mdev->md_io_tmpp); - memcpy(p, hp + offset*MD_HARDSECT, MD_HARDSECT); + memcpy(p, hp + offset*MD_SECTOR_SIZE, MD_SECTOR_SIZE); } return ok; } -static inline -struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) +static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) { struct lc_element *al_ext; struct bm_extent *bm_ext; @@ -229,7 +230,7 @@ struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) { - unsigned int enr = (sector >> (AL_EXTENT_SIZE_B-9)); + unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9)); struct lc_element *al_ext; struct update_al_work al_work; @@ -241,10 +242,11 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) if (al_ext->lc_number != enr) { /* drbd_al_write_transaction(mdev,al_ext,enr); - generic_make_request() are serialized on the - current->bio_tail list now. Therefore we have - to deligate writing something to AL to the - worker thread. */ + * recurses into generic_make_request(), which + * disalows recursion, bios being serialized on the + * current->bio_tail list now. + * we have to delegate updates to the activity log + * to the worker thread. */ init_completion(&al_work.event); al_work.al_ext = al_ext; al_work.enr = enr; @@ -264,7 +266,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector) { - unsigned int enr = (sector >> (AL_EXTENT_SIZE_B-9)); + unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9)); struct lc_element *extent; unsigned long flags; @@ -293,15 +295,14 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) struct lc_element *updated = aw->al_ext; const unsigned int new_enr = aw->enr; const unsigned int evicted = aw->old_enr; - struct al_transaction *buffer; sector_t sector; int i, n, mx; unsigned int extent_nr; u32 xor_sum = 0; - if (!inc_local(mdev)) { - dev_err(DEV, "inc_local() failed in w_al_write_transaction\n"); + if (!get_ldev(mdev)) { + dev_err(DEV, "get_ldev() failed in w_al_write_transaction\n"); complete(&((struct update_al_work *)w)->event); return 1; } @@ -363,13 +364,13 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) mutex_unlock(&mdev->md_io_mutex); complete(&((struct update_al_work *)w)->event); - dec_local(mdev); + put_ldev(mdev); return 1; } /** - * drbd_al_read_tr: Reads a single transaction record form the + * drbd_al_read_tr: Reads a single transaction record from the * on disk activity log. * Returns -1 on IO error, 0 on checksum error and 1 if it is a valid * record. 
@@ -543,10 +544,10 @@ STATIC void atodb_endio(struct bio *bio, int error) put_page(page); bio_put(bio); mdev->bm_writ_cnt++; - dec_local(mdev); + put_ldev(mdev); } -#define S2W(s) ((s)<<(BM_EXT_SIZE_B-BM_BLOCK_SIZE_B-LN2_BPL)) +#define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL)) /* activity log to on disk bitmap -- prepare bio unless that sector * is already covered by previously prepared bios */ STATIC int atodb_prepare_unless_covered(struct drbd_conf *mdev, @@ -606,12 +607,12 @@ STATIC int atodb_prepare_unless_covered(struct drbd_conf *mdev, bio->bi_bdev = mdev->bc->md_bdev; bio->bi_sector = on_disk_sector; - if (bio_add_page(bio, page, MD_HARDSECT, page_offset) != MD_HARDSECT) + if (bio_add_page(bio, page, MD_SECTOR_SIZE, page_offset) != MD_SECTOR_SIZE) goto out_put_page; atomic_inc(&wc->count); /* we already know that we may do this... - * inc_local_if_state(mdev,D_ATTACHING); + * get_ldev_if_state(mdev,D_ATTACHING); * just get the extra reference, so that the local_cnt reflects * the number of pending IO requests DRBD at its backing device. */ @@ -631,7 +632,8 @@ out_bio_put: /** * drbd_al_to_on_disk_bm: - * Writes the areas of the bitmap which are covered by the AL. + * Writes the areas of the bitmap which are covered by the + * currently active extents of the activity log. * called when we detach (unconfigure) local storage, * or when we go from R_PRIMARY to R_SECONDARY state. */ @@ -642,7 +644,7 @@ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev) struct bio **bios; struct drbd_atodb_wait wc; - ERR_IF (!inc_local_if_state(mdev, D_ATTACHING)) + ERR_IF (!get_ldev_if_state(mdev, D_ATTACHING)) return; /* sorry, I don't have any act_log etc... */ wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); @@ -699,7 +701,7 @@ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev) if (atomic_read(&wc.count)) wait_for_completion(&wc.io_done); - dec_local(mdev); + put_ldev(mdev); if (wc.error) drbd_io_error(mdev, TRUE); @@ -727,12 +729,12 @@ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev) lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); - dec_local(mdev); + put_ldev(mdev); } /** - * drbd_al_apply_to_bm: Sets the bits in the bitmap that are described - * by the active extents of the AL. + * drbd_al_apply_to_bm: Sets the bits in the in-memory bitmap + * which are described by the active extents of the activity log. */ void drbd_al_apply_to_bm(struct drbd_conf *mdev) { @@ -757,7 +759,7 @@ void drbd_al_apply_to_bm(struct drbd_conf *mdev) ppsize(ppb, Bit2KB(add))); } -static inline int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext) +static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext) { int rv; @@ -771,8 +773,8 @@ static inline int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext) } /** - * drbd_al_shrink: Removes all active extents form the AL. (but does not - * write any transactions) + * drbd_al_shrink: Removes all active extents form the activity log. 
+ * (but does not write any transactions) * You need to lock mdev->act_log with lc_try_lock() / lc_unlock() */ void drbd_al_shrink(struct drbd_conf *mdev) @@ -796,14 +798,14 @@ STATIC int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused { struct update_odbm_work *udw = (struct update_odbm_work *)w; - if (!inc_local(mdev)) { + if (!get_ldev(mdev)) { if (__ratelimit(&drbd_ratelimit_state)) dev_warn(DEV, "Can not update on disk bitmap, local IO disabled.\n"); return 1; } drbd_bm_write_sect(mdev, udw->enr); - dec_local(mdev); + put_ldev(mdev); kfree(udw); @@ -825,7 +827,7 @@ STATIC int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused /* ATTENTION. The AL's extents are 4MB each, while the extents in the * resync LRU-cache are 16MB each. - * The caller of this function has to hold an inc_local() reference. + * The caller of this function has to hold an get_ldev() reference. * * TODO will be obsoleted once we have a caching lru of the on disk bitmap */ @@ -977,9 +979,9 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, mdev->rs_mark_left = drbd_bm_total_weight(mdev); } } - if (inc_local(mdev)) { + if (get_ldev(mdev)) { drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE); - dec_local(mdev); + put_ldev(mdev); } /* just wake_up unconditional now, various lc_chaged(), * lc_put() in drbd_try_clear_on_disk_bm(). */ @@ -1012,7 +1014,7 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, return; } - if (!inc_local(mdev)) + if (!get_ldev(mdev)) return; /* no disk, no metadata, no bitmap to set bits in */ nr_sectors = drbd_get_capacity(mdev->this_bdev); @@ -1046,10 +1048,10 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, spin_unlock_irqrestore(&mdev->al_lock, flags); out: - dec_local(mdev); + put_ldev(mdev); } -static inline +static struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) { struct bm_extent *bm_ext; @@ -1088,7 +1090,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) return bm_ext; } -static inline int _is_in_al(struct drbd_conf *mdev, unsigned int enr) +static int _is_in_al(struct drbd_conf *mdev, unsigned int enr) { struct lc_element *al_ext; int rv = 0; @@ -1330,9 +1332,9 @@ void drbd_rs_cancel_all(struct drbd_conf *mdev) spin_lock_irq(&mdev->al_lock); - if (inc_local_if_state(mdev, D_FAILED)) { /* Makes sure ->resync is there. */ + if (get_ldev_if_state(mdev, D_FAILED)) { /* Makes sure ->resync is there. */ lc_reset(mdev->resync); - dec_local(mdev); + put_ldev(mdev); } mdev->resync_locked = 0; mdev->resync_wenr = LC_FREE; @@ -1355,7 +1357,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev) spin_lock_irq(&mdev->al_lock); - if (inc_local_if_state(mdev, D_FAILED)) { + if (get_ldev_if_state(mdev, D_FAILED)) { /* ok, ->resync is there. */ for (i = 0; i < mdev->resync->nr_elements; i++) { bm_ext = (struct bm_extent *) lc_entry(mdev->resync, i); @@ -1374,7 +1376,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev) if (bm_ext->lce.refcnt != 0) { dev_info(DEV, "Retrying drbd_rs_del_all() later. 
" "refcnt=%d\n", bm_ext->lce.refcnt); - dec_local(mdev); + put_ldev(mdev); spin_unlock_irq(&mdev->al_lock); return -EAGAIN; } @@ -1383,7 +1385,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev) lc_del(mdev->resync, &bm_ext->lce); } D_ASSERT(mdev->resync->used == 0); - dec_local(mdev); + put_ldev(mdev); } spin_unlock_irq(&mdev->al_lock); @@ -1443,9 +1445,9 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size) if (count) { mdev->rs_failed += count; - if (inc_local(mdev)) { + if (get_ldev(mdev)) { drbd_try_clear_on_disk_bm(mdev, sector, count, FALSE); - dec_local(mdev); + put_ldev(mdev); } /* just wake_up unconditional now, various lc_chaged(), diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index c160f7ab9e01..213fa12da121 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -81,7 +81,7 @@ struct drbd_bitmap { #define BM_LOCKED 0 #define BM_MD_IO_ERROR 1 -static inline int bm_is_locked(struct drbd_bitmap *b) +static int bm_is_locked(struct drbd_bitmap *b) { return test_bit(BM_LOCKED, &b->bm_flags); } @@ -178,7 +178,7 @@ void bm_unmap(unsigned long *p_addr) } /* long word offset of _bitmap_ sector */ -#define S2W(s) ((s)<<(BM_EXT_SIZE_B-BM_BLOCK_SIZE_B-LN2_BPL)) +#define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL)) /* word offset from start of bitmap to word number _in_page_ * modulo longs per page #define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long)) @@ -384,12 +384,12 @@ STATIC unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endia return bits; } -static inline unsigned long bm_count_bits(struct drbd_bitmap *b) +static unsigned long bm_count_bits(struct drbd_bitmap *b) { return __bm_count_bits(b, 0); } -static inline unsigned long bm_count_bits_swap_endian(struct drbd_bitmap *b) +static unsigned long bm_count_bits_swap_endian(struct drbd_bitmap *b) { return __bm_count_bits(b, 1); } @@ -498,9 +498,9 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity) */ words = ALIGN(bits, 64) >> LN2_BPL; - if (inc_local(mdev)) { + if (get_ldev(mdev)) { D_ASSERT((u64)bits <= (((u64)mdev->bc->md.md_size_sect-MD_BM_OFFSET) << 12)); - dec_local(mdev); + put_ldev(mdev); } /* one extra long to catch off by one errors */ @@ -580,7 +580,7 @@ unsigned long drbd_bm_total_weight(struct drbd_conf *mdev) unsigned long flags; /* if I don't have a disk, I don't know about out-of-sync status */ - if (!inc_local_if_state(mdev, D_NEGOTIATING)) + if (!get_ldev_if_state(mdev, D_NEGOTIATING)) return 0; ERR_IF(!b) return 0; @@ -590,7 +590,7 @@ unsigned long drbd_bm_total_weight(struct drbd_conf *mdev) s = b->bm_set; spin_unlock_irqrestore(&b->bm_lock, flags); - dec_local(mdev); + put_ldev(mdev); return s; } @@ -864,7 +864,7 @@ STATIC int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) now = b->bm_set; dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", - ppsize(ppb, now << (BM_BLOCK_SIZE_B-10)), now); + ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); return err; } @@ -908,7 +908,7 @@ int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(lo offset = S2W(enr); /* word offset into bitmap */ num_words = min(S2W(1), bm_words - offset); if (num_words < S2W(1)) - memset(page_address(mdev->md_io_page), 0, MD_HARDSECT); + memset(page_address(mdev->md_io_page), 0, MD_SECTOR_SIZE); drbd_bm_get_lel(mdev, offset, num_words, page_address(mdev->md_io_page)); if (!drbd_md_sync_page_io(mdev, mdev->bc, on_disk_sector, WRITE)) { diff --git a/drivers/block/drbd/drbd_buildtag.c 
b/drivers/block/drbd/drbd_buildtag.c index a58ad76078a5..213234342e70 100644 --- a/drivers/block/drbd/drbd_buildtag.c +++ b/drivers/block/drbd/drbd_buildtag.c @@ -2,6 +2,6 @@ #include const char *drbd_buildtag(void) { - return "GIT-hash: 29ef4c01e46b0a269d7bec39d5178be06097fead drbd/Kconfig drbd/Makefile drbd/Makefile-2.6 drbd/drbd_actlog.c drbd/drbd_bitmap.c drbd/drbd_int.h drbd/drbd_main.c drbd/drbd_nl.c drbd/drbd_proc.c drbd/drbd_receiver.c drbd/drbd_req.c drbd/drbd_req.h drbd/drbd_tracing.c drbd/drbd_tracing.h drbd/drbd_worker.c drbd/drbd_wrappers.h drbd/linux/drbd_config.h" - " build by phil@fat-tyre, 2009-04-29 15:43:41"; + return "GIT-hash: c522e740ae3163f5a5ff83c0c58d9f2801299961 drbd/drbd_int.h" + " build by phil@fat-tyre, 2009-05-05 17:15:39"; } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 175de11d31e4..dcc35bf67eea 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include "lru_cache.h" @@ -131,7 +131,7 @@ struct drbd_conf; #define ERR_IF(exp) if (({ \ int _b = (exp) != 0; \ - if (_b) dev_err(DEV, "%s: (%s) in %s:%d\n", \ + if (_b) dev_err(DEV, "%s: (%s) in %s:%d\n", \ __func__, #exp, __FILE__, __LINE__); \ _b; \ })) @@ -350,7 +350,7 @@ struct p_header { u16 command; u16 length; /* bytes of data after this header */ u8 payload[0]; -} __attribute((packed)); +} __packed; /* 8 bytes. packet FIXED for the next century! */ /* @@ -380,7 +380,7 @@ struct p_data { u64 block_id; /* to identify the request in protocol B&C */ u32 seq_num; u32 dp_flags; -} __attribute((packed)); +} __packed; /* * commands which share a struct: @@ -396,7 +396,7 @@ struct p_block_ack { u64 block_id; u32 blksize; u32 seq_num; -} __attribute((packed)); +} __packed; struct p_block_req { @@ -405,7 +405,7 @@ struct p_block_req { u64 block_id; u32 blksize; u32 pad; /* to multiple of 8 Byte */ -} __attribute((packed)); +} __packed; /* * commands with their own struct for additional fields: @@ -428,20 +428,20 @@ struct p_handshake { u32 _pad; u64 reserverd[7]; -} __attribute((packed)); +} __packed; /* 80 bytes, FIXED for the next century */ struct p_barrier { struct p_header head; u32 barrier; /* barrier number _handle_ only */ u32 pad; /* to multiple of 8 Byte */ -} __attribute((packed)); +} __packed; struct p_barrier_ack { struct p_header head; u32 barrier; u32 set_size; -} __attribute((packed)); +} __packed; struct p_rs_param { struct p_header head; @@ -449,7 +449,7 @@ struct p_rs_param { /* Since protocol version 88 and higher. */ char verify_alg[0]; -} __attribute((packed)); +} __packed; struct p_rs_param_89 { struct p_header head; @@ -457,7 +457,7 @@ struct p_rs_param_89 { /* protocol version 89: */ char verify_alg[SHARED_SECRET_MAX]; char csums_alg[SHARED_SECRET_MAX]; -} __attribute((packed)); +} __packed; struct p_protocol { struct p_header head; @@ -471,17 +471,17 @@ struct p_protocol { /* Since protocol version 87 and higher. 
*/ char integrity_alg[0]; -} __attribute((packed)); +} __packed; struct p_uuids { struct p_header head; u64 uuid[UI_EXTENDED_SIZE]; -} __attribute((packed)); +} __packed; struct p_rs_uuid { struct p_header head; u64 uuid; -} __attribute((packed)); +} __packed; struct p_sizes { struct p_header head; @@ -490,23 +490,23 @@ struct p_sizes { u64 c_size; /* current exported size */ u32 max_segment_size; /* Maximal size of a BIO */ u32 queue_order_type; -} __attribute((packed)); +} __packed; struct p_state { struct p_header head; u32 state; -} __attribute((packed)); +} __packed; struct p_req_state { struct p_header head; u32 mask; u32 val; -} __attribute((packed)); +} __packed; struct p_req_state_reply { struct p_header head; u32 retcode; -} __attribute((packed)); +} __packed; struct p_drbd06_param { u64 size; @@ -516,14 +516,14 @@ struct p_drbd06_param { u32 version; u32 gen_cnt[5]; u32 bit_map_gen[5]; -} __attribute((packed)); +} __packed; struct p_discard { struct p_header head; u64 block_id; u32 seq_num; u32 pad; -} __attribute((packed)); +} __packed; /* Valid values for the encoding field. * Bump proto version when changing this. */ @@ -544,7 +544,7 @@ struct p_compressed_bm { u8 encoding; u8 code[0]; -} __attribute((packed)); +} __packed; static inline enum drbd_bitmap_code DCBP_get_code(struct p_compressed_bm *p) @@ -612,7 +612,7 @@ union p_polymorph { struct p_req_state req_state; struct p_req_state_reply req_state_reply; struct p_block_req block_req; -} __attribute((packed)); +} __packed; /**********************************************************************/ enum drbd_thread_state { @@ -889,7 +889,7 @@ struct drbd_conf { unsigned long flags; /* configured by drbdsetup */ - struct net_conf *net_conf; /* protected by inc_net() and dec_net() */ + struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ struct syncer_conf sync_conf; struct drbd_backing_dev *bc __protected_by(local); @@ -994,7 +994,7 @@ struct drbd_conf { atomic_t pp_in_use; wait_queue_head_t ee_wait; struct page *md_io_page; /* one page buffer for md_io */ - struct page *md_io_tmpp; /* for hardsect != 512 [s390 only?] */ + struct page *md_io_tmpp; /* for hardsect_size != 512 [s390 only?] */ struct mutex md_io_mutex; /* protects the md_io_buffer */ spinlock_t al_lock; wait_queue_head_t al_wait; @@ -1187,13 +1187,13 @@ extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf #define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE) /* Since the smalles IO unit is usually 512 byte */ -#define MD_HARDSECT_B 9 -#define MD_HARDSECT (1< local node thinks this block needs to be synced. */ -#define BM_BLOCK_SIZE_B 12 /* 4k per bit */ -#define BM_BLOCK_SIZE (1<>(BM_BLOCK_SIZE_B-9)) -#define BM_BIT_TO_SECT(x) ((sector_t)(x)<<(BM_BLOCK_SIZE_B-9)) +#define BM_SECT_TO_BIT(x) ((x)>>(BM_BLOCK_SHIFT-9)) +#define BM_BIT_TO_SECT(x) ((sector_t)(x)<<(BM_BLOCK_SHIFT-9)) #define BM_SECT_PER_BIT BM_BIT_TO_SECT(1) /* bit to represented kilo byte conversion */ -#define Bit2KB(bits) ((bits)<<(BM_BLOCK_SIZE_B-10)) +#define Bit2KB(bits) ((bits)<<(BM_BLOCK_SHIFT-10)) /* in which _bitmap_ extent (resp. 
sector) the bit for a certain * _storage_ sector is located in */ -#define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SIZE_B-9)) +#define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9)) /* how much _storage_ sectors we have per bitmap sector */ -#define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SIZE_B-9)) +#define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9)) #define BM_SECT_PER_EXT BM_EXT_TO_SECT(1) /* in one sector of the bitmap, we have this many activity_log extents. */ -#define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SIZE_B - AL_EXTENT_SIZE_B)) -#define BM_WORDS_PER_AL_EXT (1 << (AL_EXTENT_SIZE_B-BM_BLOCK_SIZE_B-LN2_BPL)) +#define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT)) +#define BM_WORDS_PER_AL_EXT (1 << (AL_EXTENT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL)) -#define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SIZE_B - BM_BLOCK_SIZE_B) +#define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT) #define BM_BLOCKS_PER_BM_EXT_MASK ((1<net_cnt)) wake_up(&mdev->misc_wait); } /** - * inc_net: Returns TRUE when it is ok to access mdev->net_conf. You - * should call dec_net() when finished looking at mdev->net_conf. + * get_net_conf: Returns TRUE when it is ok to access mdev->net_conf. You + * should call put_net_conf() when finished looking at mdev->net_conf. */ -static inline int inc_net(struct drbd_conf *mdev) +static inline int get_net_conf(struct drbd_conf *mdev) { int have_net_conf; atomic_inc(&mdev->net_cnt); have_net_conf = mdev->state.conn >= C_UNCONNECTED; if (!have_net_conf) - dec_net(mdev); + put_net_conf(mdev); return have_net_conf; } /** - * inc_local: Returns TRUE when local IO is possible. If it returns - * TRUE you should call dec_local() after IO is completed. + * get_ldev: Returns TRUE when local IO is possible. If it returns + * TRUE you should call put_ldev() after IO is completed. 
*/ -#define inc_local_if_state(M,MINS) __cond_lock(local, _inc_local_if_state(M,MINS)) -#define inc_local(M) __cond_lock(local, _inc_local_if_state(M,D_INCONSISTENT)) +#define get_ldev_if_state(M,MINS) __cond_lock(local, _get_ldev_if_state(M,MINS)) +#define get_ldev(M) __cond_lock(local, _get_ldev_if_state(M,D_INCONSISTENT)) -static inline void dec_local(struct drbd_conf *mdev) +static inline void put_ldev(struct drbd_conf *mdev) { __release(local); if (atomic_dec_and_test(&mdev->local_cnt)) @@ -1939,21 +1939,21 @@ static inline void dec_local(struct drbd_conf *mdev) } #ifndef __CHECKER__ -static inline int _inc_local_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) +static inline int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) { int io_allowed; atomic_inc(&mdev->local_cnt); io_allowed = (mdev->state.disk >= mins); if (!io_allowed) - dec_local(mdev); + put_ldev(mdev); return io_allowed; } #else -extern int _inc_local_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins); +extern int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins); #endif -/* you must have an "inc_local" reference */ +/* you must have an "get_ldev" reference */ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, unsigned long *bits_left, unsigned int *per_mil_done) { @@ -1997,9 +1997,9 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, static inline int drbd_get_max_buffers(struct drbd_conf *mdev) { int mxb = 1000000; /* arbitrary limit on open requests */ - if (inc_net(mdev)) { + if (get_net_conf(mdev)) { mxb = mdev->net_conf->max_buffers; - dec_net(mdev); + put_net_conf(mdev); } return mxb; } @@ -2196,9 +2196,9 @@ static inline void drbd_blk_run_queue(struct request_queue *q) static inline void drbd_kick_lo(struct drbd_conf *mdev) { - if (inc_local(mdev)) { + if (get_ldev(mdev)) { drbd_blk_run_queue(bdev_get_queue(mdev->bc->backing_bdev)); - dec_local(mdev); + put_ldev(mdev); } } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4a2593ce1d37..3c377d326570 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -173,7 +173,7 @@ STATIC struct block_device_operations drbd_ops = { /* When checking with sparse, and this is an inline function, sparse will give tons of false positives. When this is a real functions sparse works. 
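
The _SIZE_B to _SHIFT renames above make the unit arithmetic self-describing: with BM_BLOCK_SHIFT = 12 one bitmap bit stands for a 4 KiB block, i.e. 1 << (12 - 9) = 8 sectors of 512 bytes. A quick check of the conversion macros, copied from the hunk:

/* cc -o bmshift bmshift.c && ./bmshift */
#include <stdio.h>

#define BM_BLOCK_SHIFT	12	/* 4k per bit */
#define BM_SECT_TO_BIT(x)  ((x) >> (BM_BLOCK_SHIFT - 9))
#define BM_BIT_TO_SECT(x)  ((unsigned long long)(x) << (BM_BLOCK_SHIFT - 9))
#define Bit2KB(bits)       ((bits) << (BM_BLOCK_SHIFT - 10))

int main(void)
{
	unsigned long long sector = 12345;
	unsigned long long bit = BM_SECT_TO_BIT(sector);

	printf("sector %llu -> bit %llu, covering sectors %llu..%llu; %d KB per bit\n",
	       sector, bit, BM_BIT_TO_SECT(bit), BM_BIT_TO_SECT(bit + 1) - 1,
	       Bit2KB(1));
	return 0;
}
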
*/ -int _inc_local_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) +int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) { int io_allowed; @@ -400,9 +400,9 @@ int drbd_io_error(struct drbd_conf *mdev, int forcedetach) int ok = 1; eh = EP_PASS_ON; - if (inc_local_if_state(mdev, D_FAILED)) { + if (get_ldev_if_state(mdev, D_FAILED)) { eh = mdev->bc->dc.on_io_error; - dec_local(mdev); + put_ldev(mdev); } if (!forcedetach && eh == EP_PASS_ON) @@ -480,11 +480,13 @@ void drbd_force_state(struct drbd_conf *mdev, drbd_change_state(mdev, CS_HARD, mask, val); } -int is_valid_state(struct drbd_conf *mdev, union drbd_state ns); -int is_valid_state_transition(struct drbd_conf *, - union drbd_state, union drbd_state); +STATIC int is_valid_state(struct drbd_conf *mdev, union drbd_state ns); +STATIC int is_valid_state_transition(struct drbd_conf *, + union drbd_state, union drbd_state); +STATIC union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, int *warn_sync_abort); int drbd_send_state_req(struct drbd_conf *, - union drbd_state, union drbd_state); + union drbd_state, union drbd_state); STATIC enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val) @@ -503,6 +505,8 @@ STATIC enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev, spin_lock_irqsave(&mdev->req_lock, flags); os = mdev->state; ns.i = (os.i & ~mask.i) | val.i; + ns = sanitize_state(mdev, os, ns, NULL); + if (!cl_wide_st_chg(mdev, os, ns)) rv = SS_CW_NO_NEED; if (!rv) { @@ -541,6 +545,7 @@ STATIC int drbd_req_state(struct drbd_conf *mdev, spin_lock_irqsave(&mdev->req_lock, flags); os = mdev->state; ns.i = (os.i & ~mask.i) | val.i; + ns = sanitize_state(mdev, os, ns, NULL); if (cl_wide_st_chg(mdev, os, ns)) { rv = is_valid_state(mdev, ns); @@ -567,7 +572,6 @@ STATIC int drbd_req_state(struct drbd_conf *mdev, (rv = _req_st_cond(mdev, mask, val))); if (rv < SS_SUCCESS) { - /* nearly dead code. 
*/ drbd_state_unlock(mdev); if (f & CS_VERBOSE) print_st_err(mdev, os, ns, rv); @@ -655,7 +659,7 @@ void print_st_err(struct drbd_conf *mdev, A##s_to_name(ns.A)); \ } }) -int is_valid_state(struct drbd_conf *mdev, union drbd_state ns) +STATIC int is_valid_state(struct drbd_conf *mdev, union drbd_state ns) { /* See drbd_state_sw_errors in drbd_strings.c */ @@ -663,16 +667,16 @@ int is_valid_state(struct drbd_conf *mdev, union drbd_state ns) int rv = SS_SUCCESS; fp = FP_DONT_CARE; - if (inc_local(mdev)) { + if (get_ldev(mdev)) { fp = mdev->bc->dc.fencing; - dec_local(mdev); + put_ldev(mdev); } - if (inc_net(mdev)) { + if (get_net_conf(mdev)) { if (!mdev->net_conf->two_primaries && ns.role == R_PRIMARY && ns.peer == R_PRIMARY) rv = SS_TWO_PRIMARIES; - dec_net(mdev); + put_net_conf(mdev); } if (rv <= 0) @@ -714,8 +718,8 @@ int is_valid_state(struct drbd_conf *mdev, union drbd_state ns) return rv; } -int is_valid_state_transition(struct drbd_conf *mdev, - union drbd_state ns, union drbd_state os) +STATIC int is_valid_state_transition(struct drbd_conf *mdev, + union drbd_state ns, union drbd_state os) { int rv = SS_SUCCESS; @@ -755,27 +759,17 @@ int is_valid_state_transition(struct drbd_conf *mdev, return rv; } -int __drbd_set_state(struct drbd_conf *mdev, - union drbd_state ns, enum chg_state_flags flags, - struct completion *done) +STATIC union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, int *warn_sync_abort) { - union drbd_state os; - int rv = SS_SUCCESS; - int warn_sync_abort = 0; enum drbd_fencing_p fp; - struct after_state_chg_work *ascw; - - - os = mdev->state; fp = FP_DONT_CARE; - if (inc_local(mdev)) { + if (get_ldev(mdev)) { fp = mdev->bc->dc.fencing; - dec_local(mdev); + put_ldev(mdev); } - /* Early state sanitising. 
*/ - /* Dissalow Network errors to configure a device's network part */ if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) && os.conn <= C_DISCONNECTING) @@ -804,9 +798,11 @@ int __drbd_set_state(struct drbd_conf *mdev, if (ns.conn <= C_DISCONNECTING && ns.disk == D_DISKLESS) ns.pdsk = D_UNKNOWN; + /* Abort resync if a disk fails/detaches */ if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED && (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { - warn_sync_abort = 1; + if (warn_sync_abort) + *warn_sync_abort = 1; ns.conn = C_CONNECTED; } @@ -857,7 +853,7 @@ int __drbd_set_state(struct drbd_conf *mdev, /* Connection breaks down before we finished "Negotiating" */ if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING && - inc_local_if_state(mdev, D_NEGOTIATING)) { + get_ldev_if_state(mdev, D_NEGOTIATING)) { if (mdev->ed_uuid == mdev->bc->md.uuid[UI_CURRENT]) { ns.disk = mdev->new_state_tmp.disk; ns.pdsk = mdev->new_state_tmp.pdsk; @@ -866,7 +862,7 @@ int __drbd_set_state(struct drbd_conf *mdev, ns.disk = D_DISKLESS; ns.pdsk = D_UNKNOWN; } - dec_local(mdev); + put_ldev(mdev); } if (fp == FP_STONITH && @@ -887,6 +883,22 @@ int __drbd_set_state(struct drbd_conf *mdev, ns.conn = C_SYNC_TARGET; } + return ns; +} + +int __drbd_set_state(struct drbd_conf *mdev, + union drbd_state ns, enum chg_state_flags flags, + struct completion *done) +{ + union drbd_state os; + int rv = SS_SUCCESS; + int warn_sync_abort = 0; + struct after_state_chg_work *ascw; + + os = mdev->state; + + ns = sanitize_state(mdev, os, ns, &warn_sync_abort); + if (ns.i == os.i) return SS_NOTHING_TO_DO; @@ -997,7 +1009,7 @@ int __drbd_set_state(struct drbd_conf *mdev, mod_timer(&mdev->resync_timer, jiffies); } - if (inc_local(mdev)) { + if (get_ldev(mdev)) { u32 mdf = mdev->bc->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND| MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE| MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY); @@ -1021,7 +1033,7 @@ int __drbd_set_state(struct drbd_conf *mdev, } if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT) drbd_set_ed_uuid(mdev, mdev->bc->md.uuid[UI_CURRENT]); - dec_local(mdev); + put_ldev(mdev); } /* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */ @@ -1102,9 +1114,9 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, } fp = FP_DONT_CARE; - if (inc_local(mdev)) { + if (get_ldev(mdev)) { fp = mdev->bc->dc.fencing; - dec_local(mdev); + put_ldev(mdev); } /* Inform userspace about the change... */ @@ -1145,24 +1157,24 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, ns.pdsk == D_OUTDATED)) { kfree(mdev->p_uuid); mdev->p_uuid = NULL; - if (inc_local(mdev)) { + if (get_ldev(mdev)) { if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && mdev->bc->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { drbd_uuid_new_current(mdev); drbd_send_uuids(mdev); } - dec_local(mdev); + put_ldev(mdev); } } - if (ns.pdsk < D_INCONSISTENT && inc_local(mdev)) { + if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { if (ns.peer == R_PRIMARY && mdev->bc->md.uuid[UI_BITMAP] == 0) drbd_uuid_new_current(mdev); /* D_DISKLESS Peer becomes secondary */ if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) drbd_al_to_on_disk_bm(mdev); - dec_local(mdev); + put_ldev(mdev); } /* Last part of the attaching process ... 
*/ @@ -1202,11 +1214,16 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate"); if (os.disk > D_DISKLESS && ns.disk == D_DISKLESS) { - /* since inc_local() only works as long as disk>=D_INCONSISTENT, + /* since get_ldev() only works as long as disk>=D_INCONSISTENT, and it is D_DISKLESS here, local_cnt can only go down, it can not increase... It will reach zero */ wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); + drbd_rs_cancel_all(mdev); + mdev->rs_total = 0; + mdev->rs_failed = 0; + atomic_set(&mdev->rs_pending_cnt, 0); + lc_free(mdev->resync); mdev->resync = NULL; lc_free(mdev->act_log); @@ -1602,7 +1619,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) struct p_uuids p; int i; - if (!inc_local_if_state(mdev, D_NEGOTIATING)) + if (!get_ldev_if_state(mdev, D_NEGOTIATING)) return 1; for (i = UI_CURRENT; i < UI_SIZE; i++) @@ -1615,7 +1632,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0; p.uuid[UI_FLAGS] = cpu_to_be64(uuid_flags); - dec_local(mdev); + put_ldev(mdev); return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS, (struct p_header *)&p, sizeof(p)); @@ -1649,13 +1666,13 @@ int drbd_send_sizes(struct drbd_conf *mdev) int q_order_type; int ok; - if (inc_local_if_state(mdev, D_NEGOTIATING)) { + if (get_ldev_if_state(mdev, D_NEGOTIATING)) { D_ASSERT(mdev->bc->backing_bdev); d_size = drbd_get_max_capacity(mdev->bc); u_size = mdev->bc->dc.disk_size; q_order_type = drbd_queue_order_type(mdev); p.queue_order_type = cpu_to_be32(drbd_queue_order_type(mdev)); - dec_local(mdev); + put_ldev(mdev); } else { d_size = 0; u_size = 0; @@ -1888,7 +1905,7 @@ int _drbd_send_bitmap(struct drbd_conf *mdev) return FALSE; } - if (inc_local(mdev)) { + if (get_ldev(mdev)) { if (drbd_md_test_flag(mdev->bc, MDF_FULL_SYNC)) { dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n"); drbd_bm_set_all(mdev); @@ -1902,7 +1919,7 @@ int _drbd_send_bitmap(struct drbd_conf *mdev) drbd_md_sync(mdev); } } - dec_local(mdev); + put_ldev(mdev); } c = (struct bm_xfer_ctx) { @@ -2165,7 +2182,7 @@ int _drbd_send_page(struct drbd_conf *mdev, struct page *page, return ok; } -static inline int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) +static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) { struct bio_vec *bvec; int i; @@ -2177,7 +2194,7 @@ static inline int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) return 1; } -static inline int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) +static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) { struct bio_vec *bvec; int i; @@ -2807,10 +2824,10 @@ static int drbd_congested(void *congested_data, int bdi_bits) goto out; } - if (inc_local(mdev)) { + if (get_ldev(mdev)) { q = bdev_get_queue(mdev->bc->backing_bdev); r = bdi_congested(&q->backing_dev_info, bdi_bits); - dec_local(mdev); + put_ldev(mdev); if (r) reason = 'b'; } @@ -3077,7 +3094,7 @@ struct meta_data_on_disk { u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */ u32 reserved_u32[4]; -} __attribute((packed)); +} __packed; /** * drbd_md_sync: @@ -3095,7 +3112,7 @@ void drbd_md_sync(struct drbd_conf *mdev) /* We use here D_FAILED and not D_ATTACHING because we try to write * metadata even if we detach due to a disk failure! 
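 * (a disk that has failed but is not yet detached still satisfies the
 * get_ldev_if_state(mdev, D_FAILED) check below, which is the point)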
*/ - if (!inc_local_if_state(mdev, D_FAILED)) + if (!get_ldev_if_state(mdev, D_FAILED)) return; trace_drbd_md_io(mdev, WRITE, mdev->bc); @@ -3136,7 +3153,7 @@ void drbd_md_sync(struct drbd_conf *mdev) mdev->bc->md.la_size_sect = drbd_get_capacity(mdev->this_bdev); mutex_unlock(&mdev->md_io_mutex); - dec_local(mdev); + put_ldev(mdev); } /** @@ -3151,7 +3168,7 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) struct meta_data_on_disk *buffer; int i, rv = NO_ERROR; - if (!inc_local_if_state(mdev, D_ATTACHING)) + if (!get_ldev_if_state(mdev, D_ATTACHING)) return ERR_IO_MD_DISK; trace_drbd_md_io(mdev, READ, bdev); @@ -3210,7 +3227,7 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) err: mutex_unlock(&mdev->md_io_mutex); - dec_local(mdev); + put_ldev(mdev); return rv; } @@ -3316,7 +3333,7 @@ int drbd_bmio_set_n_write(struct drbd_conf *mdev) { int rv = -EIO; - if (inc_local_if_state(mdev, D_ATTACHING)) { + if (get_ldev_if_state(mdev, D_ATTACHING)) { drbd_md_set_flag(mdev, MDF_FULL_SYNC); drbd_md_sync(mdev); drbd_bm_set_all(mdev); @@ -3328,7 +3345,7 @@ int drbd_bmio_set_n_write(struct drbd_conf *mdev) drbd_md_sync(mdev); } - dec_local(mdev); + put_ldev(mdev); } return rv; @@ -3343,10 +3360,10 @@ int drbd_bmio_clear_n_write(struct drbd_conf *mdev) { int rv = -EIO; - if (inc_local_if_state(mdev, D_ATTACHING)) { + if (get_ldev_if_state(mdev, D_ATTACHING)) { drbd_bm_clear_all(mdev); rv = drbd_bm_write(mdev); - dec_local(mdev); + put_ldev(mdev); } return rv; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index c388478a0188..55dbf83d559f 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -164,9 +164,9 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev) D_ASSERT(mdev->state.pdsk == D_UNKNOWN); - if (inc_local_if_state(mdev, D_CONSISTENT)) { + if (get_ldev_if_state(mdev, D_CONSISTENT)) { fp = mdev->bc->dc.fencing; - dec_local(mdev); + put_ldev(mdev); } else { dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n"); return mdev->state.pdsk; @@ -312,30 +312,30 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) if (new_role == R_SECONDARY) { set_disk_ro(mdev->vdisk, TRUE); - if (inc_local(mdev)) { + if (get_ldev(mdev)) { mdev->bc->md.uuid[UI_CURRENT] &= ~(u64)1; - dec_local(mdev); + put_ldev(mdev); } } else { - if (inc_net(mdev)) { + if (get_net_conf(mdev)) { mdev->net_conf->want_lose = 0; - dec_net(mdev); + put_net_conf(mdev); } set_disk_ro(mdev->vdisk, FALSE); - if (inc_local(mdev)) { + if (get_ldev(mdev)) { if (((mdev->state.conn < C_CONNECTED || mdev->state.pdsk <= D_FAILED) && mdev->bc->md.uuid[UI_BITMAP] == 0) || forced) drbd_uuid_new_current(mdev); mdev->bc->md.uuid[UI_CURRENT] |= (u64)1; - dec_local(mdev); + put_ldev(mdev); } } - if ((new_role == R_SECONDARY) && inc_local(mdev)) { + if ((new_role == R_SECONDARY) && get_ldev(mdev)) { drbd_al_to_on_disk_bm(mdev); - dec_local(mdev); + put_ldev(mdev); } if (mdev->state.conn >= C_WF_REPORT_PARAMS) { @@ -737,7 +737,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp union drbd_state ns, os; int rv; int cp_discovered = 0; - int hardsect; + int hardsect_size; drbd_reconfig_start(mdev); @@ -890,7 +890,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp if (retcode < SS_SUCCESS) goto release_bdev2_fail; - if (!inc_local_if_state(mdev, D_ATTACHING)) + if (!get_ldev_if_state(mdev, D_ATTACHING)) goto force_diskless; 
drbd_md_set_sector_offsets(mdev, nbc); @@ -915,7 +915,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp goto force_diskless_dec; } - /* Since we are diskless, fix the AL first... */ + /* Since we are diskless, fix the activity log first... */ if (drbd_check_al_size(mdev)) { retcode = ERR_NOMEM; goto force_diskless_dec; @@ -934,19 +934,19 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp goto force_diskless_dec; } - /* allocate a second IO page if hardsect != 512 */ - hardsect = drbd_get_hardsect(nbc->md_bdev); - if (hardsect == 0) - hardsect = MD_HARDSECT; + /* allocate a second IO page if hardsect_size != 512 */ + hardsect_size = drbd_get_hardsect_size(nbc->md_bdev); + if (hardsect_size == 0) + hardsect_size = MD_SECTOR_SIZE; - if (hardsect != MD_HARDSECT) { + if (hardsect_size != MD_SECTOR_SIZE) { if (!mdev->md_io_tmpp) { struct page *page = alloc_page(GFP_NOIO); if (!page) goto force_diskless_dec; - dev_warn(DEV, "Meta data's bdev hardsect = %d != %d\n", - hardsect, MD_HARDSECT); + dev_warn(DEV, "Meta data's bdev hardsect_size = %d != %d\n", + hardsect_size, MD_SECTOR_SIZE); dev_warn(DEV, "Workaround engaged (has performace impact).\n"); mdev->md_io_tmpp = page; @@ -1089,13 +1089,13 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp drbd_md_sync(mdev); kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); - dec_local(mdev); + put_ldev(mdev); reply->ret_code = retcode; drbd_reconfig_done(mdev); return 0; force_diskless_dec: - dec_local(mdev); + put_ldev(mdev); force_diskless: drbd_force_state(mdev, NS(disk, D_DISKLESS)); drbd_md_sync(mdev); @@ -1201,7 +1201,7 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, odev = minor_to_mdev(i); if (!odev || odev == mdev) continue; - if (inc_net(odev)) { + if (get_net_conf(odev)) { taken_addr = (struct sockaddr *)&odev->net_conf->my_addr; if (new_conf->my_addr_len == odev->net_conf->my_addr_len && !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len)) @@ -1212,7 +1212,7 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len)) retcode = ERR_PEER_ADDR; - dec_net(odev); + put_net_conf(odev); if (retcode != NO_ERROR) goto fail; } @@ -1451,7 +1451,7 @@ STATIC int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, goto fail; } - if (!inc_local(mdev)) { + if (!get_ldev(mdev)) { retcode = ERR_NO_DISK; goto fail; } @@ -1464,7 +1464,7 @@ STATIC int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, mdev->bc->dc.disk_size = (sector_t)rs.resize_size; dd = drbd_determin_dev_size(mdev); drbd_md_sync(mdev); - dec_local(mdev); + put_ldev(mdev); if (dd == dev_size_error) { retcode = ERR_NOMEM_BITMAP; goto fail; @@ -1610,14 +1610,14 @@ STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n } spin_unlock(&mdev->peer_seq_lock); - if (inc_local(mdev)) { + if (get_ldev(mdev)) { wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); drbd_al_shrink(mdev); err = drbd_check_al_size(mdev); lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); - dec_local(mdev); + put_ldev(mdev); drbd_md_sync(mdev); if (err) { @@ -1735,14 +1735,14 @@ STATIC int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl tl = reply->tag_list; - if (inc_local(mdev)) { + if (get_ldev(mdev)) { tl = disk_conf_to_tags(mdev, &mdev->bc->dc, tl); - dec_local(mdev); + put_ldev(mdev); } - if (inc_net(mdev)) { 
+ if (get_net_conf(mdev)) { tl = net_conf_to_tags(mdev, mdev->net_conf, tl); - dec_net(mdev); + put_net_conf(mdev); } tl = syncer_conf_to_tags(mdev, &mdev->sync_conf, tl); @@ -1763,13 +1763,13 @@ STATIC int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp /* no local ref, no bitmap, no syncer progress. */ if (s.conn >= C_SYNC_SOURCE && s.conn <= C_PAUSED_SYNC_T) { - if (inc_local(mdev)) { + if (get_ldev(mdev)) { drbd_get_syncer_progress(mdev, &rs_left, &res); *tl++ = T_sync_progress; *tl++ = sizeof(int); memcpy(tl, &res, sizeof(int)); tl = (unsigned short *)((char *)tl + sizeof(int)); - dec_local(mdev); + put_ldev(mdev); } } *tl++ = TT_END; /* Close the tag list */ @@ -1784,7 +1784,7 @@ STATIC int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp tl = reply->tag_list; - if (inc_local(mdev)) { + if (get_ldev(mdev)) { /* This is a hand crafted add tag ;) */ *tl++ = T_uuids; *tl++ = UI_SIZE*sizeof(u64); @@ -1794,7 +1794,7 @@ STATIC int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp *tl++ = sizeof(int); memcpy(tl, &mdev->bc->md.flags, sizeof(int)); tl = (unsigned short *)((char *)tl + sizeof(int)); - dec_local(mdev); + put_ldev(mdev); } *tl++ = TT_END; /* Close the tag list */ @@ -1852,7 +1852,7 @@ STATIC int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl mutex_lock(&mdev->state_mutex); /* Protects us against serialized state changes. */ - if (!inc_local(mdev)) { + if (!get_ldev(mdev)) { retcode = ERR_NO_DISK; goto out; } @@ -1888,7 +1888,7 @@ STATIC int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl drbd_md_sync(mdev); out_dec: - dec_local(mdev); + put_ldev(mdev); out: mutex_unlock(&mdev->state_mutex); @@ -2045,7 +2045,7 @@ STATIC void drbd_connector_callback(void *data) static atomic_t drbd_nl_seq = ATOMIC_INIT(2); /* two. */ -static inline unsigned short * +static unsigned short * __tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, int len, int nul_terminated) { @@ -2065,19 +2065,19 @@ __tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, return tl; } -static inline unsigned short * +static unsigned short * tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, int len) { return __tl_add_blob(tl, tag, data, len, 0); } -static inline unsigned short * +static unsigned short * tl_add_str(unsigned short *tl, enum drbd_tags tag, const char *str) { return __tl_add_blob(tl, tag, str, strlen(str)+1, 0); } -static inline unsigned short * +static unsigned short * tl_add_int(unsigned short *tl, enum drbd_tags tag, const void *val) { switch(tag_type(tag)) { @@ -2255,10 +2255,10 @@ void drbd_bcast_sync_progress(struct drbd_conf *mdev) unsigned int res; /* no local ref, no bitmap, no syncer progress, no broadcast. 
*/ - if (!inc_local(mdev)) + if (!get_ldev(mdev)) return; drbd_get_syncer_progress(mdev, &rs_left, &res); - dec_local(mdev); + put_ldev(mdev); *tl++ = T_sync_progress; *tl++ = sizeof(int); diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 7de68d9d6aba..9f0a3c0e952c 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -243,10 +243,10 @@ STATIC int drbd_seq_show(struct seq_file *seq, void *v) mdev->rs_total - mdev->ov_left, mdev->rs_total); - if (proc_details >= 1 && inc_local_if_state(mdev, D_FAILED)) { + if (proc_details >= 1 && get_ldev_if_state(mdev, D_FAILED)) { lc_printf_stats(seq, mdev->resync); lc_printf_stats(seq, mdev->act_log); - dec_local(mdev); + put_ldev(mdev); } if (proc_details >= 2) { diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 077480fe6923..25da228de2fd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -70,7 +70,8 @@ STATIC int drbd_do_auth(struct drbd_conf *mdev); STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); STATIC int e_end_block(struct drbd_conf *, struct drbd_work *, int); -static inline struct drbd_epoch *previous_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch) + +static struct drbd_epoch *previous_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch) { struct drbd_epoch *prev; spin_lock(&mdev->epoch_lock); @@ -565,7 +566,7 @@ STATIC struct socket *drbd_try_connect(struct drbd_conf *mdev) int err; int disconnect_on_error = 1; - if (!inc_net(mdev)) + if (!get_net_conf(mdev)) return NULL; what = "sock_create_kern"; @@ -629,7 +630,7 @@ out: if (disconnect_on_error) drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); } - dec_net(mdev); + put_net_conf(mdev); return sock; } @@ -639,7 +640,7 @@ STATIC struct socket *drbd_wait_for_connect(struct drbd_conf *mdev) struct socket *s_estab = NULL, *s_listen; const char *what; - if (!inc_net(mdev)) + if (!get_net_conf(mdev)) return NULL; what = "sock_create_kern"; @@ -675,7 +676,7 @@ out: drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); } } - dec_net(mdev); + put_net_conf(mdev); return s_estab; } @@ -934,7 +935,7 @@ STATIC enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d { int rv; - if (mdev->write_ordering >= WO_bdev_flush && inc_local(mdev)) { + if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) { rv = blkdev_issue_flush(mdev->bc->backing_bdev, NULL); if (rv) { dev_err(DEV, "local disk flush failed with status %d\n", rv); @@ -943,7 +944,7 @@ STATIC enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d * if (rv == -EOPNOTSUPP) */ drbd_bump_write_ordering(mdev, WO_drain_io); } - dec_local(mdev); + put_ldev(mdev); } return drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE); @@ -1138,7 +1139,7 @@ int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __relea /* prepare bio for re-submit, * re-init volatile members */ /* we still have a local reference, - * inc_local was done in receive_Data. */ + * get_ldev was done in receive_Data. 
*/ bio->bi_bdev = mdev->bc->backing_bdev; bio->bi_sector = e->sector; bio->bi_size = e->size; @@ -1428,7 +1429,7 @@ STATIC int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si e = read_in_block(mdev, ID_SYNCER, sector, data_size); if (!e) { - dec_local(mdev); + put_ldev(mdev); return FALSE; } @@ -1513,9 +1514,9 @@ STATIC int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h) sector = be64_to_cpu(p->sector); D_ASSERT(p->block_id == ID_SYNCER); - if (inc_local(mdev)) { + if (get_ldev(mdev)) { /* data is submitted to disk within recv_resync_read. - * corresponding dec_local done below on error, + * corresponding put_ldev done below on error, * or in drbd_endio_write_sec. */ ok = recv_resync_read(mdev, sector, data_size); } else { @@ -1671,9 +1672,9 @@ STATIC int receive_Data(struct drbd_conf *mdev, struct p_header *h) if (drbd_recv(mdev, h->payload, header_size) != header_size) return FALSE; - if (!inc_local(mdev)) { + if (!get_ldev(mdev)) { /* data is submitted to disk at the end of this function. - * corresponding dec_local done either below (on error), + * corresponding put_ldev done either below (on error), * or in drbd_endio_write_sec. */ if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Can not write mirrored data block " @@ -1691,7 +1692,7 @@ STATIC int receive_Data(struct drbd_conf *mdev, struct p_header *h) sector = be64_to_cpu(p->sector); e = read_in_block(mdev, p->block_id, sector, data_size); if (!e) { - dec_local(mdev); + put_ldev(mdev); return FALSE; } @@ -1835,7 +1836,7 @@ STATIC int receive_Data(struct drbd_conf *mdev, struct p_header *h) /* we could probably send that P_DISCARD_ACK ourselves, * but I don't like the receiver using the msock */ - dec_local(mdev); + put_ldev(mdev); wake_asender(mdev); finish_wait(&mdev->misc_wait, &wait); return TRUE; @@ -1905,7 +1906,7 @@ out_interrupted: /* yes, the epoch_size now is imbalanced. * but we drop the connection anyways, so we don't have a chance to * receive a barrier... atomic_inc(&mdev->epoch_size); */ - dec_local(mdev); + put_ldev(mdev); drbd_free_ee(mdev, e); return FALSE; } @@ -1939,7 +1940,7 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) return FALSE; } - if (!inc_local_if_state(mdev, D_UP_TO_DATE)) { + if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Can not satisfy peer's read request, " "no local data.\n"); @@ -1950,7 +1951,7 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_KERNEL); if (!e) { - dec_local(mdev); + put_ldev(mdev); return FALSE; } @@ -1974,7 +1975,7 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) /* we have been interrupted, * probably connection lost! 
*/ D_ASSERT(signal_pending(current)); - dec_local(mdev); + put_ldev(mdev); drbd_free_ee(mdev, e); return 0; } @@ -1986,7 +1987,7 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) digest_size = h->length - brps ; di = kmalloc(sizeof(*di) + digest_size, GFP_KERNEL); if (!di) { - dec_local(mdev); + put_ldev(mdev); drbd_free_ee(mdev, e); return 0; } @@ -1995,7 +1996,7 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) di->digest = (((char *)di)+sizeof(struct digest_info)); if (drbd_recv(mdev, di->digest, digest_size) != digest_size) { - dec_local(mdev); + put_ldev(mdev); drbd_free_ee(mdev, e); kfree(di); return FALSE; @@ -2016,7 +2017,7 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) D_ASSERT(signal_pending(current)); drbd_free_ee(mdev, e); kfree(di); - dec_local(mdev); + put_ldev(mdev); return FALSE; } break; @@ -2033,7 +2034,7 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) /* we have been interrupted, * probably connection lost! */ D_ASSERT(signal_pending(current)); - dec_local(mdev); + put_ldev(mdev); drbd_free_ee(mdev, e); return 0; } @@ -2737,7 +2738,7 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct p_header *h) mdev->p_size = p_size; #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) - if (inc_local(mdev)) { + if (get_ldev(mdev)) { warn_if_differ_considerably(mdev, "lower level device sizes", p_size, drbd_get_max_capacity(mdev->bc)); warn_if_differ_considerably(mdev, "user requested size", @@ -2766,16 +2767,16 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct p_header *h) dev_err(DEV, "The peer's disk size is too small!\n"); drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); mdev->bc->dc.disk_size = my_usize; - dec_local(mdev); + put_ldev(mdev); return FALSE; } - dec_local(mdev); + put_ldev(mdev); } #undef min_not_zero - if (inc_local(mdev)) { + if (get_ldev(mdev)) { dd = drbd_determin_dev_size(mdev); - dec_local(mdev); + put_ldev(mdev); if (dd == dev_size_error) return FALSE; drbd_md_sync(mdev); @@ -2784,10 +2785,10 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct p_header *h) drbd_set_my_capacity(mdev, p_size); } - if (mdev->p_uuid && mdev->state.conn <= C_CONNECTED && inc_local(mdev)) { + if (mdev->p_uuid && mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) { nconn = drbd_sync_handshake(mdev, mdev->state.peer, mdev->state.pdsk); - dec_local(mdev); + put_ldev(mdev); if (nconn == C_MASK) { drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); @@ -2800,7 +2801,7 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct p_header *h) } } - if (inc_local(mdev)) { + if (get_ldev(mdev)) { if (mdev->bc->known_size != drbd_get_capacity(mdev->bc->backing_bdev)) { mdev->bc->known_size = drbd_get_capacity(mdev->bc->backing_bdev); ldsc = 1; @@ -2811,7 +2812,7 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct p_header *h) drbd_setup_queue_param(mdev, max_seg_s); drbd_setup_order_type(mdev, be32_to_cpu(p->queue_order_type)); - dec_local(mdev); + put_ldev(mdev); } if (mdev->state.conn > C_WF_REPORT_PARAMS) { @@ -2861,7 +2862,7 @@ STATIC int receive_uuids(struct drbd_conf *mdev, struct p_header *h) return FALSE; } - if (inc_local(mdev)) { + if (get_ldev(mdev)) { int skip_initial_sync = mdev->state.conn == C_CONNECTED && mdev->agreed_pro_version >= 90 && @@ -2877,7 +2878,7 @@ STATIC int receive_uuids(struct drbd_conf *mdev, struct p_header *h) CS_VERBOSE, NULL); drbd_md_sync(mdev); } - dec_local(mdev); + put_ldev(mdev); } /* Before we 
test for the disk state, we should wait until an eventually @@ -2982,7 +2983,7 @@ STATIC int receive_state(struct drbd_conf *mdev, struct p_header *h) nconn = C_CONNECTED; if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING && - inc_local_if_state(mdev, D_NEGOTIATING)) { + get_ldev_if_state(mdev, D_NEGOTIATING)) { int cr; /* consider resync */ cr = (oconn < C_CONNECTED); @@ -2995,7 +2996,7 @@ STATIC int receive_state(struct drbd_conf *mdev, struct p_header *h) if (cr) nconn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk); - dec_local(mdev); + put_ldev(mdev); if (nconn == C_MASK) { if (mdev->state.disk == D_NEGOTIATING) { drbd_force_state(mdev, NS(disk, D_DISKLESS)); @@ -3066,13 +3067,13 @@ STATIC int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h) /* Here the _drbd_uuid_ functions are right, current should _not_ be rotated into the history */ - if (inc_local_if_state(mdev, D_NEGOTIATING)) { + if (get_ldev_if_state(mdev, D_NEGOTIATING)) { _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid)); _drbd_uuid_set(mdev, UI_BITMAP, 0UL); drbd_start_resync(mdev, C_SYNC_TARGET); - dec_local(mdev); + put_ldev(mdev); } else dev_err(DEV, "Ignoring SyncUUID packet!\n"); @@ -3517,9 +3518,9 @@ STATIC void drbd_disconnect(struct drbd_conf *mdev) drbd_md_sync(mdev); fp = FP_DONT_CARE; - if (inc_local(mdev)) { + if (get_ldev(mdev)) { fp = mdev->bc->dc.fencing; - dec_local(mdev); + put_ldev(mdev); } if (mdev->state.role == R_PRIMARY) { @@ -3870,9 +3871,9 @@ STATIC int drbdd_init(struct drbd_thread *thi) } while (h == 0); if (h > 0) { - if (inc_net(mdev)) { + if (get_net_conf(mdev)) { drbdd(mdev); - dec_net(mdev); + put_net_conf(mdev); } } @@ -3929,12 +3930,38 @@ STATIC int got_IsInSync(struct drbd_conf *mdev, struct p_header *h) drbd_rs_complete_io(mdev, sector); drbd_set_in_sync(mdev, sector, blksize); /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ - mdev->rs_same_csum += (blksize >> BM_BLOCK_SIZE_B); + mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); dec_rs_pending(mdev); return TRUE; } +/* when we receive the ACK for a write request, + * verify that we actually know about it */ +static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, + u64 id, sector_t sector) +{ + struct hlist_head *slot = tl_hash_slot(mdev, sector); + struct hlist_node *n; + struct drbd_request *req; + + hlist_for_each_entry(req, n, slot, colision) { + if ((unsigned long)req == (unsigned long)id) { + if (req->sector != sector) { + dev_err(DEV, "_ack_id_to_req: found req %p but it has " + "wrong sector (%llus versus %llus)\n", req, + (unsigned long long)req->sector, + (unsigned long long)sector); + break; + } + return req; + } + } + dev_err(DEV, "_ack_id_to_req: failed to find req %p, sector %llus in list\n", + (void *)(unsigned long)id, (unsigned long long)sector); + return NULL; +} + STATIC int got_BlockAck(struct drbd_conf *mdev, struct p_header *h) { struct drbd_request *req; @@ -4060,10 +4087,10 @@ STATIC int got_NegRSDReply(struct drbd_conf *mdev, struct p_header *h) dec_rs_pending(mdev); - if (inc_local_if_state(mdev, D_FAILED)) { + if (get_ldev_if_state(mdev, D_FAILED)) { drbd_rs_complete_io(mdev, sector); drbd_rs_failed_io(mdev, sector, size); - dec_local(mdev); + put_ldev(mdev); } return TRUE; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index dcf642563c77..2e70345a06d4 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -34,7 +34,7 @@ /* Update disk stats at start of I/O request */ -static inline void 
_drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req, struct bio *bio) +static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req, struct bio *bio) { const int rw = bio_data_dir(bio); int cpu; @@ -46,7 +46,7 @@ static inline void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_reque } /* Update disk stats when completing request upwards */ -static inline void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req) +static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req) { int rw = bio_data_dir(req->master_bio); unsigned long duration = jiffies - req->start_time; @@ -93,9 +93,9 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const * we would forget to resync the corresponding extent. */ if (s & RQ_LOCAL_MASK) { - if (inc_local_if_state(mdev, D_FAILED)) { + if (get_ldev_if_state(mdev, D_FAILED)) { drbd_al_complete_io(mdev, req->sector); - dec_local(mdev); + put_ldev(mdev); } else if (__ratelimit(&drbd_ratelimit_state)) { dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), " "but my Disk seems to have failed :(\n", @@ -338,7 +338,7 @@ STATIC int _req_conflicts(struct drbd_request *req) D_ASSERT(hlist_unhashed(&req->colision)); - if (!inc_net(mdev)) + if (!get_net_conf(mdev)) return 0; /* BUG_ON */ @@ -383,11 +383,11 @@ STATIC int _req_conflicts(struct drbd_request *req) out_no_conflict: /* this is like it should be, and what we expected. * our users do behave after all... */ - dec_net(mdev); + put_net_conf(mdev); return 0; out_conflict: - dec_net(mdev); + put_net_conf(mdev); return 1; } @@ -402,9 +402,6 @@ out_conflict: * happen "atomically" within the req_lock, * and it enforces that we have to think in a very structured manner * about the "events" that may happen to a request during its life time ... - * - * Though I think it is likely that we break this again into many - * static inline void _req_mod_ ## what (req) ... */ void _req_mod(struct drbd_request *req, enum drbd_req_event what, int error) { @@ -453,7 +450,7 @@ void _req_mod(struct drbd_request *req, enum drbd_req_event what, int error) req->rq_state &= ~RQ_LOCAL_PENDING; _req_may_be_done(req, error); - dec_local(mdev); + put_ldev(mdev); break; case write_completed_with_error: @@ -467,7 +464,7 @@ void _req_mod(struct drbd_request *req, enum drbd_req_event what, int error) /* and now: check how to handle local io error. */ __drbd_chk_io_error(mdev, FALSE); _req_may_be_done(req, error); - dec_local(mdev); + put_ldev(mdev); break; case read_completed_with_error: @@ -482,19 +479,19 @@ void _req_mod(struct drbd_request *req, enum drbd_req_event what, int error) if (bio_rw(req->master_bio) == READA) { /* it is legal to fail READA */ _req_may_be_done(req, error); - dec_local(mdev); + put_ldev(mdev); break; } /* else */ dev_alert(DEV, "Local READ failed sec=%llus size=%u\n", (unsigned long long)req->sector, req->size); - /* _req_mod(req,to_be_send); oops, recursion in static inline */ + /* _req_mod(req,to_be_send); oops, recursion... 
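+		 * the equivalent bookkeeping (RQ_NET_PENDING,
+		 * inc_ap_pending) is therefore done by hand below: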
*/ D_ASSERT(!(req->rq_state & RQ_NET_MASK)); req->rq_state |= RQ_NET_PENDING; inc_ap_pending(mdev); __drbd_chk_io_error(mdev, FALSE); - dec_local(mdev); + put_ldev(mdev); /* NOTE: if we have no connection, * or know the peer has no good data either, * then we don't actually need to "queue_for_net_read", @@ -739,7 +736,7 @@ STATIC int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) trace_drbd_bio(mdev, "Rq", bio, 0, req); - local = inc_local(mdev); + local = get_ldev(mdev); if (!local) { bio_put(req->private_bio); /* or we get a bio leak */ req->private_bio = NULL; @@ -758,7 +755,7 @@ STATIC int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) local = 0; bio_put(req->private_bio); req->private_bio = NULL; - dec_local(mdev); + put_ldev(mdev); } } remote = !local && mdev->state.pdsk >= D_UP_TO_DATE; @@ -898,7 +895,7 @@ allocate_barrier: bio_put(req->private_bio); req->private_bio = NULL; drbd_al_complete_io(mdev, req->sector); - dec_local(mdev); + put_ldev(mdev); local = 0; } if (remote) @@ -953,7 +950,7 @@ fail_and_free_req: if (local) { bio_put(req->private_bio); req->private_bio = NULL; - dec_local(mdev); + put_ldev(mdev); } bio_endio(bio, err); drbd_req_free(req); @@ -1120,14 +1117,14 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct if (bio_size == 0) { if (limit <= bvec->bv_len) limit = bvec->bv_len; - } else if (limit && inc_local(mdev)) { + } else if (limit && get_ldev(mdev)) { struct request_queue * const b = mdev->bc->backing_bdev->bd_disk->queue; if (b->merge_bvec_fn && mdev->bc->dc.use_bmbv) { backing_limit = b->merge_bvec_fn(b, bvm, bvec); limit = min(limit, backing_limit); } - dec_local(mdev); + put_ldev(mdev); } return limit; } diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index a63a1e9ae5a8..81393ffa8c25 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -218,32 +218,6 @@ struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector) ((unsigned int)(sector>>HT_SHIFT) % mdev->tl_hash_s); } -/* when we receive the ACK for a write request, - * verify that we actually know about it */ -static inline struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, - u64 id, sector_t sector) -{ - struct hlist_head *slot = tl_hash_slot(mdev, sector); - struct hlist_node *n; - struct drbd_request *req; - - hlist_for_each_entry(req, n, slot, colision) { - if ((unsigned long)req == (unsigned long)id) { - if (req->sector != sector) { - dev_err(DEV, "_ack_id_to_req: found req %p but it has " - "wrong sector (%llus versus %llus)\n", req, - (unsigned long long)req->sector, - (unsigned long long)sector); - break; - } - return req; - } - } - dev_err(DEV, "_ack_id_to_req: failed to find req %p, sector %llus in list\n", - (void *)(unsigned long)id, (unsigned long long)sector); - return NULL; -} - /* application reads (drbd_request objects) */ static struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector) { diff --git a/drivers/block/drbd/drbd_tracing.c b/drivers/block/drbd/drbd_tracing.c index 2eff178fbb0f..ab5aba9c4972 100644 --- a/drivers/block/drbd/drbd_tracing.c +++ b/drivers/block/drbd/drbd_tracing.c @@ -213,7 +213,7 @@ static void probe_drbd_netlink(void *data, int is_req) static void probe_drbd_actlog(struct drbd_conf *mdev, sector_t sector, char* msg) { - unsigned int enr = (sector >> (AL_EXTENT_SIZE_B-9)); + unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9)); if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) return; diff --git 
a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 81f3a4e8ea49..dd984502d62e 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -82,11 +82,7 @@ void drbd_md_io_complete(struct bio *bio, int error)
 {
	struct drbd_md_io *md_io;

-	/* error parameter ignored:
-	 * drbd_md_sync_page_io explicitly tests bio_uptodate(bio); */
-	md_io = (struct drbd_md_io *)bio->bi_private;
-	md_io->error = error;

	trace_drbd_bio(md_io->mdev, "Md", bio, 1, NULL);
@@ -128,7 +124,7 @@ void drbd_endio_read_sec(struct bio *bio, int error) __releases(local)
	drbd_chk_io_error(mdev, error, FALSE);
	drbd_queue_work(&mdev->data.work, &e->w);
-	dec_local(mdev);
+	put_ldev(mdev);

	trace_drbd_ee(mdev, e, "read completed");
 }
@@ -214,7 +210,7 @@ void drbd_endio_write_sec(struct bio *bio, int error) __releases(local)
		drbd_al_complete_io(mdev, e_sector);

	wake_asender(mdev);
-	dec_local(mdev);
+	put_ldev(mdev);
 }

@@ -367,7 +363,7 @@ STATIC int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
 {
	struct drbd_epoch_entry *e;

-	if (!inc_local(mdev))
+	if (!get_ldev(mdev))
		return 0;

	if (FAULT_ACTIVE(mdev, DRBD_FAULT_AL_EE))
@@ -375,7 +371,7 @@ STATIC int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
	e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY);
	if (!e) {
-		dec_local(mdev);
+		put_ldev(mdev);
		return 2;
	}

@@ -441,16 +437,16 @@ int w_make_resync_request(struct drbd_conf *mdev,
		dev_err(DEV, "%s in w_make_resync_request\n",
			conns_to_name(mdev->state.conn));

-	if (!inc_local(mdev)) {
+	if (!get_ldev(mdev)) {
		/* Since we only need to access mdev->rsync a
-		   inc_local_if_state(mdev,D_FAILED) would be sufficient, but
+		   get_ldev_if_state(mdev,D_FAILED) would be sufficient, but
		   to continue resync with a broken disk makes no sense at
		   all */
		dev_err(DEV, "Disk broke down during resync!\n");
		mdev->resync_work.cb = w_resync_inactive;
		return 1;
	}

-	/* All goto requeses have to happend after this block: inc_local() */
+	/* All goto requests have to happen after this block: get_ldev() */

	number = SLEEP_TIME*mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ);
@@ -466,7 +462,7 @@ next_sector:
		if (bit == -1UL) {
			mdev->bm_resync_fo = drbd_bm_bits(mdev);
			mdev->resync_work.cb = w_resync_inactive;
-			dec_local(mdev);
+			put_ldev(mdev);
			return 1;
		}

@@ -533,7 +529,7 @@ next_sector:
		if (mdev->agreed_pro_version >= 89 && mdev->csums_tfm) {
			switch (read_for_csum(mdev, sector, size)) {
			case 0: /* Disk failure*/
-				dec_local(mdev);
+				put_ldev(mdev);
				return 0;
			case 2: /* Allocation failed */
				drbd_rs_complete_io(mdev, sector);
@@ -547,7 +543,7 @@ next_sector:
					sector, size, ID_SYNCER)) {
				dev_err(DEV, "drbd_send_drequest() failed, aborting...\n");
				dec_rs_pending(mdev);
-				dec_local(mdev);
+				put_ldev(mdev);
				return 0;
			}
		}
@@ -561,13 +557,13 @@ next_sector:
	 * until then resync "work" is "inactive" ...
*/ mdev->resync_work.cb = w_resync_inactive; - dec_local(mdev); + put_ldev(mdev); return 1; } requeue: mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); - dec_local(mdev); + put_ldev(mdev); return 1; } @@ -677,7 +673,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) dbdt = Bit2KB(db/dt); mdev->rs_paused /= HZ; - if (!inc_local(mdev)) + if (!get_ldev(mdev)) goto out; spin_lock_irq(&mdev->req_lock); @@ -765,7 +761,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); out_unlock: spin_unlock_irq(&mdev->req_lock); - dec_local(mdev); + put_ldev(mdev); out: mdev->rs_total = 0; mdev->rs_failed = 0; @@ -840,9 +836,9 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return 1; } - if (inc_local_if_state(mdev, D_FAILED)) { + if (get_ldev_if_state(mdev, D_FAILED)) { drbd_rs_complete_io(mdev, e->sector); - dec_local(mdev); + put_ldev(mdev); } if (likely(drbd_bio_uptodate(e->private_bio))) { @@ -1265,7 +1261,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) drbd_bm_recount_bits(mdev); - /* In case a previous resync run was aborted by an IO error... */ + /* In case a previous resync run was aborted by an IO error/detach on the peer. */ drbd_rs_cancel_all(mdev); if (side == C_SYNC_TARGET) { @@ -1284,7 +1280,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) drbd_state_lock(mdev); - if (!inc_local_if_state(mdev, D_NEGOTIATING)) { + if (!get_ldev_if_state(mdev, D_NEGOTIATING)) { drbd_state_unlock(mdev); return; } @@ -1331,12 +1327,12 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) } write_unlock_irq(&global_state_lock); drbd_state_unlock(mdev); - dec_local(mdev); + put_ldev(mdev); if (r == SS_SUCCESS) { dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", conns_to_name(ns.conn), - (unsigned long) mdev->rs_total << (BM_BLOCK_SIZE_B-10), + (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10), (unsigned long) mdev->rs_total); if (mdev->rs_total == 0) { diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h index b7ce5acca0bb..724fb44aad06 100644 --- a/drivers/block/drbd/drbd_wrappers.h +++ b/drivers/block/drbd/drbd_wrappers.h @@ -4,11 +4,10 @@ #include #include - /* see get_sb_bdev and bd_claim */ extern char *drbd_sec_holder; -static inline sector_t drbd_get_hardsect(struct block_device *bdev) +static inline sector_t drbd_get_hardsect_size(struct block_device *bdev) { return bdev->bd_disk->queue->hardsect_size; } diff --git a/drivers/block/drbd/lru_cache.c b/drivers/block/drbd/lru_cache.c index 71858ff5b02c..80b0839a529d 100644 --- a/drivers/block/drbd/lru_cache.c +++ b/drivers/block/drbd/lru_cache.c @@ -34,13 +34,13 @@ #define PARANOIA_LEAVE() do { clear_bit(__LC_PARANOIA, &lc->flags); smp_mb__after_clear_bit(); } while (0) #define RETURN(x...) 
do { PARANOIA_LEAVE(); return x ; } while (0)

-static inline size_t size_of_lc(unsigned int e_count, size_t e_size)
+static size_t size_of_lc(unsigned int e_count, size_t e_size)
 {
	return sizeof(struct lru_cache)
	     + e_count * (e_size + sizeof(struct hlist_head));
 }

-static inline void lc_init(struct lru_cache *lc,
+static void lc_init(struct lru_cache *lc,
		const size_t bytes, const char *name,
		const unsigned int e_count, const size_t e_size,
		void *private_p)
--
cgit v1.2.3


From 27fabf42b3c043c104861650fe413cff8db8bd45 Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Fri, 15 May 2009 12:59:47 +0200
Subject: Mostly cleanups, triggered by reviews

Moved lru_cache.c to ~linux/lib
lru_cache - documentation update, type cast removal
Using kmalloc() first, trying vmalloc() as fallback for the page anchors of the bitmap
Making everything kernel-doc compliant
rename mdev->bc to mdev->ldev (to match the recent change to get_ldev/put_ldev)
make drbd thread t_lock irqsave - lockdep complained, and lockdep is right (theoretically)

Signed-off-by: Philipp Reisner
Signed-off-by: Lars Ellenberg
---
 drivers/block/drbd/Kconfig         |   1 +
 drivers/block/drbd/Makefile        |   2 +-
 drivers/block/drbd/drbd_actlog.c   | 174 ++++++++------
 drivers/block/drbd/drbd_bitmap.c   |  99 ++++----
 drivers/block/drbd/drbd_buildtag.c |   4 +-
 drivers/block/drbd/drbd_int.h      |  96 +++++---
 drivers/block/drbd/drbd_main.c     | 390 ++++++++++++++++++++-----------
 drivers/block/drbd/drbd_nl.c       | 113 ++++-----
 drivers/block/drbd/drbd_proc.c     |   9 +-
 drivers/block/drbd/drbd_receiver.c |  93 ++++----
 drivers/block/drbd/drbd_req.c      |   6 +-
 drivers/block/drbd/drbd_tracing.c  |  40 ++--
 drivers/block/drbd/drbd_worker.c   |  52 +++--
 drivers/block/drbd/lru_cache.c     | 398 -------------------------------
 drivers/block/drbd/lru_cache.h     | 116 ----------
 include/linux/lru_cache.h          | 285 +++++++++++++++++++++++
 lib/Kconfig                        |   3 +
 lib/Makefile                       |   2 +
 lib/lru_cache.c                    | 464 +++++++++++++++++++++++++++++++++++++
 19 files changed, 1382 insertions(+), 965 deletions(-)
 delete mode 100644 drivers/block/drbd/lru_cache.c
 delete mode 100644 drivers/block/drbd/lru_cache.h
 create mode 100644 include/linux/lru_cache.h
 create mode 100644 lib/lru_cache.c

diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig
index 7ad8c2a3c2f3..b3676771731d 100644
--- a/drivers/block/drbd/Kconfig
+++ b/drivers/block/drbd/Kconfig
@@ -8,6 +8,7 @@ comment "DRBD disabled because PROC_FS, INET or CONNECTOR not selected"
 config BLK_DEV_DRBD
	tristate "DRBD Distributed Replicated Block Device support"
	depends on PROC_FS && INET && CONNECTOR
+	select LRU_CACHE
	help

	  NOTE: In order to authenticate connections you have to select

diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile
index f0f805cd2051..9dd069b0ded0 100644
--- a/drivers/block/drbd/Makefile
+++ b/drivers/block/drbd/Makefile
@@ -1,6 +1,6 @@
 drbd-y := drbd_buildtag.o drbd_bitmap.o drbd_proc.o
 drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
-drbd-y += lru_cache.o drbd_main.o drbd_strings.o drbd_nl.o
+drbd-y += drbd_main.o drbd_strings.o drbd_nl.o

 drbd_trace-y := drbd_tracing.o drbd_strings.o

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index f1318e57f375..6b096b1720ea 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -200,13 +200,13 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
 static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
 {
	struct lc_element *al_ext;
-	struct bm_extent *bm_ext;
+	struct lc_element *tmp;
	unsigned long al_flags = 0;

	spin_lock_irq(&mdev->al_lock);
-	bm_ext = (struct bm_extent *)
-		lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
-	if (unlikely(bm_ext != NULL)) {
+	tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
+	if (unlikely(tmp != NULL)) {
+		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
			spin_unlock_irq(&mdev->al_lock);
			return NULL;
@@ -329,9 +329,9 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
	mx = min_t(int, AL_EXTENTS_PT,
		   mdev->act_log->nr_elements - mdev->al_tr_cycle);
	for (i = 0; i < mx; i++) {
-		extent_nr = lc_entry(mdev->act_log,
-				     mdev->al_tr_cycle+i)->lc_number;
-		buffer->updates[i+1].pos = cpu_to_be32(mdev->al_tr_cycle+i);
+		unsigned idx = mdev->al_tr_cycle + i;
+		extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number;
+		buffer->updates[i+1].pos = cpu_to_be32(idx);
		buffer->updates[i+1].extent = cpu_to_be32(extent_nr);
		xor_sum ^= extent_nr;
	}
@@ -346,10 +346,10 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)

	buffer->xor_sum = cpu_to_be32(xor_sum);

-	sector = mdev->bc->md.md_offset
-		+ mdev->bc->md.al_offset + mdev->al_tr_pos;
+	sector = mdev->ldev->md.md_offset
+		+ mdev->ldev->md.al_offset + mdev->al_tr_pos;

-	if (!drbd_md_sync_page_io(mdev, mdev->bc, sector, WRITE)) {
+	if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
		drbd_chk_io_error(mdev, 1, TRUE);
		drbd_io_error(mdev, TRUE);
	}
@@ -370,10 +370,13 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
 }

 /**
- * drbd_al_read_tr: Reads a single transaction record from the
- * on disk activity log.
- * Returns -1 on IO error, 0 on checksum error and 1 if it is a valid
- * record.
+ * drbd_al_read_tr() - Read a single transaction from the on disk activity log
+ * @mdev: DRBD device.
+ * @bdev: Block device to read from.
+ * @b: pointer to an al_transaction.
+ * @index: On disk slot of the transaction to read.
+ *
+ * Returns -1 on IO error, 0 on checksum error and 1 upon success.
 */
 STATIC int drbd_al_read_tr(struct drbd_conf *mdev,
			   struct drbd_backing_dev *bdev,
@@ -401,9 +404,11 @@ STATIC int drbd_al_read_tr(struct drbd_conf *mdev,
 }

 /**
- * drbd_al_read_log: Restores the activity log from its on disk
- * representation. Returns 1 on success, returns 0 when
- * reading the log failed due to IO errors.
+ * drbd_al_read_log() - Restores the activity log from its on disk representation.
+ * @mdev: DRBD device.
+ * @bdev: Block device to read from.
+ *
+ * Returns 1 on success, returns 0 when reading the log failed due to IO errors.
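+ *
+ * Caller sketch (illustrative; the error label is a placeholder):
+ *	if (!drbd_al_read_log(mdev, bdev)) {
+ *		retcode = ERR_IO_MD_DISK;
+ *		goto fail;
+ *	}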
 */
int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
{
@@ -557,8 +562,8 @@ STATIC int atodb_prepare_unless_covered(struct drbd_conf *mdev,
 {
	struct bio *bio;
	struct page *page;
-	sector_t on_disk_sector = enr + mdev->bc->md.md_offset
-				      + mdev->bc->md.bm_offset;
+	sector_t on_disk_sector = enr + mdev->ldev->md.md_offset
+				      + mdev->ldev->md.bm_offset;
	unsigned int page_offset = PAGE_SIZE;
	int offset;
	int i = 0;
@@ -604,7 +609,7 @@ STATIC int atodb_prepare_unless_covered(struct drbd_conf *mdev,

	bio->bi_private = wc;
	bio->bi_end_io = atodb_endio;
-	bio->bi_bdev = mdev->bc->md_bdev;
+	bio->bi_bdev = mdev->ldev->md_bdev;
	bio->bi_sector = on_disk_sector;

	if (bio_add_page(bio, page, MD_SECTOR_SIZE, page_offset) != MD_SECTOR_SIZE)
@@ -631,11 +636,11 @@ out_bio_put:
 }

 /**
- * drbd_al_to_on_disk_bm:
- * Writes the areas of the bitmap which are covered by the
- * currently active extents of the activity log.
- * called when we detach (unconfigure) local storage,
- * or when we go from R_PRIMARY to R_SECONDARY state.
+ * drbd_al_to_on_disk_bm() - Writes bitmap parts covered by active AL extents
+ * @mdev: DRBD device.
+ *
+ * Called when we detach (unconfigure) local storage,
+ * or when we go from R_PRIMARY to R_SECONDARY role.
 */
 void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
 {
@@ -661,7 +666,7 @@ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
	wc.error = 0;

	for (i = 0; i < nr_elements; i++) {
-		enr = lc_entry(mdev->act_log, i)->lc_number;
+		enr = lc_element_by_index(mdev->act_log, i)->lc_number;
		if (enr == LC_FREE)
			continue;
		/* next statement also does atomic_inc wc.count and local_cnt */
@@ -687,7 +692,7 @@ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
		}
	}

-	drbd_blk_run_queue(bdev_get_queue(mdev->bc->md_bdev));
+	drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev));

	/* always (try to) flush bitmap to stable storage */
	drbd_md_flush(mdev);
@@ -719,7 +724,7 @@ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
	dev_warn(DEV, "Using the slow drbd_al_to_on_disk_bm()\n");

	for (i = 0; i < mdev->act_log->nr_elements; i++) {
-		enr = lc_entry(mdev->act_log, i)->lc_number;
+		enr = lc_element_by_index(mdev->act_log, i)->lc_number;
		if (enr == LC_FREE)
			continue;
		/* Really slow: if we have al-extents 16..19 active,
@@ -733,8 +738,8 @@ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
 }

 /**
- * drbd_al_apply_to_bm: Sets the bits in the in-memory bitmap
- * which are described by the active extents of the activity log.
+ * drbd_al_apply_to_bm() - Sets the bitmap to dirty(1) where covered by active AL extents
+ * @mdev: DRBD device.
 */
 void drbd_al_apply_to_bm(struct drbd_conf *mdev)
 {
@@ -746,7 +751,7 @@ void drbd_al_apply_to_bm(struct drbd_conf *mdev)
	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));

	for (i = 0; i < mdev->act_log->nr_elements; i++) {
-		enr = lc_entry(mdev->act_log, i)->lc_number;
+		enr = lc_element_by_index(mdev->act_log, i)->lc_number;
		if (enr == LC_FREE)
			continue;
		add += drbd_bm_ALe_set_all(mdev, enr);
@@ -773,8 +778,12 @@ static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext)
 }

 /**
- * drbd_al_shrink: Removes all active extents form the activity log.
- * (but does not write any transactions)
+ * drbd_al_shrink() - Removes all active extents from the activity log
+ * @mdev: DRBD device.
+ *
+ * Removes all active extents from the activity log, waiting until
+ * the reference count of each entry has dropped to 0 first, of course.
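+ *
+ * A locked call, as drbd_nl_syncer_conf() does it (sketch):
+ *	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
+ *	drbd_al_shrink(mdev);
+ *	lc_unlock(mdev->act_log);
+ *	wake_up(&mdev->al_wait);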
+ *
 * You need to lock mdev->act_log with lc_try_lock() / lc_unlock()
 */
void drbd_al_shrink(struct drbd_conf *mdev)
@@ -785,7 +794,7 @@
	D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags));

	for (i = 0; i < mdev->act_log->nr_elements; i++) {
-		al_ext = lc_entry(mdev->act_log, i);
+		al_ext = lc_element_by_index(mdev->act_log, i);
		if (al_ext->lc_number == LC_FREE)
			continue;
		wait_event(mdev->al_wait, _try_lc_del(mdev, al_ext));
@@ -834,7 +843,7 @@ STATIC int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused
 STATIC void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
				      int count, int success)
 {
-	struct bm_extent *ext;
+	struct lc_element *e;
	struct update_odbm_work *udw;

	unsigned int enr;
@@ -845,8 +854,9 @@
	 * a 16 MB extent border. (Currently this is true...) */
	enr = BM_SECT_TO_EXT(sector);
-	ext = (struct bm_extent *) lc_get(mdev->resync, enr);
-	if (ext) {
+	e = lc_get(mdev->resync, enr);
+	if (e) {
+		struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
		if (ext->lce.lc_number == enr) {
			if (success)
				ext->rs_left -= count;
@@ -1006,7 +1016,7 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
	unsigned long sbnr, ebnr, lbnr, flags;
	sector_t esector, nr_sectors;
	unsigned int enr, count;
-	struct bm_extent *ext;
+	struct lc_element *e;

	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
		dev_err(DEV, "sector: %llus, size: %d\n",
@@ -1042,9 +1052,9 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
	count = drbd_bm_set_bits(mdev, sbnr, ebnr);

	enr = BM_SECT_TO_EXT(sector);
-	ext = (struct bm_extent *) lc_find(mdev->resync, enr);
-	if (ext)
-		ext->rs_left += count;
+	e = lc_find(mdev->resync, enr);
+	if (e)
+		lc_entry(e, struct bm_extent, lce)->rs_left += count;
	spin_unlock_irqrestore(&mdev->al_lock, flags);

out:
@@ -1054,21 +1064,23 @@
 static struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
 {
-	struct bm_extent *bm_ext;
+	struct lc_element *e;
+	struct bm_extent *bm_ext;
	int wakeup = 0;
-	unsigned long rs_flags;
+	unsigned long rs_flags;

	spin_lock_irq(&mdev->al_lock);
	if (mdev->resync_locked > mdev->resync->nr_elements/2) {
		spin_unlock_irq(&mdev->al_lock);
		return NULL;
	}
-	bm_ext = (struct bm_extent *) lc_get(mdev->resync, enr);
+	e = lc_get(mdev->resync, enr);
+	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
			bm_ext->rs_failed = 0;
-			lc_changed(mdev->resync, (struct lc_element *)bm_ext);
+			lc_changed(mdev->resync, &bm_ext->lce);
			wakeup = 1;
		}
		if (bm_ext->lce.refcnt == 1)
@@ -1116,14 +1128,11 @@ static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
 }

 /**
- * drbd_rs_begin_io: Gets an extent in the resync LRU cache and sets it
- * to BME_LOCKED.
+ * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED
+ * @mdev: DRBD device.
+ * @sector: The sector number.
 *
- * @sector: The sector number
- *
- * sleeps on al_wait.
- * returns 1 if successful.
- * returns 0 if interrupted.
+ * This function sleeps on al_wait. Returns 1 on success, 0 if interrupted.
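+ *
+ * Pairing sketch (illustrative): a resync path typically does
+ *	if (!drbd_rs_begin_io(mdev, sector))
+ *		return 0;
+ * and calls drbd_rs_complete_io(mdev, sector) once the IO has completed.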
 */
int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
{
@@ -1164,19 +1173,19 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
 }
 
 /**
- * drbd_try_rs_begin_io: Gets an extent in the resync LRU cache, sets it
- * to BME_NO_WRITES, then tries to set it to BME_LOCKED.
- *
- * @sector: The sector number
+ * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep
+ * @mdev: DRBD device.
+ * @sector: The sector number.
 *
- * does not sleep.
- * returns zero if we could set BME_LOCKED and can proceed,
- * -EAGAIN if we need to try again.
+ * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then
+ * tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN
+ * if there is still application IO going on in this area.
 */
int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
+	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;
@@ -1203,8 +1212,8 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
 			"dropping %u, aparently got 'synced' by application io\n",
 			mdev->resync_wenr);
 
-		bm_ext = (struct bm_extent *)
-			lc_find(mdev->resync, mdev->resync_wenr);
+		e = lc_find(mdev->resync, mdev->resync_wenr);
+		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
 		if (bm_ext) {
 			D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
 			D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
@@ -1217,7 +1226,9 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
 			dev_alert(DEV, "LOGIC BUG\n");
 		}
 	}
-	bm_ext = (struct bm_extent *)lc_try_get(mdev->resync, enr);
+	/* TRY. */
+	e = lc_try_get(mdev->resync, enr);
+	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
 	if (bm_ext) {
 		if (test_bit(BME_LOCKED, &bm_ext->flags))
 			goto proceed;
@@ -1236,13 +1247,16 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
 		}
 		goto check_al;
 	} else {
+		/* do we rather want to try later? */
 		if (mdev->resync_locked > mdev->resync->nr_elements-3) {
 			trace_drbd_resync(mdev, TRACE_LVL_ALL,
					  "resync_locked = %u!\n", mdev->resync_locked);
 			goto try_again;
 		}
-		bm_ext = (struct bm_extent *)lc_get(mdev->resync, enr);
+		/* Do or do not. There is no try. -- Yoda */
+		e = lc_get(mdev->resync, enr);
+		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
 		if (!bm_ext) {
 			const unsigned long rs_flags = mdev->resync->flags;
 			if (rs_flags & LC_STARVING)
@@ -1254,7 +1268,7 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
 		if (bm_ext->lce.lc_number != enr) {
 			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
 			bm_ext->rs_failed = 0;
-			lc_changed(mdev->resync, (struct lc_element *)bm_ext);
+			lc_changed(mdev->resync, &bm_ext->lce);
 			wake_up(&mdev->al_wait);
 			D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0);
 		}
@@ -1289,6 +1303,7 @@ try_again:
 void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
 {
	unsigned int enr = BM_SECT_TO_EXT(sector);
+	struct lc_element *e;
	struct bm_extent *bm_ext;
	unsigned long flags;
@@ -1297,7 +1312,8 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
 		     (long long)sector, enr);
 
 	spin_lock_irqsave(&mdev->al_lock, flags);
-	bm_ext = (struct bm_extent *) lc_find(mdev->resync, enr);
+	e = lc_find(mdev->resync, enr);
+	bm_ext = e ?
 lc_entry(e, struct bm_extent, lce) : NULL;
 	if (!bm_ext) {
 		spin_unlock_irqrestore(&mdev->al_lock, flags);
 		dev_err(DEV, "drbd_rs_complete_io() called, but extent not found\n");
 		return;
 	}
@@ -1312,7 +1328,7 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
 		return;
 	}
 
-	if (lc_put(mdev->resync, (struct lc_element *)bm_ext) == 0) {
+	if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
 		clear_bit(BME_LOCKED, &bm_ext->flags);
 		clear_bit(BME_NO_WRITES, &bm_ext->flags);
 		mdev->resync_locked--;
@@ -1323,8 +1339,8 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
 }
 
 /**
- * drbd_rs_cancel_all: Removes extents from the resync LRU. Even
- * if they are BME_LOCKED.
+ * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED)
+ * @mdev: DRBD device.
 */
 void drbd_rs_cancel_all(struct drbd_conf *mdev)
 {
@@ -1343,13 +1359,15 @@ void drbd_rs_cancel_all(struct drbd_conf *mdev)
 }
 
 /**
- * drbd_rs_del_all: Gracefully remove all extents from the resync LRU.
- * there may be still a reference hold by someone. In that case this function
- * returns -EAGAIN.
- * In case all elements got removed it returns zero.
+ * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU
+ * @mdev: DRBD device.
+ *
+ * Returns 0 upon success, -EAGAIN if at least one reference count was
+ * not zero.
 */
int drbd_rs_del_all(struct drbd_conf *mdev)
{
+	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;
@@ -1360,7 +1378,8 @@ int drbd_rs_del_all(struct drbd_conf *mdev)
 	if (get_ldev_if_state(mdev, D_FAILED)) {
 		/* ok, ->resync is there. */
 		for (i = 0; i < mdev->resync->nr_elements; i++) {
-			bm_ext = (struct bm_extent *) lc_entry(mdev->resync, i);
+			e = lc_element_by_index(mdev->resync, i);
+			bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
 			if (bm_ext->lce.lc_number == LC_FREE)
 				continue;
 			if (bm_ext->lce.lc_number == mdev->resync_wenr) {
@@ -1392,10 +1411,11 @@ int drbd_rs_del_all(struct drbd_conf *mdev)
 	return 0;
 }
 
-/* Record information on a failure to resync the specified blocks
- *
- * called on C_SYNC_TARGET when resync write fails or P_NEG_RS_DREPLY received
- *
+/**
+ * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
+ * @mdev: DRBD device.
+ * @sector: The sector number.
+ * @size: Size of failed IO operation, in bytes.
 */
void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size)
{
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 213fa12da121..d9b59b0611b0 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -80,6 +80,7 @@ struct drbd_bitmap {
 /* definition of bits in bm_flags */
 #define BM_LOCKED       0
 #define BM_MD_IO_ERROR  1
+#define BM_P_VMALLOCED  2
 
 static int bm_is_locked(struct drbd_bitmap *b)
 {
@@ -214,15 +215,23 @@ STATIC void bm_free_pages(struct page **pages, unsigned long number)
 	}
 }
 
+STATIC void bm_vk_free(void *ptr, int v)
+{
+	if (v)
+		vfree(ptr);
+	else
+		kfree(ptr);
+}
+
 /*
 * "have" and "want" are NUMBER OF PAGES.
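
The hunks above stop casting a struct lc_element pointer to struct bm_extent and instead recover the containing object with lc_entry(), which now behaves like the kernel's container_of(). A minimal userspace sketch of that pattern, with an illustrative stand-in for struct bm_extent (not DRBD's real definitions):

#include <stddef.h>
#include <stdio.h>

struct lc_element { unsigned int lc_number; };

/* The embedded element no longer needs to be the first member,
 * because its offset is subtracted explicitly. */
struct bm_extent_demo {
	int rs_left;
	unsigned long flags;
	struct lc_element lce;
};

/* conceptually what lc_entry(ptr, type, member) expands to */
#define demo_lc_entry(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct bm_extent_demo ext = { .rs_left = 42 };
	struct lc_element *e = &ext.lce;	/* what the cache hands out */
	struct bm_extent_demo *back = demo_lc_entry(e, struct bm_extent_demo, lce);

	printf("rs_left=%d, container recovered: %s\n",
	       back->rs_left, back == &ext ? "yes" : "no");
	return 0;
}

This is also why a later hunk can move the lce member away from the front of struct bm_extent without breaking the cache.
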
*/ -STATIC struct page **bm_realloc_pages(struct page **old_pages, - unsigned long have, - unsigned long want) +STATIC struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) { + struct page **old_pages = b->bm_pages; struct page **new_pages, *page; - unsigned int i, bytes; + unsigned int i, bytes, vmalloced = 0; + unsigned long have = b->bm_number_of_pages; BUG_ON(have == 0 && old_pages != NULL); BUG_ON(have != 0 && old_pages == NULL); @@ -230,27 +239,15 @@ STATIC struct page **bm_realloc_pages(struct page **old_pages, if (have == want) return old_pages; - /* To use kmalloc here is ok, as long as we support 4TB at max... - * otherwise this might become bigger than 128KB, which is - * the maximum for kmalloc. - * - * no, it is not: on 64bit boxes, sizeof(void*) == 8, - * 128MB bitmap @ 4K pages -> 256K of page pointers. - * ==> use vmalloc for now again. - * then again, we could do something like - * if (nr_pages > watermark) vmalloc else kmalloc :*> ... - * or do cascading page arrays: - * one page for the page array of the page array, - * those pages for the real bitmap pages. - * there we could even add some optimization members, - * so we won't need to kmap_atomic in bm_find_next_bit just to see - * that the page has no bits set ... - * or we can try a "huge" page ;-) - */ + /* Trying kmalloc first, falling back to vmalloc... */ bytes = sizeof(struct page *)*want; - new_pages = vmalloc(bytes); - if (!new_pages) - return NULL; + new_pages = kmalloc(bytes, GFP_KERNEL); + if (!new_pages) { + new_pages = vmalloc(bytes); + if (!new_pages) + return NULL; + vmalloced = 1; + } memset(new_pages, 0, bytes); if (want >= have) { @@ -260,7 +257,7 @@ STATIC struct page **bm_realloc_pages(struct page **old_pages, page = alloc_page(GFP_HIGHUSER); if (!page) { bm_free_pages(new_pages + have, i - have); - vfree(new_pages); + bm_vk_free(new_pages, vmalloced); return NULL; } new_pages[i] = page; @@ -273,6 +270,11 @@ STATIC struct page **bm_realloc_pages(struct page **old_pages, */ } + if (vmalloced) + set_bit(BM_P_VMALLOCED, &b->bm_flags); + else + clear_bit(BM_P_VMALLOCED, &b->bm_flags); + return new_pages; } @@ -308,7 +310,7 @@ void drbd_bm_cleanup(struct drbd_conf *mdev) { ERR_IF (!mdev->bitmap) return; bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages); - vfree(mdev->bitmap->bm_pages); + bm_vk_free(mdev->bitmap->bm_pages, test_bit(BM_P_VMALLOCED, &mdev->bitmap->bm_flags)); kfree(mdev->bitmap); mdev->bitmap = NULL; } @@ -462,6 +464,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity) unsigned long want, have, onpages; /* number of pages */ struct page **npages, **opages = NULL; int err = 0, growing; + int opages_vmalloced; ERR_IF(!b) return -ENOMEM; @@ -473,6 +476,8 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity) if (capacity == b->bm_dev_capacity) goto out; + opages_vmalloced = test_bit(BM_P_VMALLOCED, &b->bm_flags); + if (capacity == 0) { spin_lock_irq(&b->bm_lock); opages = b->bm_pages; @@ -486,7 +491,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity) b->bm_dev_capacity = 0; spin_unlock_irq(&b->bm_lock); bm_free_pages(opages, onpages); - vfree(opages); + bm_vk_free(opages, opages_vmalloced); goto out; } bits = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT)); @@ -499,7 +504,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity) words = ALIGN(bits, 64) >> LN2_BPL; if (get_ldev(mdev)) { - D_ASSERT((u64)bits <= (((u64)mdev->bc->md.md_size_sect-MD_BM_OFFSET) << 12)); + D_ASSERT((u64)bits <= 
 (((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12));
 		put_ldev(mdev);
 	}
 
@@ -513,7 +518,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity)
 		if (FAULT_ACTIVE(mdev, DRBD_FAULT_BM_ALLOC))
 			npages = NULL;
 		else
-			npages = bm_realloc_pages(b->bm_pages, have, want);
+			npages = bm_realloc_pages(b, want);
 	}
 
 	if (!npages) {
@@ -557,7 +562,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity)
 	spin_unlock_irq(&b->bm_lock);
 	if (opages != npages)
-		vfree(opages);
+		bm_vk_free(opages, opages_vmalloced);
 	dev_info(DEV, "resync bitmap: bits=%lu words=%lu\n", bits, words);
 
 out:
@@ -753,15 +758,15 @@ STATIC void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int
 	struct bio *bio = bio_alloc(GFP_KERNEL, 1);
 	unsigned int len;
 	sector_t on_disk_sector =
-		mdev->bc->md.md_offset + mdev->bc->md.bm_offset;
+		mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset;
 	on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);
 
 	/* this might happen with very small
 	 * flexible external meta data device */
 	len = min_t(unsigned int, PAGE_SIZE,
-		(drbd_md_last_sector(mdev->bc) - on_disk_sector + 1)<<9);
+		(drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9);
 
-	bio->bi_bdev = mdev->bc->md_bdev;
+	bio->bi_bdev = mdev->ldev->md_bdev;
 	bio->bi_sector = on_disk_sector;
 	bio_add_page(bio, b->bm_pages[page_nr], len, 0);
 	bio->bi_private = b;
@@ -839,7 +844,7 @@ STATIC int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local)
 	for (i = 0; i < num_pages; i++)
 		bm_page_io_async(mdev, b, i, rw);
 
-	drbd_blk_run_queue(bdev_get_queue(mdev->bc->md_bdev));
+	drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev));
 	wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0);
 
 	if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) {
@@ -870,9 +875,8 @@ STATIC int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local)
 }
 
 /**
- * drbd_bm_read: Read the whole bitmap from its on disk location.
- *
- * currently only called from "drbd_nl_disk_conf"
+ * drbd_bm_read() - Read the whole bitmap from its on disk location.
+ * @mdev: DRBD device.
 */
 int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)
 {
@@ -880,9 +884,8 @@ int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)
 }
 
 /**
- * drbd_bm_write: Write the whole bitmap to its on disk location.
- *
- * called at various occasions.
+ * drbd_bm_write() - Write the whole bitmap to its on disk location.
+ * @mdev: DRBD device.
 */
 int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
 {
@@ -890,16 +893,18 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
 }
 
 /**
- * drbd_bm_write_sect: Writes a 512 byte piece of the bitmap to its
- * on disk location. On disk bitmap is little endian.
- *
- * @enr: The _sector_ offset from the start of the bitmap.
+ * drbd_bm_write_sect() - Writes a 512 (MD_SECTOR_SIZE) byte piece of the bitmap
+ * @mdev: DRBD device.
+ * @enr: Extent number in the resync lru (happens to be sector offset)
 *
+ * The BM_EXT_SIZE is on purpose exactly the amount of the bitmap covered
+ * by a single sector write. Therefore enr == sector offset from the
+ * start of the bitmap.
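
bm_realloc_pages() above now tries the cheap allocator first and only falls back to the expensive one, recording the winner in BM_P_VMALLOCED so that bm_vk_free() can call the matching free routine later. A userspace analogue of the same pattern, with malloc()/mmap() standing in for kmalloc()/vmalloc() (Linux/POSIX only, purely illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

struct parray {
	void *ptr;
	size_t bytes;
	int mmapped;		/* plays the role of the BM_P_VMALLOCED bit */
};

static struct parray pa_alloc(size_t bytes)
{
	struct parray p = { malloc(bytes), bytes, 0 };

	if (!p.ptr) {		/* fall back to the "big" allocator */
		void *m = mmap(NULL, bytes, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (m != MAP_FAILED) {
			p.ptr = m;
			p.mmapped = 1;
		}
	}
	if (p.ptr)
		memset(p.ptr, 0, bytes);
	return p;
}

static void pa_free(struct parray *p)
{
	if (!p->ptr)
		return;
	if (p->mmapped)		/* cf. vfree() */
		munmap(p->ptr, p->bytes);
	else			/* cf. kfree() */
		free(p->ptr);
	p->ptr = NULL;
}

int main(void)
{
	struct parray p = pa_alloc(1 << 20);
	printf("allocated via %s\n", p.mmapped ? "mmap" : "malloc");
	pa_free(&p);
	return 0;
}

The point of the stored flag is the same in both worlds: pointers from different allocators must never reach the wrong free routine.
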
*/ int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local) { - sector_t on_disk_sector = enr + mdev->bc->md.md_offset - + mdev->bc->md.bm_offset; + sector_t on_disk_sector = enr + mdev->ldev->md.md_offset + + mdev->ldev->md.bm_offset; int bm_words, num_words, offset; int err = 0; @@ -911,7 +916,7 @@ int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(lo memset(page_address(mdev->md_io_page), 0, MD_SECTOR_SIZE); drbd_bm_get_lel(mdev, offset, num_words, page_address(mdev->md_io_page)); - if (!drbd_md_sync_page_io(mdev, mdev->bc, on_disk_sector, WRITE)) { + if (!drbd_md_sync_page_io(mdev, mdev->ldev, on_disk_sector, WRITE)) { int i; err = -EIO; dev_err(DEV, "IO ERROR writing bitmap sector %lu " diff --git a/drivers/block/drbd/drbd_buildtag.c b/drivers/block/drbd/drbd_buildtag.c index 213234342e70..20fe72a104d3 100644 --- a/drivers/block/drbd/drbd_buildtag.c +++ b/drivers/block/drbd/drbd_buildtag.c @@ -2,6 +2,6 @@ #include const char *drbd_buildtag(void) { - return "GIT-hash: c522e740ae3163f5a5ff83c0c58d9f2801299961 drbd/drbd_int.h" - " build by phil@fat-tyre, 2009-05-05 17:15:39"; + return "GIT-hash: b0abb3832a730d4fbd145013f6f51fc977bba3cc drbd/drbd_int.h" + " build by phil@fat-tyre, 2009-05-15 11:54:26"; } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index dcc35bf67eea..83f9f33e65ea 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -40,7 +40,7 @@ #include #include #include -#include "lru_cache.h" +#include #ifdef __CHECKER__ # define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr"))) @@ -180,10 +180,7 @@ drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { extern struct drbd_conf **minor_table; extern struct ratelimit_state drbd_ratelimit_state; -/*** - * on the wire - *********************************************************************/ - +/* on the wire */ enum drbd_packets { /* receiver (data socket) */ P_DATA = 0x00, @@ -891,7 +888,7 @@ struct drbd_conf { /* configured by drbdsetup */ struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ struct syncer_conf sync_conf; - struct drbd_backing_dev *bc __protected_by(local); + struct drbd_backing_dev *ldev __protected_by(local); sector_t p_size; /* partner's disk size */ struct request_queue *rq_queue; @@ -1148,7 +1145,7 @@ extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size) extern int drbd_send_bitmap(struct drbd_conf *mdev); extern int _drbd_send_bitmap(struct drbd_conf *mdev); extern int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode); -extern void drbd_free_bc(struct drbd_backing_dev *bc); +extern void drbd_free_bc(struct drbd_backing_dev *ldev); extern int drbd_io_error(struct drbd_conf *mdev, int forcedetach); extern void drbd_mdev_cleanup(struct drbd_conf *mdev); @@ -1210,10 +1207,10 @@ extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf /* resync bitmap */ /* 16MB sized 'bitmap extent' to track syncer usage */ struct bm_extent { - struct lc_element lce; int rs_left; /* number of bits set (out of sync) in this extent. */ int rs_failed; /* number of failed resync requests in this extent. */ unsigned long flags; + struct lc_element lce; }; #define BME_NO_WRITES 0 /* bm_extent.flags: no more requests on this one! 
 */
@@ -1536,7 +1533,9 @@ void drbd_bcast_ee(struct drbd_conf *mdev, const struct drbd_epoch_entry* e);
 
-/** DRBD State macros:
+/**
+ * DOC: DRBD State macros
+ *
 * These macros are used to express state changes in easily readable form.
 *
 * The NS macros expand to a mask and a value, that can be bit ored onto the
@@ -1613,6 +1612,16 @@ static inline int _drbd_set_state(struct drbd_conf *mdev,
 	return rv;
 }
 
+/**
+ * drbd_request_state() - Request a state change
+ * @mdev: DRBD device.
+ * @mask: mask of state bits to change.
+ * @val: value of new state bits.
+ *
+ * This is the most graceful way of requesting a state change. It is
+ * quite verbose in case the state change is not possible, and all those
+ * state changes are globally serialized.
+ */
 static inline int drbd_request_state(struct drbd_conf *mdev,
				     union drbd_state mask,
				     union drbd_state val)
@@ -1620,13 +1629,9 @@ static inline int drbd_request_state(struct drbd_conf *mdev,
 	return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
 }
 
-/**
- * drbd_chk_io_error: Handles the on_io_error setting, should be called from
- * all io completion handlers. See also drbd_io_error().
- */
 static inline void __drbd_chk_io_error(struct drbd_conf *mdev, int forcedetach)
 {
-	switch (mdev->bc->dc.on_io_error) {
+	switch (mdev->ldev->dc.on_io_error) {
 	case EP_PASS_ON:
 		if (!forcedetach) {
 			if (printk_ratelimit())
@@ -1644,6 +1649,14 @@ static inline void __drbd_chk_io_error(struct drbd_conf *mdev, int forcedetach)
 	}
 }
 
+/**
+ * drbd_chk_io_error() - Handle the on_io_error setting, should be called from all io completion handlers
+ * @mdev: DRBD device.
+ * @error: Error code passed to the IO completion callback
+ * @forcedetach: Force detach. I.e. the error happened while accessing the meta data
+ *
+ * See also drbd_io_error().
+ */
 static inline void drbd_chk_io_error(struct drbd_conf *mdev,
	int error, int forcedetach)
 {
@@ -1655,9 +1668,13 @@ static inline void drbd_chk_io_error(struct drbd_conf *mdev,
 	}
 }
 
-/* Returns the first sector number of our meta data,
- * which, for internal meta data, happens to be the maximum capacity
- * we could agree upon with our peer
+
+/**
+ * drbd_md_first_sector() - Returns the first sector number of the meta data area
+ * @bdev: Meta data block device.
+ *
+ * BTW, for internal meta data, this happens to be the maximum capacity
+ * we could agree upon with our peer node.
 */
 static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
 {
@@ -1671,8 +1688,10 @@ static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
 	}
 }
 
-/* returns the last sector number of our meta data,
- * to be able to catch out of band md access */
+/**
+ * drbd_md_last_sector() - Return the last sector number of the meta data area
+ * @bdev: Meta data block device.
+ */
 static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
 {
 	switch (bdev->dc.meta_dev_idx) {
@@ -1685,16 +1704,19 @@ static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
 	}
 }
 
-/* Returns the number of 512 byte sectors of the device */
 static inline sector_t drbd_get_capacity(struct block_device *bdev)
 {
-	/* return bdev ? get_capacity(bdev->bd_disk) : 0; */
-	return bdev ? bdev->bd_inode->i_size >> 9 : 0;
+	return bdev ? get_capacity(bdev->bd_disk) : 0;
}

-/* returns the capacity we announce to out peer.
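
The mask/val convention documented above means a caller describes only the state fields it wants to touch; the change is applied as ns = (os & ~mask) | val on the integer view of the state union. A self-contained sketch (the bitfield layout is invented for the example and much smaller than DRBD's real union drbd_state):

#include <stdio.h>

union demo_state {
	struct {
		unsigned role:2;
		unsigned conn:5;
		unsigned disk:4;
	} f;
	unsigned int i;
};

int main(void)
{
	union demo_state os   = { .f = { .role = 1, .conn = 10, .disk = 8 } };
	union demo_state mask = { .f = { .disk = 15 } };	/* touch only disk */
	union demo_state val  = { .f = { .disk = 4 } };
	union demo_state ns;

	ns.i = (os.i & ~mask.i) | val.i;	/* role and conn survive */
	printf("role %u->%u conn %u->%u disk %u->%u\n",
	       os.f.role, ns.f.role, os.f.conn, ns.f.conn, os.f.disk, ns.f.disk);
	return 0;
}
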
- * we clip ourselves at the various MAX_SECTORS, because if we don't,
- * current implementation will oops sooner or later */
+/**
+ * drbd_get_max_capacity() - Returns the capacity we announce to our peer
+ * @bdev: Meta data block device.
+ *
+ * Returns the capacity we announce to our peer. We clip ourselves at the
+ * various MAX_SECTORS, because if we don't, current implementation will
+ * oops sooner or later
+ */
 static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
 {
 	sector_t s;
@@ -1721,7 +1743,11 @@ static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
 	return s;
 }
 
-/* returns the sector number of our meta data 'super' block */
+/**
+ * drbd_md_ss__() - Return the sector number of our meta data super block
+ * @mdev: DRBD device.
+ * @bdev: Meta data block device.
+ */
 static inline sector_t drbd_md_ss__(struct drbd_conf *mdev,
				    struct drbd_backing_dev *bdev)
 {
@@ -1909,8 +1935,10 @@ static inline void put_net_conf(struct drbd_conf *mdev)
 }
 
 /**
- * get_net_conf: Returns TRUE when it is ok to access mdev->net_conf. You
- * should call put_net_conf() when finished looking at mdev->net_conf.
+ * get_net_conf() - Increase ref count on mdev->net_conf; Returns 0 if nothing there
+ * @mdev: DRBD device.
+ *
+ * You have to call put_net_conf() when finished working with mdev->net_conf.
 */
 static inline int get_net_conf(struct drbd_conf *mdev)
 {
@@ -1924,11 +1952,13 @@ static inline int get_net_conf(struct drbd_conf *mdev)
 }
 
 /**
- * get_ldev: Returns TRUE when local IO is possible. If it returns
- * TRUE you should call put_ldev() after IO is completed.
+ * get_ldev() - Increase the ref count on mdev->ldev. Returns 0 if there is no ldev
+ * @M: DRBD device.
+ *
+ * You have to call put_ldev() when finished working with mdev->ldev.
 */
-#define get_ldev_if_state(M,MINS) __cond_lock(local, _get_ldev_if_state(M,MINS))
 #define get_ldev(M) __cond_lock(local, _get_ldev_if_state(M,D_INCONSISTENT))
+#define get_ldev_if_state(M,MINS) __cond_lock(local, _get_ldev_if_state(M,MINS))
 
 static inline void put_ldev(struct drbd_conf *mdev)
 {
@@ -2197,7 +2227,7 @@ static inline void drbd_blk_run_queue(struct request_queue *q)
 static inline void drbd_kick_lo(struct drbd_conf *mdev)
 {
 	if (get_ldev(mdev)) {
-		drbd_blk_run_queue(bdev_get_queue(mdev->bc->backing_bdev));
+		drbd_blk_run_queue(bdev_get_queue(mdev->ldev->backing_bdev));
 		put_ldev(mdev);
 	}
 }
@@ -2209,7 +2239,7 @@ static inline void drbd_md_flush(struct drbd_conf *mdev)
 	if (test_bit(MD_NO_BARRIER, &mdev->flags))
 		return;
 
-	r = blkdev_issue_flush(mdev->bc->md_bdev, NULL);
+	r = blkdev_issue_flush(mdev->ldev->md_bdev, NULL);
 	if (r) {
 		set_bit(MD_NO_BARRIER, &mdev->flags);
 		dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 3c377d326570..ad296842b960 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -188,7 +188,16 @@ int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
 
 #endif
 
-/************************* The transfer log start */
+/**
+ * DOC: The transfer log
+ *
+ * The transfer log is a singly linked list of &struct drbd_tl_epoch objects.
+ * mdev->newest_tle points to the head, mdev->oldest_tle points to the tail
+ * of the list. There is always at least one &struct drbd_tl_epoch object.
+ *
+ * Each &struct drbd_tl_epoch has a circular, doubly linked list of requests
+ * attached.
+ */
 STATIC int tl_init(struct drbd_conf *mdev)
 {
 	struct drbd_tl_epoch *b;
@@ -227,7 +236,11 @@ STATIC void tl_cleanup(struct drbd_conf *mdev)
 }
 
 /**
- * _tl_add_barrier: Adds a barrier to the TL.
+ * _tl_add_barrier() - Adds a barrier to the transfer log
+ * @mdev: DRBD device.
+ * @new: Barrier to be added before the current head of the TL.
+ *
+ * The caller must hold the req_lock.
 */
 void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new)
 {
@@ -249,7 +262,16 @@ void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new)
 	}
 }
 
-/* when we receive a barrier ack */
+/**
+ * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
+ * @mdev: DRBD device.
+ * @barrier_nr: Expected identifier of the DRBD write barrier packet.
+ * @set_size: Expected number of requests before that barrier.
+ *
+ * In case the passed barrier_nr or set_size does not match the oldest
+ * &struct drbd_tl_epoch object, this function will cause a termination
+ * of the connection.
+ */
 void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
		unsigned int set_size)
 {
@@ -322,8 +344,14 @@ bail:
 }
 
 
-/* called by drbd_disconnect (exiting receiver thread)
- * or from some after_state_ch */
+/**
+ * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
+ * @mdev: DRBD device.
+ *
+ * This is called after the connection to the peer was lost. The storage covered
+ * by the requests on the transfer log gets marked as out of sync. Called from the
+ * receiver thread and the worker thread.
+ */
 void tl_clear(struct drbd_conf *mdev)
 {
 	struct drbd_tl_epoch *b, *tmp;
@@ -383,16 +411,14 @@ void tl_clear(struct drbd_conf *mdev)
 }
 
 /**
- * drbd_io_error: Handles the on_io_error setting, should be called in the
- * unlikely(!drbd_bio_uptodate(e->bio)) case from kernel thread context.
- * See also drbd_chk_io_error
+ * drbd_io_error() - Detach from the local disk if so configured by the on_io_error setting
+ * @mdev: DRBD device.
+ * @force_detach: Detach no matter how on_io_error is set (meta data IO error)
 *
- * NOTE: we set ourselves FAILED here if on_io_error is EP_DETACH or Panic OR
- * if the forcedetach flag is set. This flag is set when failures
- * occur writing the meta data portion of the disk as they are
- * not recoverable.
+ * Should be called in the unlikely(!drbd_bio_uptodate(e->bio)) case from
+ * kernel thread context. See also drbd_chk_io_error().
 */
-int drbd_io_error(struct drbd_conf *mdev, int forcedetach)
+int drbd_io_error(struct drbd_conf *mdev, int force_detach)
 {
 	enum drbd_io_error_p eh;
 	unsigned long flags;
@@ -401,11 +427,11 @@ int drbd_io_error(struct drbd_conf *mdev, int forcedetach)
 	eh = EP_PASS_ON;
 	if (get_ldev_if_state(mdev, D_FAILED)) {
-		eh = mdev->bc->dc.on_io_error;
+		eh = mdev->ldev->dc.on_io_error;
 		put_ldev(mdev);
 	}
 
-	if (!forcedetach && eh == EP_PASS_ON)
+	if (!force_detach && eh == EP_PASS_ON)
 		return 1;
 
 	spin_lock_irqsave(&mdev->req_lock, flags);
@@ -441,9 +467,10 @@ int drbd_io_error(struct drbd_conf *mdev, int forcedetach)
 }
 
 /**
- * cl_wide_st_chg:
- * Returns TRUE if this state change should be preformed as a cluster wide
- * transaction. Of course it returns 0 as soon as the connection is lost.
+ * cl_wide_st_chg() - TRUE if the state change is a cluster wide one
+ * @mdev: DRBD device.
+ * @os: old (current) state.
+ * @ns: new (wanted) state.
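
tl_release(), documented above, only trusts a barrier ack as far as it matches its own bookkeeping. A condensed, runnable sketch of that consistency check, with the epoch type reduced to the fields the check needs (illustrative, not the kernel structs):

#include <stdio.h>

struct demo_tl_epoch {
	struct demo_tl_epoch *next;	/* oldest -> newest */
	unsigned int barrier_nr;
	unsigned int n_writes;		/* requests counted into this epoch */
};

/* 0 if the ack is consistent, -1 if the connection must be dropped */
static int demo_tl_release(const struct demo_tl_epoch *oldest,
			   unsigned int barrier_nr, unsigned int set_size)
{
	if (!oldest || oldest->barrier_nr != barrier_nr)
		return -1;	/* peer acked an epoch we do not expect */
	if (oldest->n_writes != set_size)
		return -1;	/* peer saw a different number of requests */
	return 0;
}

int main(void)
{
	struct demo_tl_epoch e = { NULL, 7, 128 };
	printf("good ack: %d, bad ack: %d\n",
	       demo_tl_release(&e, 7, 128), demo_tl_release(&e, 7, 100));
	return 0;
}
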
 */
STATIC int cl_wide_st_chg(struct drbd_conf *mdev,
			  union drbd_state os, union drbd_state ns)
@@ -474,6 +501,12 @@ int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
 	return rv;
 }
 
+/**
+ * drbd_force_state() - Impose a change which happens outside our control on our state
+ * @mdev: DRBD device.
+ * @mask: mask of state bits to change.
+ * @val: value of new state bits.
+ */
 void drbd_force_state(struct drbd_conf *mdev,
	union drbd_state mask, union drbd_state val)
 {
@@ -523,10 +556,14 @@ STATIC enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev,
 }
 
 /**
- * _drbd_request_state:
- * This function is the most gracefull way to change state. For some state
- * transition this function even does a cluster wide transaction.
- * It has a cousin named drbd_request_state(), which is always verbose.
+ * drbd_req_state() - Perform a possibly cluster wide state change
+ * @mdev: DRBD device.
+ * @mask: mask of state bits to change.
+ * @val: value of new state bits.
+ * @f: flags
+ *
+ * Should not be called directly, use drbd_request_state() or
+ * _drbd_request_state().
 */
 STATIC int drbd_req_state(struct drbd_conf *mdev,
			  union drbd_state mask, union drbd_state val,
@@ -601,10 +638,14 @@ abort:
 }
 
 /**
- * _drbd_request_state:
- * This function is the most gracefull way to change state. For some state
- * transition this function even does a cluster wide transaction.
- * It has a cousin named drbd_request_state(), which is always verbose.
+ * _drbd_request_state() - Request a state change (with flags)
+ * @mdev: DRBD device.
+ * @mask: mask of state bits to change.
+ * @val: value of new state bits.
+ * @f: flags
+ *
+ * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
+ * flag, or when logging of failed state change requests is not desired.
 */
 int _drbd_request_state(struct drbd_conf *mdev,	union drbd_state mask,
			union drbd_state val,	enum chg_state_flags f)
@@ -659,6 +700,11 @@ void print_st_err(struct drbd_conf *mdev,
	       A##s_to_name(ns.A)); \
	} })
 
+/**
+ * is_valid_state() - Returns an SS_ error code if ns is not valid
+ * @mdev: DRBD device.
+ * @ns: State to consider.
+ */
 STATIC int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
 {
 	/* See drbd_state_sw_errors in drbd_strings.c */
@@ -668,7 +714,7 @@ STATIC int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
 	fp = FP_DONT_CARE;
 	if (get_ldev(mdev)) {
-		fp = mdev->bc->dc.fencing;
+		fp = mdev->ldev->dc.fencing;
 		put_ldev(mdev);
 	}
 
@@ -718,6 +764,12 @@ STATIC int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
 	return rv;
 }
 
+/**
+ * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible
+ * @mdev: DRBD device.
+ * @ns: new state.
+ * @os: old state.
+ */
 STATIC int is_valid_state_transition(struct drbd_conf *mdev,
	union drbd_state ns, union drbd_state os)
 {
@@ -759,6 +811,16 @@ STATIC int is_valid_state_transition(struct drbd_conf *mdev,
 	return rv;
 }
 
+/**
+ * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
+ * @mdev: DRBD device.
+ * @os: old state.
+ * @ns: new state.
+ * @warn_sync_abort:
+ *
+ * When we lose connection, we have to set the state of the peer's disk (pdsk)
+ * to D_UNKNOWN. This rule and many more along those lines are in this function.
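
The D_UNKNOWN rule named above is typical for sanitize_state(): each rule rewrites the requested state before it is applied. A minimal sketch of one such rule (enum values invented for the example):

#include <stdio.h>

enum demo_conn { DC_STANDALONE, DC_CONNECTED };
enum demo_pdsk { DP_UNKNOWN, DP_UP_TO_DATE };

struct demo_st { enum demo_conn conn; enum demo_pdsk pdsk; };

/* without a connection nothing can be known about the peer's disk */
static struct demo_st demo_sanitize(struct demo_st ns)
{
	if (ns.conn < DC_CONNECTED)
		ns.pdsk = DP_UNKNOWN;
	return ns;
}

int main(void)
{
	struct demo_st ns = { DC_STANDALONE, DP_UP_TO_DATE };
	ns = demo_sanitize(ns);
	printf("pdsk after sanitize: %d\n", ns.pdsk);	/* 0 == unknown */
	return 0;
}
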
+ */ STATIC union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, int *warn_sync_abort) { @@ -766,7 +828,7 @@ STATIC union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state fp = FP_DONT_CARE; if (get_ldev(mdev)) { - fp = mdev->bc->dc.fencing; + fp = mdev->ldev->dc.fencing; put_ldev(mdev); } @@ -791,7 +853,7 @@ STATIC union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns.pdsk = D_UNKNOWN; } - /* Clear the aftr_isp when becomming Unconfigured */ + /* Clear the aftr_isp when becoming unconfigured */ if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY) ns.aftr_isp = 0; @@ -854,7 +916,7 @@ STATIC union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state /* Connection breaks down before we finished "Negotiating" */ if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING && get_ldev_if_state(mdev, D_NEGOTIATING)) { - if (mdev->ed_uuid == mdev->bc->md.uuid[UI_CURRENT]) { + if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) { ns.disk = mdev->new_state_tmp.disk; ns.pdsk = mdev->new_state_tmp.pdsk; } else { @@ -886,6 +948,15 @@ STATIC union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state return ns; } +/** + * __drbd_set_state() - Set a new DRBD state + * @mdev: DRBD device. + * @ns: new state. + * @flags: Flags + * @done: Optional completion, that will get completed after the after_state_ch() finished + * + * Caller needs to hold req_lock, and global_state_lock. Do not call directly. + */ int __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, enum chg_state_flags flags, struct completion *done) @@ -966,7 +1037,7 @@ int __drbd_set_state(struct drbd_conf *mdev, wake_up(&mdev->misc_wait); wake_up(&mdev->state_wait); - /** post-state-change actions **/ + /* post-state-change actions */ if (os.conn >= C_SYNC_SOURCE && ns.conn <= C_CONNECTED) { set_bit(STOP_SYNC_TIMER, &mdev->flags); mod_timer(&mdev->resync_timer, jiffies); @@ -1010,7 +1081,7 @@ int __drbd_set_state(struct drbd_conf *mdev, } if (get_ldev(mdev)) { - u32 mdf = mdev->bc->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND| + u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND| MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE| MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY); @@ -1027,12 +1098,12 @@ int __drbd_set_state(struct drbd_conf *mdev, mdf |= MDF_WAS_UP_TO_DATE; if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT) mdf |= MDF_PEER_OUT_DATED; - if (mdf != mdev->bc->md.flags) { - mdev->bc->md.flags = mdf; + if (mdf != mdev->ldev->md.flags) { + mdev->ldev->md.flags = mdf; drbd_md_mark_dirty(mdev); } if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT) - drbd_set_ed_uuid(mdev, mdev->bc->md.uuid[UI_CURRENT]); + drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]); put_ldev(mdev); } @@ -1102,6 +1173,13 @@ static void abw_start_sync(struct drbd_conf *mdev, int rv) } } +/** + * after_state_ch() - Perform after state change actions that may sleep + * @mdev: DRBD device. + * @os: old state. + * @ns: new state. 
+ * @flags: Flags + */ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags) { @@ -1115,7 +1193,7 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, fp = FP_DONT_CARE; if (get_ldev(mdev)) { - fp = mdev->bc->dc.fencing; + fp = mdev->ldev->dc.fencing; put_ldev(mdev); } @@ -1159,7 +1237,7 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, mdev->p_uuid = NULL; if (get_ldev(mdev)) { if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && - mdev->bc->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { + mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { drbd_uuid_new_current(mdev); drbd_send_uuids(mdev); } @@ -1168,7 +1246,7 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, } if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { - if (ns.peer == R_PRIMARY && mdev->bc->md.uuid[UI_BITMAP] == 0) + if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) drbd_uuid_new_current(mdev); /* D_DISKLESS Peer becomes secondary */ @@ -1224,13 +1302,13 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, mdev->rs_failed = 0; atomic_set(&mdev->rs_pending_cnt, 0); - lc_free(mdev->resync); + lc_destroy(mdev->resync); mdev->resync = NULL; - lc_free(mdev->act_log); + lc_destroy(mdev->act_log); mdev->act_log = NULL; __no_warn(local, - drbd_free_bc(mdev->bc); - mdev->bc = NULL;); + drbd_free_bc(mdev->ldev); + mdev->ldev = NULL;); if (mdev->md_io_tmpp) __free_page(mdev->md_io_tmpp); @@ -1273,12 +1351,13 @@ STATIC int drbd_thread_setup(void *arg) { struct drbd_thread *thi = (struct drbd_thread *) arg; struct drbd_conf *mdev = thi->mdev; + unsigned long flags; int retval; restart: retval = thi->function(thi); - spin_lock(&thi->t_lock); + spin_lock_irqsave(&thi->t_lock, flags); /* if the receiver has been "Exiting", the last thing it did * was set the conn state to "StandAlone", @@ -1293,7 +1372,7 @@ restart: if (thi->t_state == Restarting) { dev_info(DEV, "Restarting %s\n", current->comm); thi->t_state = Running; - spin_unlock(&thi->t_lock); + spin_unlock_irqrestore(&thi->t_lock, flags); goto restart; } @@ -1301,7 +1380,7 @@ restart: thi->t_state = None; smp_mb(); complete(&thi->stop); - spin_unlock(&thi->t_lock); + spin_unlock_irqrestore(&thi->t_lock, flags); dev_info(DEV, "Terminating %s\n", current->comm); @@ -1324,12 +1403,17 @@ int drbd_thread_start(struct drbd_thread *thi) { struct drbd_conf *mdev = thi->mdev; struct task_struct *nt; + unsigned long flags; + const char *me = thi == &mdev->receiver ? "receiver" : thi == &mdev->asender ? "asender" : thi == &mdev->worker ? "worker" : "NONSENSE"; - spin_lock(&thi->t_lock); + /* is used from state engine doing drbd_thread_stop_nowait, + * while holding the req lock irqsave */ + spin_lock_irqsave(&thi->t_lock, flags); + switch (thi->t_state) { case None: dev_info(DEV, "Starting %s thread (from %s [%d])\n", @@ -1338,14 +1422,14 @@ int drbd_thread_start(struct drbd_thread *thi) /* Get ref on module for thread - this is released when thread exits */ if (!try_module_get(THIS_MODULE)) { dev_err(DEV, "Failed to get module reference in drbd_thread_start\n"); - spin_unlock(&thi->t_lock); + spin_unlock_irqrestore(&thi->t_lock, flags); return FALSE; } D_ASSERT(thi->task == NULL); thi->reset_cpu_mask = 1; thi->t_state = Running; - spin_unlock(&thi->t_lock); + spin_unlock_irqrestore(&thi->t_lock, flags); flush_signals(current); /* otherw. 
may get -ERESTARTNOINTR */ nt = kthread_create(drbd_thread_setup, (void *) thi, @@ -1371,7 +1455,7 @@ int drbd_thread_start(struct drbd_thread *thi) case Running: case Restarting: default: - spin_unlock(&thi->t_lock); + spin_unlock_irqrestore(&thi->t_lock, flags); break; } @@ -1381,12 +1465,14 @@ int drbd_thread_start(struct drbd_thread *thi) void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) { + unsigned long flags; enum drbd_thread_state ns = restart ? Restarting : Exiting; - spin_lock(&thi->t_lock); + /* may be called from state engine, holding the req lock irqsave */ + spin_lock_irqsave(&thi->t_lock, flags); if (thi->t_state == None) { - spin_unlock(&thi->t_lock); + spin_unlock_irqrestore(&thi->t_lock, flags); if (restart) drbd_thread_start(thi); return; @@ -1394,7 +1480,7 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) if (thi->t_state != ns) { if (thi->task == NULL) { - spin_unlock(&thi->t_lock); + spin_unlock_irqrestore(&thi->t_lock, flags); return; } @@ -1406,7 +1492,7 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) } - spin_unlock(&thi->t_lock); + spin_unlock_irqrestore(&thi->t_lock, flags); if (wait) wait_for_completion(&thi->stop); @@ -1414,7 +1500,9 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) #ifdef CONFIG_SMP /** - * drbd_calc_cpu_mask: Generates CPU masks, sprad over all CPUs. + * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs + * @mdev: DRBD device. + * * Forces all threads of a device onto the same CPU. This is benificial for * DRBD's performance. May be overwritten by user's configuration. */ @@ -1439,9 +1527,12 @@ cpumask_t drbd_calc_cpu_mask(struct drbd_conf *mdev) return (cpumask_t) CPU_MASK_ALL; /* Never reached. */ } -/* modifies the cpu mask of the _current_ thread, - * call in the "main loop" of _all_ threads. - * no need for any mutex, current won't die prematurely. +/** + * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread + * @mdev: DRBD device. + * + * call in the "main loop" of _all_ threads, no need for any mutex, current won't die + * prematurely. */ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) { @@ -1623,7 +1714,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) return 1; for (i = UI_CURRENT; i < UI_SIZE; i++) - p.uuid[i] = mdev->bc ? cpu_to_be64(mdev->bc->md.uuid[i]) : 0; + p.uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0; mdev->comm_bm_set = drbd_bm_total_weight(mdev); p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set); @@ -1667,9 +1758,9 @@ int drbd_send_sizes(struct drbd_conf *mdev) int ok; if (get_ldev_if_state(mdev, D_NEGOTIATING)) { - D_ASSERT(mdev->bc->backing_bdev); - d_size = drbd_get_max_capacity(mdev->bc); - u_size = mdev->bc->dc.disk_size; + D_ASSERT(mdev->ldev->backing_bdev); + d_size = drbd_get_max_capacity(mdev->ldev); + u_size = mdev->ldev->dc.disk_size; q_order_type = drbd_queue_order_type(mdev); p.queue_order_type = cpu_to_be32(drbd_queue_order_type(mdev)); put_ldev(mdev); @@ -1691,11 +1782,8 @@ int drbd_send_sizes(struct drbd_conf *mdev) } /** - * drbd_send_state: - * Informs the peer about our state. Only call it when - * mdev->state.conn >= C_CONNECTED (I.e. you may not call it while in - * WFReportParams. Though there is one valid and necessary exception, - * drbd_connect() calls drbd_send_state() while in it WFReportParams. + * drbd_send_state() - Sends the drbd state to the peer + * @mdev: DRBD device. 
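
The drbd_thread_setup() changes above keep its restart loop intact: when a stopper sets the state to Restarting, the thread function is simply run again instead of terminating. The shape of that loop as a runnable sketch (single-threaded stand-in, the irqsave locking of the real code omitted):

#include <stdio.h>

enum demo_thi_state { DEMO_NONE, DEMO_RUNNING, DEMO_EXITING, DEMO_RESTARTING };

static enum demo_thi_state t_state = DEMO_RUNNING;
static int runs;

static int payload(void)
{
	printf("payload run %d\n", ++runs);
	if (runs == 1)
		t_state = DEMO_RESTARTING;	/* somebody requested a restart */
	return 0;
}

int main(void)
{
	for (;;) {
		payload();
		if (t_state == DEMO_RESTARTING) {	/* cf. goto restart */
			t_state = DEMO_RUNNING;
			continue;
		}
		t_state = DEMO_NONE;
		break;
	}
	return 0;
}
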
*/ int drbd_send_state(struct drbd_conf *mdev) { @@ -1906,7 +1994,7 @@ int _drbd_send_bitmap(struct drbd_conf *mdev) } if (get_ldev(mdev)) { - if (drbd_md_test_flag(mdev->bc, MDF_FULL_SYNC)) { + if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n"); drbd_bm_set_all(mdev); if (drbd_bm_write(mdev)) { @@ -1962,9 +2050,12 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) } /** - * _drbd_send_ack: - * This helper function expects the sector and block_id parameter already - * in big endian! + * _drbd_send_ack() - Sends an ack packet + * @mdev: DRBD device. + * @cmd: Packet command code. + * @sector: sector, needs to be in big endian byte order + * @blksize: size in byte, needs to be in big endian byte order + * @block_id: Id, big endian byte order */ STATIC int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, u64 sector, @@ -2003,6 +2094,12 @@ int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd, return _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id); } +/** + * drbd_send_ack() - Sends an ack packet + * @mdev: DRBD device. + * @cmd: Packet command code. + * @e: Epoch entry. + */ int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, struct drbd_epoch_entry *e) { @@ -2763,10 +2860,8 @@ static void drbd_delete_device(unsigned int minor) mdev->ee_hash = NULL; */ - if (mdev->act_log) - lc_free(mdev->act_log); - if (mdev->resync) - lc_free(mdev->resync); + lc_destroy(mdev->act_log); + lc_destroy(mdev->resync); kfree(mdev->p_uuid); /* mdev->p_uuid = NULL; */ @@ -2806,9 +2901,11 @@ STATIC void drbd_cleanup(void) } /** - * drbd_congested: Returns 1<bc->backing_bdev); + q = bdev_get_queue(mdev->ldev->backing_bdev); r = bdi_congested(&q->backing_dev_info, bdi_bits); put_ldev(mdev); if (r) @@ -3029,18 +3126,18 @@ Enomem: return err; } -void drbd_free_bc(struct drbd_backing_dev *bc) +void drbd_free_bc(struct drbd_backing_dev *ldev) { - if (bc == NULL) + if (ldev == NULL) return; - bd_release(bc->backing_bdev); - bd_release(bc->md_bdev); + bd_release(ldev->backing_bdev); + bd_release(ldev->md_bdev); - fput(bc->lo_file); - fput(bc->md_file); + fput(ldev->lo_file); + fput(ldev->md_file); - kfree(bc); + kfree(ldev); } void drbd_free_sock(struct drbd_conf *mdev) @@ -3072,11 +3169,10 @@ void drbd_free_resources(struct drbd_conf *mdev) drbd_free_sock(mdev); __no_warn(local, - drbd_free_bc(mdev->bc); - mdev->bc = NULL;); + drbd_free_bc(mdev->ldev); + mdev->ldev = NULL;); } -/*********************************/ /* meta data management */ struct meta_data_on_disk { @@ -3097,8 +3193,8 @@ struct meta_data_on_disk { } __packed; /** - * drbd_md_sync: - * Writes the meta data super block if the MD_DIRTY flag bit is set. + * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set + * @mdev: DRBD device. 
*/ void drbd_md_sync(struct drbd_conf *mdev) { @@ -3115,7 +3211,7 @@ void drbd_md_sync(struct drbd_conf *mdev) if (!get_ldev_if_state(mdev, D_FAILED)) return; - trace_drbd_md_io(mdev, WRITE, mdev->bc); + trace_drbd_md_io(mdev, WRITE, mdev->ldev); mutex_lock(&mdev->md_io_mutex); buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); @@ -3123,22 +3219,22 @@ void drbd_md_sync(struct drbd_conf *mdev) buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); for (i = UI_CURRENT; i < UI_SIZE; i++) - buffer->uuid[i] = cpu_to_be64(mdev->bc->md.uuid[i]); - buffer->flags = cpu_to_be32(mdev->bc->md.flags); + buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]); + buffer->flags = cpu_to_be32(mdev->ldev->md.flags); buffer->magic = cpu_to_be32(DRBD_MD_MAGIC); - buffer->md_size_sect = cpu_to_be32(mdev->bc->md.md_size_sect); - buffer->al_offset = cpu_to_be32(mdev->bc->md.al_offset); + buffer->md_size_sect = cpu_to_be32(mdev->ldev->md.md_size_sect); + buffer->al_offset = cpu_to_be32(mdev->ldev->md.al_offset); buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements); buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE); - buffer->device_uuid = cpu_to_be64(mdev->bc->md.device_uuid); + buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid); - buffer->bm_offset = cpu_to_be32(mdev->bc->md.bm_offset); + buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset); - D_ASSERT(drbd_md_ss__(mdev, mdev->bc) == mdev->bc->md.md_offset); - sector = mdev->bc->md.md_offset; + D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset); + sector = mdev->ldev->md.md_offset; - if (drbd_md_sync_page_io(mdev, mdev->bc, sector, WRITE)) { + if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { clear_bit(MD_DIRTY, &mdev->flags); } else { /* this was a try anyways ... */ @@ -3148,20 +3244,21 @@ void drbd_md_sync(struct drbd_conf *mdev) drbd_io_error(mdev, TRUE); } - /* Update mdev->bc->md.la_size_sect, + /* Update mdev->ldev->md.la_size_sect, * since we updated it on metadata. */ - mdev->bc->md.la_size_sect = drbd_get_capacity(mdev->this_bdev); + mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev); mutex_unlock(&mdev->md_io_mutex); put_ldev(mdev); } /** - * drbd_md_read: - * @bdev: describes the backing storage and the meta-data storage - * Reads the meta data from bdev. Return 0 (NO_ERROR) on success, and an - * enum drbd_ret_codes in case something goes wrong. - * Currently only: ERR_IO_MD_DISK, MDInvalid. + * drbd_md_read() - Reads in the meta data super block + * @mdev: DRBD device. + * @bdev: Device from which the meta data should be read in. + * + * Return 0 (NO_ERROR) on success, and an enum drbd_ret_codes in case + * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID. */ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) { @@ -3233,7 +3330,9 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) } /** - * drbd_md_mark_dirty: + * drbd_md_mark_dirty() - Mark meta data super block as dirty + * @mdev: DRBD device. + * * Call this function if you change enything that should be written to * the meta-data super block. This function sets MD_DIRTY, and starts a * timer that ensures that within five seconds you have to call drbd_md_sync(). 
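
drbd_md_sync() above converts every super block field with cpu_to_be32()/cpu_to_be64() before it reaches the disk, so the meta data is byte-identical regardless of host endianness. A userspace sketch of the same idea, serializing to big endian by hand:

#include <stdint.h>
#include <stdio.h>

/* what cpu_to_be32() achieves in the kernel, spelled out */
static void put_be32(uint8_t *p, uint32_t v)
{
	p[0] = v >> 24; p[1] = v >> 16; p[2] = v >> 8; p[3] = v;
}

static uint32_t get_be32(const uint8_t *p)	/* cf. be32_to_cpu() */
{
	return (uint32_t)p[0] << 24 | (uint32_t)p[1] << 16 |
	       (uint32_t)p[2] << 8 | p[3];
}

int main(void)
{
	uint8_t disk[4];

	put_be32(disk, 0x12345678);	/* "write" a flags word */
	printf("round trip %#x, first byte on disk %#x\n",
	       get_be32(disk), disk[0]);
	return 0;
}
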
@@ -3250,7 +3349,7 @@ STATIC void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) int i; for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) { - mdev->bc->md.uuid[i+1] = mdev->bc->md.uuid[i]; + mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i]; trace_drbd_uuid(mdev, i+1); } @@ -3267,7 +3366,7 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) drbd_set_ed_uuid(mdev, val); } - mdev->bc->md.uuid[idx] = val; + mdev->ldev->md.uuid[idx] = val; trace_drbd_uuid(mdev, idx); drbd_md_mark_dirty(mdev); } @@ -3275,16 +3374,18 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) { - if (mdev->bc->md.uuid[idx]) { + if (mdev->ldev->md.uuid[idx]) { drbd_uuid_move_history(mdev); - mdev->bc->md.uuid[UI_HISTORY_START] = mdev->bc->md.uuid[idx]; + mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx]; trace_drbd_uuid(mdev, UI_HISTORY_START); } _drbd_uuid_set(mdev, idx, val); } /** - * drbd_uuid_new_current: + * drbd_uuid_new_current() - Creates a new current UUID + * @mdev: DRBD device. + * * Creates a new current UUID, and rotates the old current UUID into * the bitmap slot. Causes an incremental resync upon next connect. */ @@ -3293,8 +3394,8 @@ void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) u64 val; dev_info(DEV, "Creating new current UUID\n"); - D_ASSERT(mdev->bc->md.uuid[UI_BITMAP] == 0); - mdev->bc->md.uuid[UI_BITMAP] = mdev->bc->md.uuid[UI_CURRENT]; + D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0); + mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT]; trace_drbd_uuid(mdev, UI_BITMAP); get_random_bytes(&val, sizeof(u64)); @@ -3303,21 +3404,21 @@ void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) { - if (mdev->bc->md.uuid[UI_BITMAP] == 0 && val == 0) + if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0) return; if (val == 0) { drbd_uuid_move_history(mdev); - mdev->bc->md.uuid[UI_HISTORY_START] = mdev->bc->md.uuid[UI_BITMAP]; - mdev->bc->md.uuid[UI_BITMAP] = 0; + mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP]; + mdev->ldev->md.uuid[UI_BITMAP] = 0; trace_drbd_uuid(mdev, UI_HISTORY_START); trace_drbd_uuid(mdev, UI_BITMAP); } else { - if (mdev->bc->md.uuid[UI_BITMAP]) + if (mdev->ldev->md.uuid[UI_BITMAP]) dev_warn(DEV, "bm UUID already set"); - mdev->bc->md.uuid[UI_BITMAP] = val; - mdev->bc->md.uuid[UI_BITMAP] &= ~((u64)1); + mdev->ldev->md.uuid[UI_BITMAP] = val; + mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1); trace_drbd_uuid(mdev, UI_BITMAP); } @@ -3325,9 +3426,10 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) } /** - * drbd_bmio_set_n_write: - * Is an io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io() that sets - * all bits in the bitmap and writes the whole bitmap to stable storage. + * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io() + * @mdev: DRBD device. + * + * Sets all bits in the bitmap and writes the whole bitmap to stable storage. */ int drbd_bmio_set_n_write(struct drbd_conf *mdev) { @@ -3352,9 +3454,10 @@ int drbd_bmio_set_n_write(struct drbd_conf *mdev) } /** - * drbd_bmio_clear_n_write: - * Is an io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io() that clears - * all bits in the bitmap and writes the whole bitmap to stable storage. 
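
The UUID code above always rotates in one direction: history slots shift by one, the current UUID can retire into the bitmap slot, and a fresh random value becomes current, which is what lets the peers later judge how far their histories diverged. A compact sketch of those two moves (slot values invented):

#include <stdint.h>
#include <stdio.h>

enum { UI_CURRENT, UI_BITMAP, UI_HISTORY_START, UI_HISTORY_END, UI_SIZE };

static void demo_move_history(uint64_t *uuid)
{
	for (int i = UI_HISTORY_END; i > UI_HISTORY_START; i--)
		uuid[i] = uuid[i - 1];	/* oldest entry falls off */
}

static void demo_new_current(uint64_t *uuid, uint64_t fresh)
{
	uuid[UI_BITMAP] = uuid[UI_CURRENT];	/* retire the old current */
	uuid[UI_CURRENT] = fresh;		/* cf. get_random_bytes() */
}

int main(void)
{
	uint64_t uuid[UI_SIZE] = { 0xA1, 0, 0x51, 0x52 };

	demo_move_history(uuid);
	demo_new_current(uuid, 0xB2);
	for (int i = 0; i < UI_SIZE; i++)
		printf("uuid[%d] = %#llx\n", i, (unsigned long long)uuid[i]);
	return 0;
}
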
+ * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
+ * @mdev: DRBD device.
+ *
+ * Clears all bits in the bitmap and writes the whole bitmap to stable storage.
 */
int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
{
@@ -3393,13 +3496,16 @@ STATIC int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
 }
 
 /**
- * drbd_queue_bitmap_io:
- * Queues an IO operation on the whole bitmap.
+ * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
+ * @mdev: DRBD device.
+ * @io_fn: IO callback to be called when bitmap IO is possible
+ * @done: callback to be called after the bitmap IO was performed
+ * @why: Descriptive text of the reason for doing the IO
+ *
 * While IO on the bitmap happens we freeze appliation IO thus we ensure
- * that drbd_set_out_of_sync() can not be called.
- * This function MUST ONLY be called from worker context.
- * BAD API ALERT!
- * It MUST NOT be used while a previous such work is still pending!
+ * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be
+ * called from worker context. It MUST NOT be used while a previous such
+ * work is still pending!
 */
void drbd_queue_bitmap_io(struct drbd_conf *mdev,
			  int (*io_fn)(struct drbd_conf *),
@@ -3430,9 +3536,13 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev,
 }
 
 /**
- * drbd_bitmap_io:
- * Does an IO operation on the bitmap, freezing application IO while that
- * IO operations runs. This functions MUST NOT be called from worker context.
+ * drbd_bitmap_io() - Does an IO operation on the whole bitmap
+ * @mdev: DRBD device.
+ * @io_fn: IO callback to be called when bitmap IO is possible
+ * @why: Descriptive text of the reason for doing the IO
+ *
+ * Freezes application IO while the actual IO operation runs. This
+ * function MAY NOT be called from worker context.
*/ int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why) { @@ -3453,17 +3563,17 @@ int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), cha void drbd_md_set_flag(struct drbd_conf *mdev, int flag) __must_hold(local) { - if ((mdev->bc->md.flags & flag) != flag) { + if ((mdev->ldev->md.flags & flag) != flag) { drbd_md_mark_dirty(mdev); - mdev->bc->md.flags |= flag; + mdev->ldev->md.flags |= flag; } } void drbd_md_clear_flag(struct drbd_conf *mdev, int flag) __must_hold(local) { - if ((mdev->bc->md.flags & flag) != 0) { + if ((mdev->ldev->md.flags & flag) != 0) { drbd_md_mark_dirty(mdev); - mdev->bc->md.flags &= ~flag; + mdev->ldev->md.flags &= ~flag; } } int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 55dbf83d559f..c6217d6a2465 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -165,7 +165,7 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev) D_ASSERT(mdev->state.pdsk == D_UNKNOWN); if (get_ldev_if_state(mdev, D_CONSISTENT)) { - fp = mdev->bc->dc.fencing; + fp = mdev->ldev->dc.fencing; put_ldev(mdev); } else { dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n"); @@ -313,7 +313,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) if (new_role == R_SECONDARY) { set_disk_ro(mdev->vdisk, TRUE); if (get_ldev(mdev)) { - mdev->bc->md.uuid[UI_CURRENT] &= ~(u64)1; + mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; put_ldev(mdev); } } else { @@ -325,10 +325,10 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) if (get_ldev(mdev)) { if (((mdev->state.conn < C_CONNECTED || mdev->state.pdsk <= D_FAILED) - && mdev->bc->md.uuid[UI_BITMAP] == 0) || forced) + && mdev->ldev->md.uuid[UI_BITMAP] == 0) || forced) drbd_uuid_new_current(mdev); - mdev->bc->md.uuid[UI_CURRENT] |= (u64)1; + mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1; put_ldev(mdev); } } @@ -463,10 +463,10 @@ void drbd_resume_io(struct drbd_conf *mdev) } /** - * drbd_determin_dev_size: - * Evaluates all constraints and sets our correct device size. - * Negative return values indicate errors. 0 and positive values - * indicate success. + * drbd_determin_dev_size() - Sets the right device size obeying all constraints + * @mdev: DRBD device. + * + * Returns 0 on success, negative return values indicate errors. * You should call drbd_md_sync() after calling this function. */ enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_hold(local) @@ -493,14 +493,14 @@ enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_ho /* no wait necessary anymore, actually we could assert that */ wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); - prev_first_sect = drbd_md_first_sector(mdev->bc); - prev_size = mdev->bc->md.md_size_sect; - la_size = mdev->bc->md.la_size_sect; + prev_first_sect = drbd_md_first_sector(mdev->ldev); + prev_size = mdev->ldev->md.md_size_sect; + la_size = mdev->ldev->md.la_size_sect; /* TODO: should only be some assert here, not (re)init... 
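
The hunks below switch from lc_alloc()/lc_free() to lc_create()/lc_destroy(); judging from the two call sites, the new creator takes the byte offset of the embedded struct lc_element instead of an opaque private pointer (0 when the element is the whole object, offsetof(struct bm_extent, lce) for the resync cache). A tiny sketch of the two values such a call hands over (structure layout illustrative):

#include <stddef.h>
#include <stdio.h>

struct lc_element { unsigned int lc_number; unsigned int refcnt; };

struct bm_extent_demo {
	int rs_left;
	int rs_failed;
	unsigned long flags;
	struct lc_element lce;
};

int main(void)
{
	/* what the resync cache setup below would pass to lc_create() */
	printf("element size %zu, embedded lc_element at offset %zu\n",
	       sizeof(struct bm_extent_demo),
	       offsetof(struct bm_extent_demo, lce));
	return 0;
}
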
*/ - drbd_md_set_sector_offsets(mdev, mdev->bc); + drbd_md_set_sector_offsets(mdev, mdev->ldev); - size = drbd_new_dev_size(mdev, mdev->bc); + size = drbd_new_dev_size(mdev, mdev->ldev); if (drbd_get_capacity(mdev->this_bdev) != size || drbd_bm_capacity(mdev) != size) { @@ -521,17 +521,17 @@ enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_ho } /* racy, see comments above. */ drbd_set_my_capacity(mdev, size); - mdev->bc->md.la_size_sect = size; + mdev->ldev->md.la_size_sect = size; dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1), (unsigned long long)size>>1); } if (rv == dev_size_error) goto out; - la_size_changed = (la_size != mdev->bc->md.la_size_sect); + la_size_changed = (la_size != mdev->ldev->md.la_size_sect); - md_moved = prev_first_sect != drbd_md_first_sector(mdev->bc) - || prev_size != mdev->bc->md.md_size_sect; + md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev) + || prev_size != mdev->ldev->md.md_size_sect; if (md_moved) { dev_warn(DEV, "Moving meta-data.\n"); @@ -600,11 +600,12 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) } /** - * drbd_check_al_size: - * checks that the al lru is of requested size, and if neccessary tries to - * allocate a new one. returns -EBUSY if current al lru is still used, - * -ENOMEM when allocation failed, and 0 on success. You should call - * drbd_md_sync() after you called this function. + * drbd_check_al_size() - Ensures that the AL is of the right size + * @mdev: DRBD device. + * + * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation + * failed, and 0 on success. You should call drbd_md_sync() after you called + * this function. */ STATIC int drbd_check_al_size(struct drbd_conf *mdev) { @@ -622,8 +623,8 @@ STATIC int drbd_check_al_size(struct drbd_conf *mdev) in_use = 0; t = mdev->act_log; - n = lc_alloc("act_log", mdev->sync_conf.al_extents, - sizeof(struct lc_element), mdev); + n = lc_create("act_log", mdev->sync_conf.al_extents, + sizeof(struct lc_element), 0); if (n == NULL) { dev_err(DEV, "Cannot allocate act_log lru!\n"); @@ -632,7 +633,7 @@ STATIC int drbd_check_al_size(struct drbd_conf *mdev) spin_lock_irq(&mdev->al_lock); if (t) { for (i = 0; i < t->nr_elements; i++) { - e = lc_entry(t, i); + e = lc_element_by_index(t, i); if (e->refcnt) dev_err(DEV, "refcnt(%d)==%d\n", e->lc_number, e->refcnt); @@ -644,11 +645,11 @@ STATIC int drbd_check_al_size(struct drbd_conf *mdev) spin_unlock_irq(&mdev->al_lock); if (in_use) { dev_err(DEV, "Activity log still in use!\n"); - lc_free(n); + lc_destroy(n); return -EBUSY; } else { if (t) - lc_free(t); + lc_destroy(t); } drbd_md_mark_dirty(mdev); /* we changed mdev->act_log->nr_elemens */ return 0; @@ -657,11 +658,11 @@ STATIC int drbd_check_al_size(struct drbd_conf *mdev) void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __must_hold(local) { struct request_queue * const q = mdev->rq_queue; - struct request_queue * const b = mdev->bc->backing_bdev->bd_disk->queue; + struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; /* unsigned int old_max_seg_s = q->max_segment_size; */ - int max_segments = mdev->bc->dc.max_bio_bvecs; + int max_segments = mdev->ldev->dc.max_bio_bvecs; - if (b->merge_bvec_fn && !mdev->bc->dc.use_bmbv) + if (b->merge_bvec_fn && !mdev->ldev->dc.use_bmbv) max_seg_s = PAGE_SIZE; max_seg_s = min(b->max_sectors * b->hardsect_size, max_seg_s); @@ -816,7 +817,8 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp 
goto fail; } - resync_lru = lc_alloc("resync", 61, sizeof(struct bm_extent), mdev); + resync_lru = lc_create("resync", 61, sizeof(struct bm_extent), + offsetof(struct bm_extent, lce)); if (!resync_lru) { retcode = ERR_NOMEM; goto release_bdev_fail; @@ -964,8 +966,8 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp * Devices and memory are no longer released by error cleanup below. * now mdev takes over responsibility, and the state engine should * clean it up somewhere. */ - D_ASSERT(mdev->bc == NULL); - mdev->bc = nbc; + D_ASSERT(mdev->ldev == NULL); + mdev->ldev = nbc; mdev->resync = resync_lru; nbc = NULL; resync_lru = NULL; @@ -973,12 +975,12 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp mdev->write_ordering = WO_bio_barrier; drbd_bump_write_ordering(mdev, WO_bio_barrier); - if (drbd_md_test_flag(mdev->bc, MDF_CRASHED_PRIMARY)) + if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY)) set_bit(CRASHED_PRIMARY, &mdev->flags); else clear_bit(CRASHED_PRIMARY, &mdev->flags); - if (drbd_md_test_flag(mdev->bc, MDF_PRIMARY_IND)) { + if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND)) { set_bit(CRASHED_PRIMARY, &mdev->flags); cp_discovered = 1; } @@ -1006,8 +1008,8 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp */ clear_bit(USE_DEGR_WFC_T, &mdev->flags); if (mdev->state.role != R_PRIMARY && - drbd_md_test_flag(mdev->bc, MDF_PRIMARY_IND) && - !drbd_md_test_flag(mdev->bc, MDF_CONNECTED_IND)) + drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) && + !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND)) set_bit(USE_DEGR_WFC_T, &mdev->flags); dd = drbd_determin_dev_size(mdev); @@ -1017,7 +1019,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } else if (dd == grew) set_bit(RESYNC_AFTER_NEG, &mdev->flags); - if (drbd_md_test_flag(mdev->bc, MDF_FULL_SYNC)) { + if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { dev_info(DEV, "Assuming that all blocks are out of sync " "(aka FullSync)\n"); if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from attaching")) { @@ -1044,8 +1046,8 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state, otherwise into D_CONSISTENT state. 
*/ - if (drbd_md_test_flag(mdev->bc, MDF_CONSISTENT)) { - if (drbd_md_test_flag(mdev->bc, MDF_WAS_UP_TO_DATE)) + if (drbd_md_test_flag(mdev->ldev, MDF_CONSISTENT)) { + if (drbd_md_test_flag(mdev->ldev, MDF_WAS_UP_TO_DATE)) ns.disk = D_CONSISTENT; else ns.disk = D_OUTDATED; @@ -1053,11 +1055,11 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp ns.disk = D_INCONSISTENT; } - if (drbd_md_test_flag(mdev->bc, MDF_PEER_OUT_DATED)) + if (drbd_md_test_flag(mdev->ldev, MDF_PEER_OUT_DATED)) ns.pdsk = D_OUTDATED; if ( ns.disk == D_CONSISTENT && - (ns.pdsk == D_OUTDATED || mdev->bc->dc.fencing == FP_DONT_CARE)) + (ns.pdsk == D_OUTDATED || mdev->ldev->dc.fencing == FP_DONT_CARE)) ns.disk = D_UP_TO_DATE; /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND, @@ -1081,9 +1083,9 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp goto force_diskless_dec; if (mdev->state.role == R_PRIMARY) - mdev->bc->md.uuid[UI_CURRENT] |= (u64)1; + mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1; else - mdev->bc->md.uuid[UI_CURRENT] &= ~(u64)1; + mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; drbd_md_mark_dirty(mdev); drbd_md_sync(mdev); @@ -1113,8 +1115,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp fput(nbc->md_file); kfree(nbc); } - if (resync_lru) - lc_free(resync_lru); + lc_destroy(resync_lru); reply->ret_code = retcode; drbd_reconfig_done(mdev); @@ -1456,12 +1457,12 @@ STATIC int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, goto fail; } - if (mdev->bc->known_size != drbd_get_capacity(mdev->bc->backing_bdev)) { - mdev->bc->known_size = drbd_get_capacity(mdev->bc->backing_bdev); + if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) { + mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); ldsc = 1; } - mdev->bc->dc.disk_size = (sector_t)rs.resize_size; + mdev->ldev->dc.disk_size = (sector_t)rs.resize_size; dd = drbd_determin_dev_size(mdev); drbd_md_sync(mdev); put_ldev(mdev); @@ -1736,7 +1737,7 @@ STATIC int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl tl = reply->tag_list; if (get_ldev(mdev)) { - tl = disk_conf_to_tags(mdev, &mdev->bc->dc, tl); + tl = disk_conf_to_tags(mdev, &mdev->ldev->dc, tl); put_ldev(mdev); } @@ -1788,11 +1789,11 @@ STATIC int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp /* This is a hand crafted add tag ;) */ *tl++ = T_uuids; *tl++ = UI_SIZE*sizeof(u64); - memcpy(tl, mdev->bc->md.uuid, UI_SIZE*sizeof(u64)); + memcpy(tl, mdev->ldev->md.uuid, UI_SIZE*sizeof(u64)); tl = (unsigned short *)((char *)tl + UI_SIZE*sizeof(u64)); *tl++ = T_uuids_flags; *tl++ = sizeof(int); - memcpy(tl, &mdev->bc->md.flags, sizeof(int)); + memcpy(tl, &mdev->ldev->md.flags, sizeof(int)); tl = (unsigned short *)((char *)tl + sizeof(int)); put_ldev(mdev); } @@ -1802,8 +1803,10 @@ STATIC int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } /** - * drbd_nl_get_timeout_flag: - * Is used by drbdsetup to find out which timeout value to use. + * drbd_nl_get_timeout_flag() - Used by drbdsetup to find out which timeout value to use + * @mdev: DRBD device. 
+ * @nlp: Netlink/connector packet from drbdsetup
+ * @reply: Reply packet for drbdsetup
  */
 STATIC int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 				    struct drbd_nl_cfg_reply *reply)
@@ -1859,7 +1862,7 @@ STATIC int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
 
 	/* this is "skip initial sync", assume to be clean */
 	if (mdev->state.conn == C_CONNECTED && mdev->agreed_pro_version >= 90 &&
-	    mdev->bc->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
+	    mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
 		dev_info(DEV, "Preparing to skip initial sync\n");
 		skip_initial_sync = 1;
 	} else if (mdev->state.conn >= C_CONNECTED) {
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 9f0a3c0e952c..b59b9d9f078c 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -35,7 +35,6 @@
 #include <linux/seq_file.h>
 #include <linux/drbd.h>
 #include "drbd_int.h"
-#include "lru_cache.h" /* for lc_sprintf_stats */
 
 STATIC int drbd_proc_open(struct inode *inode, struct file *file);
@@ -137,7 +136,7 @@ STATIC void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
 
 STATIC void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
 {
-	struct bm_extent *bme = (struct bm_extent *)e;
+	struct bm_extent *bme = lc_entry(e, struct bm_extent, lce);
 
 	seq_printf(seq, "%5d %s %s\n", bme->rs_left,
 		   bme->flags & BME_NO_WRITES ? "NO_WRITES" : "---------",
@@ -244,14 +243,14 @@ STATIC int drbd_seq_show(struct seq_file *seq, void *v)
 				mdev->rs_total);
 
 		if (proc_details >= 1 && get_ldev_if_state(mdev, D_FAILED)) {
-			lc_printf_stats(seq, mdev->resync);
-			lc_printf_stats(seq, mdev->act_log);
+			lc_seq_printf_stats(seq, mdev->resync);
+			lc_seq_printf_stats(seq, mdev->act_log);
 			put_ldev(mdev);
 		}
 
 		if (proc_details >= 2) {
 			if (mdev->resync) {
-				lc_dump(mdev->resync, seq, "rs_left",
+				lc_seq_dump_details(seq, mdev->resync, "rs_left",
 					resync_dump_detail);
 			}
 		}
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 25da228de2fd..24dc84698de7 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -85,7 +85,13 @@ static struct drbd_epoch *previous_epoch(struct drbd_conf *mdev, struct drbd_epo
 #define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
 
 /**
- * drbd_bp_alloc: Returns a page. Fails only if a signal comes in.
+ * drbd_pp_alloc() - Returns a page, fails only if a signal comes in
+ * @mdev: DRBD device.
+ * @gfp_mask: Get free page allocation mask
+ *
+ * Allocates a page from the kernel or our own page pool. In case that
+ * allocation would go beyond the max_buffers setting, this function sleeps
+ * until DRBD frees a page somewhere else.
  */
 STATIC struct page *drbd_pp_alloc(struct drbd_conf *mdev, gfp_t gfp_mask)
 {
@@ -223,7 +229,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
 		goto fail1;
 	}
 
-	bio->bi_bdev = mdev->bc->backing_bdev;
+	bio->bi_bdev = mdev->ldev->backing_bdev;
 	bio->bi_sector = sector;
 
 	ds = data_size;
@@ -703,9 +709,9 @@ STATIC enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *soc
 }
 
 /**
- * drbd_socket_okay:
- * Tests if the connection behind the socket still exists. If not it frees
- * the socket.
+ * drbd_socket_okay() - Free the socket if its connection is not okay
+ * @mdev: DRBD device.
+ * @sock: pointer to the pointer to the socket.
*/ static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock) { @@ -936,7 +942,7 @@ STATIC enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d int rv; if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) { - rv = blkdev_issue_flush(mdev->bc->backing_bdev, NULL); + rv = blkdev_issue_flush(mdev->ldev->backing_bdev, NULL); if (rv) { dev_err(DEV, "local disk flush failed with status %d\n", rv); /* would rather check on EOPNOTSUPP, but that is not reliable. @@ -950,10 +956,6 @@ STATIC enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d return drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE); } -/** - * w_flush: Checks if an epoch can be closed and therefore might - * close and/or free the epoch object. - */ STATIC int w_flush(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { struct flush_work *fw = (struct flush_work *)w; @@ -971,8 +973,10 @@ STATIC int w_flush(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } /** - * drbd_may_finish_epoch: Checks if an epoch can be closed and therefore might - * close and/or free the epoch object. + * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it. + * @mdev: DRBD device. + * @epoch: Epoch object. + * @ev: Epoch event. */ STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch, @@ -1088,8 +1092,9 @@ STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, } /** - * drbd_bump_write_ordering: It turned out that the current mdev->write_ordering - * method does not work on the backing block device. Try the next allowed method. + * drbd_bump_write_ordering() - Fall back to an other write ordering method + * @mdev: DRBD device. + * @wo: Write ordering method to try. */ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local) { @@ -1103,11 +1108,11 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) pwo = mdev->write_ordering; wo = min(pwo, wo); - if (wo == WO_bio_barrier && mdev->bc->dc.no_disk_barrier) + if (wo == WO_bio_barrier && mdev->ldev->dc.no_disk_barrier) wo = WO_bdev_flush; - if (wo == WO_bdev_flush && mdev->bc->dc.no_disk_flush) + if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush) wo = WO_drain_io; - if (wo == WO_drain_io && mdev->bc->dc.no_disk_drain) + if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain) wo = WO_none; mdev->write_ordering = wo; if (pwo != mdev->write_ordering || wo == WO_bio_barrier) @@ -1115,8 +1120,10 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) } /** - * w_e_reissue: In case the IO subsystem delivered an error for an BIO with the - * BIO_RW_BARRIER flag set, retry that bio without the barrier flag set. + * w_e_reissue() - Worker callback; Resubmit a bio, without BIO_RW_BARRIER set + * @mdev: DRBD device. + * @w: work object. + * @cancel: The connection will be closed anyways (unused in this callback) */ int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __releases(local) { @@ -1140,7 +1147,7 @@ int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __relea * re-init volatile members */ /* we still have a local reference, * get_ldev was done in receive_Data. 
*/ - bio->bi_bdev = mdev->bc->backing_bdev; + bio->bi_bdev = mdev->ldev->backing_bdev; bio->bi_sector = e->sector; bio->bi_size = e->size; bio->bi_idx = 0; @@ -2066,7 +2073,7 @@ STATIC int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) int self, peer, rv = -100; unsigned long ch_self, ch_peer; - self = mdev->bc->md.uuid[UI_BITMAP] & 1; + self = mdev->ldev->md.uuid[UI_BITMAP] & 1; peer = mdev->p_uuid[UI_BITMAP] & 1; ch_peer = mdev->p_uuid[UI_SIZE]; @@ -2137,7 +2144,7 @@ STATIC int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) { int self, peer, hg, rv = -100; - self = mdev->bc->md.uuid[UI_BITMAP] & 1; + self = mdev->ldev->md.uuid[UI_BITMAP] & 1; peer = mdev->p_uuid[UI_BITMAP] & 1; switch (mdev->net_conf->after_sb_1p) { @@ -2183,7 +2190,7 @@ STATIC int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) { int self, peer, hg, rv = -100; - self = mdev->bc->md.uuid[UI_BITMAP] & 1; + self = mdev->ldev->md.uuid[UI_BITMAP] & 1; peer = mdev->p_uuid[UI_BITMAP] & 1; switch (mdev->net_conf->after_sb_2p) { @@ -2250,7 +2257,7 @@ STATIC int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l u64 self, peer; int i, j; - self = mdev->bc->md.uuid[UI_CURRENT] & ~((u64)1); + self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1); peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1); *rule_nr = 1; @@ -2299,20 +2306,20 @@ STATIC int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l } *rule_nr = 7; - self = mdev->bc->md.uuid[UI_BITMAP] & ~((u64)1); + self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1); peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1); if (self == peer) return 1; *rule_nr = 8; for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) { - self = mdev->bc->md.uuid[i] & ~((u64)1); + self = mdev->ldev->md.uuid[i] & ~((u64)1); if (self == peer) return 2; } *rule_nr = 9; - self = mdev->bc->md.uuid[UI_BITMAP] & ~((u64)1); + self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1); peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1); if (self == peer && self != ((u64)0)) return 100; @@ -2347,7 +2354,7 @@ STATIC enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol hg = drbd_uuid_compare(mdev, &rule_nr); dev_info(DEV, "drbd_sync_handshake:\n"); - drbd_uuid_dump(mdev, "self", mdev->bc->md.uuid, + drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0); drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]); @@ -2740,33 +2747,33 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct p_header *h) #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? 
l : min(l, r)) if (get_ldev(mdev)) { warn_if_differ_considerably(mdev, "lower level device sizes", - p_size, drbd_get_max_capacity(mdev->bc)); + p_size, drbd_get_max_capacity(mdev->ldev)); warn_if_differ_considerably(mdev, "user requested size", - p_usize, mdev->bc->dc.disk_size); + p_usize, mdev->ldev->dc.disk_size); /* if this is the first connect, or an otherwise expected * param exchange, choose the minimum */ if (mdev->state.conn == C_WF_REPORT_PARAMS) - p_usize = min_not_zero((sector_t)mdev->bc->dc.disk_size, + p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size, p_usize); - my_usize = mdev->bc->dc.disk_size; + my_usize = mdev->ldev->dc.disk_size; - if (mdev->bc->dc.disk_size != p_usize) { - mdev->bc->dc.disk_size = p_usize; + if (mdev->ldev->dc.disk_size != p_usize) { + mdev->ldev->dc.disk_size = p_usize; dev_info(DEV, "Peer sets u_size to %lu sectors\n", - (unsigned long)mdev->bc->dc.disk_size); + (unsigned long)mdev->ldev->dc.disk_size); } /* Never shrink a device with usable data during connect. But allow online shrinking if we are connected. */ - if (drbd_new_dev_size(mdev, mdev->bc) < + if (drbd_new_dev_size(mdev, mdev->ldev) < drbd_get_capacity(mdev->this_bdev) && mdev->state.disk >= D_OUTDATED && mdev->state.conn < C_CONNECTED) { dev_err(DEV, "The peer's disk size is too small!\n"); drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - mdev->bc->dc.disk_size = my_usize; + mdev->ldev->dc.disk_size = my_usize; put_ldev(mdev); return FALSE; } @@ -2802,8 +2809,8 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct p_header *h) } if (get_ldev(mdev)) { - if (mdev->bc->known_size != drbd_get_capacity(mdev->bc->backing_bdev)) { - mdev->bc->known_size = drbd_get_capacity(mdev->bc->backing_bdev); + if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) { + mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); ldsc = 1; } @@ -2866,7 +2873,7 @@ STATIC int receive_uuids(struct drbd_conf *mdev, struct p_header *h) int skip_initial_sync = mdev->state.conn == C_CONNECTED && mdev->agreed_pro_version >= 90 && - mdev->bc->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && + mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && (p_uuid[UI_FLAGS] & 8); if (skip_initial_sync) { dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n"); @@ -2893,8 +2900,8 @@ STATIC int receive_uuids(struct drbd_conf *mdev, struct p_header *h) } /** - * convert_state: - * Switches the view of the state. + * convert_state() - Converts the peer's view of the cluster state to our point of view + * @ps: The state as seen by the peer. */ STATIC union drbd_state convert_state(union drbd_state ps) { @@ -3519,7 +3526,7 @@ STATIC void drbd_disconnect(struct drbd_conf *mdev) fp = FP_DONT_CARE; if (get_ldev(mdev)) { - fp = mdev->bc->dc.fencing; + fp = mdev->ldev->dc.fencing; put_ldev(mdev); } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 2e70345a06d4..5c4039ad052e 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -925,7 +925,7 @@ allocate_barrier: kfree(b); /* if someone else has beaten us to it... 
*/ if (local) { - req->private_bio->bi_bdev = mdev->bc->backing_bdev; + req->private_bio->bi_bdev = mdev->ldev->backing_bdev; trace_drbd_bio(mdev, "Pri", req->private_bio, 0, NULL); @@ -1119,8 +1119,8 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct limit = bvec->bv_len; } else if (limit && get_ldev(mdev)) { struct request_queue * const b = - mdev->bc->backing_bdev->bd_disk->queue; - if (b->merge_bvec_fn && mdev->bc->dc.use_bmbv) { + mdev->ldev->backing_bdev->bd_disk->queue; + if (b->merge_bvec_fn && mdev->ldev->dc.use_bmbv) { backing_limit = b->merge_bvec_fn(b, bvm, bvec); limit = min(limit, backing_limit); } diff --git a/drivers/block/drbd/drbd_tracing.c b/drivers/block/drbd/drbd_tracing.c index ab5aba9c4972..b467e92dda76 100644 --- a/drivers/block/drbd/drbd_tracing.c +++ b/drivers/block/drbd/drbd_tracing.c @@ -124,7 +124,7 @@ static void probe_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index) dev_info(DEV, " uuid[%s] now %016llX\n", uuid_str[index], - (unsigned long long)mdev->bc->md.uuid[index]); + (unsigned long long)mdev->ldev->md.uuid[index]); } static void probe_drbd_md_io(struct drbd_conf *mdev, int rw, @@ -223,30 +223,20 @@ static void probe_drbd_actlog(struct drbd_conf *mdev, sector_t sector, char* msg (int)BM_SECT_TO_EXT(sector)); } -/* - * - * drbd_print_buffer - * - * This routine dumps binary data to the debugging output. Can be - * called at interrupt level. - * - * Arguments: - * - * prefix - String is output at the beginning of each line output - * flags - Control operation of the routine. Currently defined - * Flags are: - * DBGPRINT_BUFFADDR; if set, each line starts with the - * virtual address of the line being outupt. If clear, - * each line starts with the offset from the beginning - * of the buffer. - * size - Indicates the size of each entry in the buffer. Supported - * values are sizeof(char), sizeof(short) and sizeof(int) - * buffer - Start address of buffer - * buffer_va - Virtual address of start of buffer (normally the same - * as Buffer, but having it separate allows it to hold - * file address for example) - * length - length of buffer - * +/** + * drbd_print_buffer() - Hexdump arbitraty binary data into a buffer + * @prefix: String is output at the beginning of each line output. + * @flags: Currently only defined flag: DBGPRINT_BUFFADDR; if set, each + * line starts with the virtual address of the line being + * outupt. If clear, each line starts with the offset from the + * beginning of the buffer. + * @size: Indicates the size of each entry in the buffer. Supported + * values are sizeof(char), sizeof(short) and sizeof(int) + * @buffer: Start address of buffer + * @buffer_va: Virtual address of start of buffer (normally the same + * as Buffer, but having it separate allows it to hold + * file address for example) + * @length: length of buffer */ static void drbd_print_buffer(const char *prefix, unsigned int flags, int size, const void *buffer, const void *buffer_va, diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index dd984502d62e..96065835fb69 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -250,8 +250,8 @@ int w_io_error(struct drbd_conf *mdev, struct drbd_work *w, int cancel) struct drbd_request *req = (struct drbd_request *)w; int ok; - /* NOTE: mdev->bc can be NULL by the time we get here! */ - /* D_ASSERT(mdev->bc->dc.on_io_error != EP_PASS_ON); */ + /* NOTE: mdev->ldev can be NULL by the time we get here! 
 */
+	/* D_ASSERT(mdev->ldev->dc.on_io_error != EP_PASS_ON); */
 
 	/* the only way this callback is scheduled is from _req_may_be_done,
 	 * when it is done and had a local write error, see comments there */
@@ -740,7 +740,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
 			int i;
 			for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
 				_drbd_uuid_set(mdev, i, mdev->p_uuid[i]);
-			drbd_uuid_set(mdev, UI_BITMAP, mdev->bc->md.uuid[UI_CURRENT]);
+			drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_CURRENT]);
 			_drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]);
 		} else {
 			dev_err(DEV, "mdev->p_uuid is NULL! BUG\n");
@@ -754,7 +754,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
 			 * know of the peer. */
 			int i;
 			for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
-				mdev->p_uuid[i] = mdev->bc->md.uuid[i];
+				mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
 		}
 	}
 
@@ -781,7 +781,10 @@ out:
 }
 
 /**
- * w_e_end_data_req: Send the answer (P_DATA_REPLY) in response to a DataRequest.
+ * w_e_end_data_req() - Worker callback to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
+ * @mdev: DRBD device.
+ * @w: work object.
+ * @cancel: The connection will be closed anyway
  */
 int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 {
@@ -823,7 +826,10 @@ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 }
 
 /**
- * w_e_end_rsdata_req: Send the answer (P_RS_DATA_REPLY) to a RSDataRequest.
+ * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
+ * @mdev: DRBD device.
+ * @w: work object.
+ * @cancel: The connection will be closed anyway
  */
 int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 {
@@ -1100,7 +1106,10 @@ int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 }
 
 /**
- * w_send_dblock: Send a mirrored write request.
+ * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
+ * @mdev: DRBD device.
+ * @w: work object.
+ * @cancel: The connection will be closed anyway
  */
 int w_send_dblock(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 {
@@ -1119,7 +1128,10 @@ int w_send_dblock(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 }
 
 /**
- * w_send_read_req: Send a read requests.
+ * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
+ * @mdev: DRBD device.
+ * @w: work object.
+ * @cancel: The connection will be closed anyway
  */
 int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 {
@@ -1163,9 +1175,9 @@ STATIC int _drbd_may_sync_now(struct drbd_conf *mdev)
 }
 
 /**
- * _drbd_pause_after:
- * Finds all devices that may not resync now, and causes them to
- * pause their resynchronisation.
+ * _drbd_pause_after() - Pause resync on all devices that may not resync now
+ * @mdev: DRBD device.
+ *
 * Called from process context only (admin command and after_state_ch).
 */
 STATIC int _drbd_pause_after(struct drbd_conf *mdev)
@@ -1188,9 +1200,9 @@ STATIC int _drbd_pause_after(struct drbd_conf *mdev)
 }
 
 /**
- * _drbd_resume_next:
- * Finds all devices that can resume resynchronisation
- * process, and causes them to resume.
+ * _drbd_resume_next() - Resume resync on all devices that may resync now
+ * @mdev: DRBD device.
+ *
 * Called from process context only (admin command and worker).
*/ STATIC int _drbd_resume_next(struct drbd_conf *mdev) @@ -1244,12 +1256,12 @@ void drbd_alter_sa(struct drbd_conf *mdev, int na) } /** - * drbd_start_resync: - * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET - * Start the resync process. Called from process context only, - * either admin command or drbd_receiver. - * Note, this function might bring you directly into one of the - * PausedSync* states. + * drbd_start_resync() - Start the resync process + * @mdev: DRBD device. + * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET + * + * This function might bring you directly into one of the + * C_PAUSED_SYNC_* states. */ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) { diff --git a/drivers/block/drbd/lru_cache.c b/drivers/block/drbd/lru_cache.c deleted file mode 100644 index 80b0839a529d..000000000000 --- a/drivers/block/drbd/lru_cache.c +++ /dev/null @@ -1,398 +0,0 @@ -/* - lru_cache.c - - This file is part of DRBD by Philipp Reisner and Lars Ellenberg. - - Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. - Copyright (C) 2003-2008, Philipp Reisner . - Copyright (C) 2003-2008, Lars Ellenberg . - - drbd is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - drbd is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with drbd; see the file COPYING. If not, write to - the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - - */ - -#include -#include -#include /* for memset */ -#include /* for seq_printf */ -#include "lru_cache.h" - -/* this is developers aid only! */ -#define PARANOIA_ENTRY() BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)) -#define PARANOIA_LEAVE() do { clear_bit(__LC_PARANOIA, &lc->flags); smp_mb__after_clear_bit(); } while (0) -#define RETURN(x...) do { PARANOIA_LEAVE(); return x ; } while (0) - -static size_t size_of_lc(unsigned int e_count, size_t e_size) -{ - return sizeof(struct lru_cache) - + e_count * (e_size + sizeof(struct hlist_head)); -} - -static void lc_init(struct lru_cache *lc, - const size_t bytes, const char *name, - const unsigned int e_count, const size_t e_size, - void *private_p) -{ - struct lc_element *e; - unsigned int i; - - BUG_ON(!e_count); - - memset(lc, 0, bytes); - INIT_LIST_HEAD(&lc->in_use); - INIT_LIST_HEAD(&lc->lru); - INIT_LIST_HEAD(&lc->free); - lc->element_size = e_size; - lc->nr_elements = e_count; - lc->new_number = -1; - lc->lc_private = private_p; - lc->name = name; - for (i = 0; i < e_count; i++) { - e = lc_entry(lc, i); - e->lc_number = LC_FREE; - list_add(&e->list, &lc->free); - /* memset(,0,) did the rest of init for us */ - } -} - -/** - * lc_alloc: allocates memory for @e_count objects of @e_size bytes plus the - * struct lru_cache, and the hash table slots. - * returns pointer to a newly initialized lru_cache object with said parameters. 
- */ -struct lru_cache *lc_alloc(const char *name, unsigned int e_count, - size_t e_size, void *private_p) -{ - struct lru_cache *lc; - size_t bytes; - - BUG_ON(!e_count); - e_size = max(sizeof(struct lc_element), e_size); - bytes = size_of_lc(e_count, e_size); - lc = vmalloc(bytes); - if (lc) - lc_init(lc, bytes, name, e_count, e_size, private_p); - return lc; -} - -/** - * lc_free: Frees memory allocated by lc_alloc. - * @lc: The lru_cache object - */ -void lc_free(struct lru_cache *lc) -{ - vfree(lc); -} - -/** - * lc_reset: does a full reset for @lc and the hash table slots. - * It is roughly the equivalent of re-allocating a fresh lru_cache object, - * basically a short cut to lc_free(lc); lc = lc_alloc(...); - */ -void lc_reset(struct lru_cache *lc) -{ - lc_init(lc, size_of_lc(lc->nr_elements, lc->element_size), lc->name, - lc->nr_elements, lc->element_size, lc->lc_private); -} - -size_t lc_printf_stats(struct seq_file *seq, struct lru_cache *lc) -{ - /* NOTE: - * total calls to lc_get are - * (starving + hits + misses) - * misses include "dirty" count (update from an other thread in - * progress) and "changed", when this in fact lead to an successful - * update of the cache. - */ - return seq_printf(seq, "\t%s: used:%u/%u " - "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n", - lc->name, lc->used, lc->nr_elements, - lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed); -} - -static unsigned int lc_hash_fn(struct lru_cache *lc, unsigned int enr) -{ - return enr % lc->nr_elements; -} - - -/** - * lc_find: Returns the pointer to an element, if the element is present - * in the hash table. In case it is not this function returns NULL. - * @lc: The lru_cache object - * @enr: element number - */ -struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) -{ - struct hlist_node *n; - struct lc_element *e; - - BUG_ON(!lc); - hlist_for_each_entry(e, n, lc->slot + lc_hash_fn(lc, enr), colision) { - if (e->lc_number == enr) - return e; - } - return NULL; -} - -static struct lc_element *lc_evict(struct lru_cache *lc) -{ - struct list_head *n; - struct lc_element *e; - - if (list_empty(&lc->lru)) - return NULL; - - n = lc->lru.prev; - e = list_entry(n, struct lc_element, list); - - list_del(&e->list); - hlist_del(&e->colision); - return e; -} - -/** - * lc_del: Removes an element from the cache (and therefore adds the - * element's storage to the free list) - * - * @lc: The lru_cache object - * @e: The element to remove - */ -void lc_del(struct lru_cache *lc, struct lc_element *e) -{ - PARANOIA_ENTRY(); - BUG_ON(e->refcnt); - list_del(&e->list); - hlist_del_init(&e->colision); - e->lc_number = LC_FREE; - e->refcnt = 0; - list_add(&e->list, &lc->free); - RETURN(); -} - -static struct lc_element *lc_get_unused_element(struct lru_cache *lc) -{ - struct list_head *n; - - if (list_empty(&lc->free)) - return lc_evict(lc); - - n = lc->free.next; - list_del(n); - return list_entry(n, struct lc_element, list); -} - -static int lc_unused_element_available(struct lru_cache *lc) -{ - if (!list_empty(&lc->free)) - return 1; /* something on the free list */ - if (!list_empty(&lc->lru)) - return 1; /* something to evict */ - - return 0; -} - - -/** - * lc_get: Finds an element in the cache, increases its usage count, - * "touches" and returns it. - * In case the requested number is not present, it needs to be added to the - * cache. Therefore it is possible that an other element becomes eviced from - * the cache. In either case, the user is notified so he is able to e.g. 
keep - * a persistent log of the cache changes, and therefore the objects in use. - * - * Return values: - * NULL if the requested element number was not in the cache, and no unused - * element could be recycled - * pointer to the element with the REQUESTED element number - * In this case, it can be used right away - * - * pointer to an UNUSED element with some different element number. - * In this case, the cache is marked dirty, and the returned element - * pointer is removed from the lru list and hash collision chains. - * The user now should do whatever houskeeping is necessary. Then he - * needs to call lc_element_changed(lc,element_pointer), to finish the - * change. - * - * NOTE: The user needs to check the lc_number on EACH use, so he recognizes - * any cache set change. - * - * @lc: The lru_cache object - * @enr: element number - */ -struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) -{ - struct lc_element *e; - - BUG_ON(!lc); - BUG_ON(!lc->nr_elements); - - PARANOIA_ENTRY(); - if (lc->flags & LC_STARVING) { - ++lc->starving; - RETURN(NULL); - } - - e = lc_find(lc, enr); - if (e) { - ++lc->hits; - if (e->refcnt++ == 0) - lc->used++; - list_move(&e->list, &lc->in_use); /* Not evictable... */ - RETURN(e); - } - - ++lc->misses; - - /* In case there is nothing available and we can not kick out - * the LRU element, we have to wait ... - */ - if (!lc_unused_element_available(lc)) { - __set_bit(__LC_STARVING, &lc->flags); - RETURN(NULL); - } - - /* it was not present in the cache, find an unused element, - * which then is replaced. - * we need to update the cache; serialize on lc->flags & LC_DIRTY - */ - if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { - ++lc->dirty; - RETURN(NULL); - } - - e = lc_get_unused_element(lc); - BUG_ON(!e); - - clear_bit(__LC_STARVING, &lc->flags); - BUG_ON(++e->refcnt != 1); - lc->used++; - - lc->changing_element = e; - lc->new_number = enr; - - RETURN(e); -} - -/* similar to lc_get, - * but only gets a new reference on an existing element. - * you either get the requested element, or NULL. - */ -struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) -{ - struct lc_element *e; - - BUG_ON(!lc); - BUG_ON(!lc->nr_elements); - - PARANOIA_ENTRY(); - if (lc->flags & LC_STARVING) { - ++lc->starving; - RETURN(NULL); - } - - e = lc_find(lc, enr); - if (e) { - ++lc->hits; - if (e->refcnt++ == 0) - lc->used++; - list_move(&e->list, &lc->in_use); /* Not evictable... */ - } - RETURN(e); -} - -void lc_changed(struct lru_cache *lc, struct lc_element *e) -{ - PARANOIA_ENTRY(); - BUG_ON(e != lc->changing_element); - ++lc->changed; - e->lc_number = lc->new_number; - list_add(&e->list, &lc->in_use); - hlist_add_head(&e->colision, - lc->slot + lc_hash_fn(lc, lc->new_number)); - lc->changing_element = NULL; - lc->new_number = -1; - clear_bit(__LC_DIRTY, &lc->flags); - smp_mb__after_clear_bit(); - RETURN(); -} - - -unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) -{ - BUG_ON(!lc); - BUG_ON(!lc->nr_elements); - BUG_ON(!e); - - PARANOIA_ENTRY(); - BUG_ON(e->refcnt == 0); - BUG_ON(e == lc->changing_element); - if (--e->refcnt == 0) { - /* move it to the front of LRU. */ - list_move(&e->list, &lc->lru); - lc->used--; - clear_bit(__LC_STARVING, &lc->flags); - smp_mb__after_clear_bit(); - } - RETURN(e->refcnt); -} - - -/** - * lc_set: Sets an element in the cache. You might use this function to - * setup the cache. It is expected that the elements are properly initialized. 
- * @lc: The lru_cache object - * @enr: element number - * @index: The elements' position in the cache - */ -void lc_set(struct lru_cache *lc, unsigned int enr, int index) -{ - struct lc_element *e; - - if (index < 0 || index >= lc->nr_elements) - return; - - e = lc_entry(lc, index); - e->lc_number = enr; - - hlist_del_init(&e->colision); - hlist_add_head(&e->colision, lc->slot + lc_hash_fn(lc, enr)); - list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru); -} - -/** - * lc_dump: Dump a complete LRU cache to seq in textual form. - */ -void lc_dump(struct lru_cache *lc, struct seq_file *seq, char *utext, - void (*detail) (struct seq_file *, struct lc_element *)) -{ - unsigned int nr_elements = lc->nr_elements; - struct lc_element *e; - int i; - - seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext); - for (i = 0; i < nr_elements; i++) { - e = lc_entry(lc, i); - if (e->lc_number == LC_FREE) { - seq_printf(seq, "\t%2d: FREE\n", i); - } else { - seq_printf(seq, "\t%2d: %4u %4u ", i, - e->lc_number, - e->refcnt); - detail(seq, e); - } - } -} - diff --git a/drivers/block/drbd/lru_cache.h b/drivers/block/drbd/lru_cache.h deleted file mode 100644 index eabf897948d0..000000000000 --- a/drivers/block/drbd/lru_cache.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - lru_cache.h - - This file is part of DRBD by Philipp Reisner and Lars Ellenberg. - - Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. - Copyright (C) 2003-2008, Philipp Reisner . - Copyright (C) 2003-2008, Lars Ellenberg . - - drbd is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - drbd is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with drbd; see the file COPYING. If not, write to - the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - - */ - -#ifndef LRU_CACHE_H -#define LRU_CACHE_H - -#include - -struct lc_element { - struct hlist_node colision; - struct list_head list; /* LRU list or free list */ - unsigned int refcnt; - unsigned int lc_number; -}; - -struct lru_cache { - struct list_head lru; - struct list_head free; - struct list_head in_use; - size_t element_size; - unsigned int nr_elements; - unsigned int new_number; - - unsigned int used; - unsigned long flags; - unsigned long hits, misses, starving, dirty, changed; - struct lc_element *changing_element; /* just for paranoia */ - - void *lc_private; - const char *name; - - struct hlist_head slot[0]; - /* hash colision chains here, then element storage. 
*/ -}; - - -/* flag-bits for lru_cache */ -enum { - __LC_PARANOIA, - __LC_DIRTY, - __LC_STARVING, -}; -#define LC_PARANOIA (1<<__LC_PARANOIA) -#define LC_DIRTY (1<<__LC_DIRTY) -#define LC_STARVING (1<<__LC_STARVING) - -extern struct lru_cache *lc_alloc(const char *name, unsigned int e_count, - size_t e_size, void *private_p); -extern void lc_reset(struct lru_cache *lc); -extern void lc_free(struct lru_cache *lc); -extern void lc_set(struct lru_cache *lc, unsigned int enr, int index); -extern void lc_del(struct lru_cache *lc, struct lc_element *element); - -extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr); -extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr); -extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr); -extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e); -extern void lc_changed(struct lru_cache *lc, struct lc_element *e); - -struct seq_file; -extern size_t lc_printf_stats(struct seq_file *seq, struct lru_cache *lc); - -void lc_dump(struct lru_cache *lc, struct seq_file *seq, char *utext, - void (*detail) (struct seq_file *, struct lc_element *)); - -/* This can be used to stop lc_get from changing the set of active elements. - * Note that the reference counts and order on the lru list may still change. - * returns true if we aquired the lock. - */ -static inline int lc_try_lock(struct lru_cache *lc) -{ - return !test_and_set_bit(__LC_DIRTY, &lc->flags); -} - -static inline void lc_unlock(struct lru_cache *lc) -{ - clear_bit(__LC_DIRTY, &lc->flags); - smp_mb__after_clear_bit(); -} - -static inline int lc_is_used(struct lru_cache *lc, unsigned int enr) -{ - struct lc_element *e = lc_find(lc, enr); - return e && e->refcnt; -} - -#define LC_FREE (-1U) - -#define lc_e_base(lc) ((char *)((lc)->slot + (lc)->nr_elements)) -#define lc_entry(lc, i) ((struct lc_element *) \ - (lc_e_base(lc) + (i)*(lc)->element_size)) -#define lc_index_of(lc, e) (((char *)(e) - lc_e_base(lc))/(lc)->element_size) - -#endif diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h new file mode 100644 index 000000000000..69e2455b00be --- /dev/null +++ b/include/linux/lru_cache.h @@ -0,0 +1,285 @@ +/* + lru_cache.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2003-2008, Philipp Reisner . + Copyright (C) 2003-2008, Lars Ellenberg . + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#ifndef LRU_CACHE_H +#define LRU_CACHE_H + +#include + +/* +This header file (and its .c file; kernel-doc of functions see there) + define a helper framework to easily keep track of index:label associations, + and changes to an "active set" of objects, as well as pending transactions, + to persistently record those changes. 
+
+ We use an LRU policy if it is necessary to "cool down" a region currently in
+ the active set before we can "heat" a previously unused region.
+
+ Because of this latter property, it is called "lru_cache".
+ As it actually Tracks Objects in an Active SeT, we could also call it
+ toast (incidentally that is what may happen to the data on the
+ backend storage upon next resync, if we don't get it right).
+
+What for?
+
+We replicate IO (more or less synchronously) to local and remote disk.
+
+For crash recovery after replication node failure,
+ we need to resync all regions that have been the target of in-flight WRITE IO
+ (in use, or "hot", regions), as we don't know whether or not those WRITEs
+ have made it to stable storage.
+
+ To avoid a "full resync", we need to persistently track these regions.
+
+ This is known as "write intent log", and can be implemented as on-disk
+ (coarse or fine grained) bitmap, or other meta data.
+
+ To avoid the overhead of frequent extra writes to this meta data area,
+ usually the condition is softened to regions that _may_ have been the target
+ of in-flight WRITE IO, e.g. by only lazily clearing the on-disk write-intent
+ bitmap, trading frequency of meta data transactions against amount of
+ (possibly unnecessary) resync traffic.
+
+ If we set a hard limit on the area that may be "hot" at any given time, we
+ limit the amount of resync traffic needed for crash recovery.
+
+For recovery after replication link failure,
+ we need to resync all blocks that have been changed on the other replica
+ in the meantime, or, if both replicas have been changed independently [*],
+ all blocks that have been changed on either replica in the meantime.
+ [*] usually as a result of a cluster split-brain and insufficient protection,
+ but there are valid use cases to do this on purpose.
+
+ Tracking those blocks can be implemented as "dirty bitmap".
+ Having it fine-grained reduces the amount of resync traffic.
+ It should also be persistent, to allow for reboots (or crashes)
+ while the replication link is down.
+
+There are various possible implementations for persistently storing
+write intent log information, three of which are mentioned here.
+
+"Chunk dirtying"
+ The on-disk "dirty bitmap" may be re-used as "write-intent" bitmap as well.
+ To reduce the frequency of bitmap updates for write-intent log purposes,
+ one could dirty "chunks" (of some size) at a time of the (fine grained)
+ on-disk bitmap, while keeping the in-memory "dirty" bitmap as clean as
+ possible, flushing it to disk again when a previously "hot" (and on-disk
+ dirtied as full chunk) area "cools down" again (no IO in flight anymore,
+ and none expected in the near future either).
+
+"Explicit (coarse) write intent bitmap"
+ Another implementation could choose a (probably coarse) explicit bitmap,
+ for write-intent log purposes, in addition to the fine grained dirty bitmap.
+
+"Activity log"
+ Yet another implementation may keep track of the hot regions, by starting
+ with an empty set, and writing down a journal of region numbers that have
+ become "hot", or have "cooled down" again.
+
+ To be able to use a ring buffer for this journal of changes to the active
+ set, we not only record the actual changes to that set, but also record the
+ not changing members of the set in a round robin fashion. To do so, we use a
+ fixed (but configurable) number of slots which we can identify by index, and
+ associate region numbers (labels) with these indices.
+ For each transaction recording a change to the active set, we record the
+ change itself (index: -old_label, +new_label), and which index is associated
+ with which label (index: current_label) within a certain sliding window that
+ is moved further over the available indices with each such transaction.
+
+ Thus, for crash recovery, if the ringbuffer is sufficiently large, we can
+ accurately reconstruct the active set.
+
+ Sufficiently large depends only on the maximum number of active objects, and
+ the size of the sliding window recording "index: current_label" associations
+ within each transaction.
+
+ This is what we call the "activity log".
+
+ Currently we need one activity log transaction per single label change, which
+ does not give much benefit over the "dirty chunks of bitmap" approach, other
+ than potentially fewer seeks.
+
+ We plan to change the transaction format to support multiple changes per
+ transaction, which then would reduce several (disjoint, "random") updates to
+ the bitmap into one transaction to the activity log ring buffer.
+*/
+
+/* this defines an element in a tracked set
+ * .colision is for hash table lookup.
+ * When we process a new IO request, we know its sector, thus can deduce the
+ * region number (label) easily. To do the label -> object lookup without a
+ * full list walk, we use a simple hash table.
+ *
+ * .list is on one of three lists:
+ *  in_use: currently in use (refcnt > 0, lc_number != LC_FREE)
+ *     lru: unused but ready to be reused or recycled
+ *          (refcnt == 0, lc_number != LC_FREE),
+ *    free: unused but ready to be recycled
+ *          (refcnt == 0, lc_number == LC_FREE),
+ *
+ * an element is said to be "in the active set",
+ * if it is either on "in_use" or "lru", i.e. lc_number != LC_FREE.
+ *
+ * DRBD currently only uses 61 elements on the resync lru_cache (total memory
+ * usage 2 pages), and up to 3833 elements on the act_log lru_cache, totalling
+ * ~215 kB for 64bit architecture, ~53 pages.
+ *
+ * We usually do not actually free these objects again, but only "recycle"
+ * them, as the change "index: -old_label, +LC_FREE" would need a transaction
+ * as well. Which also means that using a kmem_cache or even mempool to
+ * allocate the objects from wastes some resources. But it would avoid high
+ * order page allocations in kmalloc, so we may change to a kmem_cache backed
+ * allocation of the elements in the near future.
+ */
+struct lc_element {
+	struct hlist_node colision;
+	struct list_head list;		 /* LRU list or free list */
+	unsigned int refcnt;
+	unsigned int lc_number;
+};
+
+struct lru_cache {
+	/* the least recently used item is kept at lru->prev */
+	struct list_head lru;
+	struct list_head free;
+	struct list_head in_use;
+
+	/* size of tracked objects */
+	size_t element_size;
+	/* offset of struct lc_element member in the tracked object */
+	size_t element_off;
+
+	/* number of elements (indices) */
+	unsigned int  nr_elements;
+
+	/* statistics */
+	unsigned int used;
+	unsigned long hits, misses, starving, dirty, changed;
+
+	/* see below: flag-bits for lru_cache */
+	unsigned long flags;
+
+	/* when changing the label of an index element */
+	unsigned int  new_number;
+
+	/* for paranoia when changing the label of an index element */
+	struct lc_element *changing_element;
+
+	void  *lc_private;
+	const char *name;
+
+	struct hlist_head slot[0];
+	/* hash colision chains here, then element storage. */
+};
+
+
+/* flag-bits for lru_cache */
+enum {
+	/* debugging aid, to catch concurrent access early.
+	 * user needs to guarantee exclusive access by proper locking! */
+	__LC_PARANOIA,
+	/* if we need to change the set, but currently there is a changing
+	 * transaction pending, we are "dirty", and must defer further
+	 * changing requests */
+	__LC_DIRTY,
+	/* if we need to change the set, but currently there is no free nor
+	 * unused element available, we are "starving", and must not give out
+	 * further references, to guarantee that eventually some refcnt will
+	 * drop to zero and we will be able to make progress again, changing
+	 * the set, writing the transaction.
+	 * if the statistics say we are frequently starving,
+	 * nr_elements is too small. */
+	__LC_STARVING,
+};
+#define LC_PARANOIA (1<<__LC_PARANOIA)
+#define LC_DIRTY    (1<<__LC_DIRTY)
+#define LC_STARVING (1<<__LC_STARVING)
+
+extern struct lru_cache *lc_create(const char *name, unsigned int e_count,
+		size_t e_size, size_t e_off);
+extern void lc_reset(struct lru_cache *lc);
+extern void lc_destroy(struct lru_cache *lc);
+extern void lc_set(struct lru_cache *lc, unsigned int enr, int index);
+extern void lc_del(struct lru_cache *lc, struct lc_element *element);
+
+extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr);
+extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr);
+extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr);
+extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e);
+extern void lc_changed(struct lru_cache *lc, struct lc_element *e);
+
+struct seq_file;
+extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
+
+extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
+				void (*detail) (struct seq_file *, struct lc_element *));
+
+/* This can be used to stop lc_get from changing the set of active elements.
+ * Note that the reference counts and order on the lru list may still change.
+ * Returns true if we acquired the lock.
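+ *
+ * e.g. (a minimal sketch of the intended use):
+ *
+ *	if (lc_try_lock(lc)) {
+ *		(the active set cannot change now; e.g. write it out)
+ *		lc_unlock(lc);
+ *	}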
+ */
+static inline int lc_try_lock(struct lru_cache *lc)
+{
+	return !test_and_set_bit(__LC_DIRTY, &lc->flags);
+}
+
+static inline void lc_unlock(struct lru_cache *lc)
+{
+	clear_bit(__LC_DIRTY, &lc->flags);
+	smp_mb__after_clear_bit();
+}
+
+static inline int lc_is_used(struct lru_cache *lc, unsigned int enr)
+{
+	struct lc_element *e = lc_find(lc, enr);
+	return e && e->refcnt;
+}
+
+#define LC_FREE (-1U)
+
+#define lc_entry(ptr, type, member) \
+	container_of(ptr, type, member)
+
+static inline struct lc_element *
+lc_element_by_index(struct lru_cache *lc, unsigned int i)
+{
+	BUG_ON(i >= lc->nr_elements);
+	return (struct lc_element *)(
+		((char *)(lc->slot + lc->nr_elements)) +
+		i * lc->element_size +
+		lc->element_off);
+}
+
+static inline size_t lc_index_of(struct lru_cache *lc, struct lc_element *e)
+{
+	size_t i = ((char *)(e) - lc->element_off
+		  - ((char *)(lc->slot + lc->nr_elements)))
+		 / lc->element_size;
+	BUG_ON(i >= lc->nr_elements);
+	BUG_ON(e != lc_element_by_index(lc, i));
+	return i;
+}
+
+#endif
diff --git a/lib/Kconfig b/lib/Kconfig
index 8ade0a7a91e0..8018c5b70562 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -191,4 +191,7 @@ config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
 config NLATTR
 	bool
 
+config LRU_CACHE
+	bool
+
 endmenu
diff --git a/lib/Makefile b/lib/Makefile
index 33a40e40e3ee..ade9b86d04d1 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -90,6 +90,8 @@ obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o
 
 obj-$(CONFIG_NLATTR) += nlattr.o
 
+obj-$(CONFIG_LRU_CACHE) += lru_cache.o
+
 obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o
 
 hostprogs-y	:= gen_crc32table
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
new file mode 100644
index 000000000000..f8632f1f7f7c
--- /dev/null
+++ b/lib/lru_cache.c
@@ -0,0 +1,464 @@
+/*
+   lru_cache.c
+
+   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
+   Copyright (C) 2003-2008, Philipp Reisner .
+   Copyright (C) 2003-2008, Lars Ellenberg .
+
+   drbd is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   drbd is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with drbd; see the file COPYING.  If not, write to
+   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/string.h> /* for memset */
+#include <linux/seq_file.h> /* for seq_printf */
+#include <linux/lru_cache.h>
+
+/* this is developers aid only! */
+#define PARANOIA_ENTRY() BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags))
+#define PARANOIA_LEAVE() do { clear_bit(__LC_PARANOIA, &lc->flags); smp_mb__after_clear_bit(); } while (0)
+#define RETURN(x...)	\
+	do { PARANOIA_LEAVE(); return x ; } while (0)
+
+static size_t size_of_lc(unsigned int e_count, size_t e_size)
+{
+	return sizeof(struct lru_cache)
+	     + e_count * (e_size + sizeof(struct hlist_head));
+}
+
+static void lc_init(struct lru_cache *lc,
+		const size_t bytes, const char *name,
+		const unsigned int e_count, const size_t e_size,
+		const size_t e_off)
+{
+	struct lc_element *e;
+	unsigned int i;
+
+	BUG_ON(!e_count);
+
+	memset(lc, 0, bytes);
+	INIT_LIST_HEAD(&lc->in_use);
+	INIT_LIST_HEAD(&lc->lru);
+	INIT_LIST_HEAD(&lc->free);
+	lc->element_size = e_size;
+	lc->element_off = e_off;
+	lc->nr_elements = e_count;
+	lc->new_number = -1;
+	lc->name = name;
+	for (i = 0; i < e_count; i++) {
+		e = lc_element_by_index(lc, i);
+		e->lc_number = LC_FREE;
+		list_add(&e->list, &lc->free);
+		/* memset(,0,) did the rest of init for us */
+	}
+}
+
+/**
+ * lc_create - prepares to track objects in an active set
+ * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
+ * @e_count: number of elements allowed to be active simultaneously
+ * @e_size: size of the tracked objects
+ * @e_off: offset to the &struct lc_element member in a tracked object
+ *
+ * Returns a pointer to a newly initialized struct lru_cache on success,
+ * or NULL on (allocation) failure.
+ */
+struct lru_cache *lc_create(const char *name, unsigned int e_count,
+		size_t e_size, size_t e_off)
+{
+	struct lru_cache *lc;
+	size_t bytes;
+
+	BUG_ON(!e_count);
+	BUG_ON(e_size < sizeof(struct lc_element));
+	BUG_ON(e_size - sizeof(struct lc_element) < e_off);
+	e_size = ALIGN(e_size, sizeof(void *));
+	e_size = max(sizeof(struct lc_element), e_size);
+	bytes = size_of_lc(e_count, e_size);
+	lc = kmalloc(bytes, GFP_KERNEL);
+	if (lc)
+		lc_init(lc, bytes, name, e_count, e_size, e_off);
+	return lc;
+}
+
+/**
+ * lc_destroy - frees memory allocated by lc_create()
+ * @lc: the lru cache to operate on
+ */
+void lc_destroy(struct lru_cache *lc)
+{
+	kfree(lc);
+}
+
+/**
+ * lc_reset - does a full reset for @lc and the hash table slots.
+ * @lc: the lru cache to operate on
+ *
+ * It is roughly the equivalent of re-allocating a fresh lru_cache object,
+ * basically a short cut to lc_destroy(lc); lc = lc_create(...);
+ */
+void lc_reset(struct lru_cache *lc)
+{
+	lc_init(lc, size_of_lc(lc->nr_elements, lc->element_size), lc->name,
+		lc->nr_elements, lc->element_size, lc->element_off);
+}
+
+/**
+ * lc_seq_printf_stats - print stats about @lc into @seq
+ * @seq: the seq_file to print into
+ * @lc: the lru cache to print statistics of
+ */
+size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
+{
+	/* NOTE:
+	 * total calls to lc_get are
+	 * (starving + hits + misses)
+	 * misses include "dirty" count (update from another thread in
+	 * progress) and "changed", when this in fact led to a successful
+	 * update of the cache.
+	 */
+	return seq_printf(seq, "\t%s: used:%u/%u "
+		"hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n",
+		lc->name, lc->used, lc->nr_elements,
+		lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed);
+}
+
+static unsigned int lc_hash_fn(struct lru_cache *lc, unsigned int enr)
+{
+	return enr % lc->nr_elements;
+}
+
+
+/**
+ * lc_find - find element by label, if present in the hash table
+ * @lc: The lru_cache object
+ * @enr: element number
+ *
+ * Returns the pointer to an element, if the element with the requested
+ * "label" or element number is present in the hash table,
+ * or NULL if not found. Does not change the refcnt.
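+ *
+ * e.g. (a sketch): test whether a label is currently in use, without
+ * taking a reference; this is exactly what the lc_is_used() helper in
+ * <linux/lru_cache.h> does:
+ *
+ *	e = lc_find(lc, enr);
+ *	busy = e && e->refcnt;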
+ */
+struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
+{
+	struct hlist_node *n;
+	struct lc_element *e;
+
+	BUG_ON(!lc);
+	hlist_for_each_entry(e, n, lc->slot + lc_hash_fn(lc, enr), colision) {
+		if (e->lc_number == enr)
+			return e;
+	}
+	return NULL;
+}
+
+/* returned element will be "recycled" immediately */
+static struct lc_element *lc_evict(struct lru_cache *lc)
+{
+	struct list_head *n;
+	struct lc_element *e;
+
+	if (list_empty(&lc->lru))
+		return NULL;
+
+	n = lc->lru.prev;
+	e = list_entry(n, struct lc_element, list);
+
+	list_del(&e->list);
+	hlist_del(&e->colision);
+	return e;
+}
+
+/**
+ * lc_del - removes an element from the cache
+ * @lc: The lru_cache object
+ * @e: The element to remove
+ *
+ * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list,
+ * sets @e->lc_number to %LC_FREE.
+ */
+void lc_del(struct lru_cache *lc, struct lc_element *e)
+{
+	PARANOIA_ENTRY();
+	BUG_ON(e < lc_element_by_index(lc, 0));
+	BUG_ON(e > lc_element_by_index(lc, lc->nr_elements-1));
+	BUG_ON(e->refcnt);
+	list_del(&e->list);
+	hlist_del_init(&e->colision);
+	e->lc_number = LC_FREE;
+	e->refcnt = 0;
+	list_add(&e->list, &lc->free);
+	RETURN();
+}
+
+static struct lc_element *lc_get_unused_element(struct lru_cache *lc)
+{
+	struct list_head *n;
+
+	if (list_empty(&lc->free))
+		return lc_evict(lc);
+
+	n = lc->free.next;
+	list_del(n);
+	return list_entry(n, struct lc_element, list);
+}
+
+static int lc_unused_element_available(struct lru_cache *lc)
+{
+	if (!list_empty(&lc->free))
+		return 1; /* something on the free list */
+	if (!list_empty(&lc->lru))
+		return 1; /* something to evict */
+
+	return 0;
+}
+
+
+/**
+ * lc_get - get element by label, maybe change the active set
+ * @lc: the lru cache to operate on
+ * @enr: the label to look up
+ *
+ * Finds an element in the cache, increases its usage count,
+ * "touches" and returns it.
+ *
+ * In case the requested number is not present, it needs to be added to the
+ * cache. Therefore it is possible that another element gets evicted from
+ * the cache. In either case, the user is notified, so he is able to e.g. keep
+ * a persistent log of the cache changes, and therefore the objects in use.
+ *
+ * Return values:
+ *  NULL
+ *     The cache was marked %LC_STARVING,
+ *     or the requested label was not in the active set
+ *     and a changing transaction is still pending (@lc was marked %LC_DIRTY).
+ *     Or no unused or free element could be recycled (@lc will be marked as
+ *     %LC_STARVING, blocking further lc_get() operations).
+ *
+ *  pointer to the element with the REQUESTED element number.
+ *     In this case, it can be used right away.
+ *
+ *  pointer to an UNUSED element with some different element number,
+ *     where that different number may also be %LC_FREE.
+ *
+ *     In this case, the cache is marked %LC_DIRTY (blocking further changes),
+ *     and the returned element pointer is removed from the lru list and
+ *     hash collision chains. The user now should do whatever housekeeping
+ *     is necessary.
+ *     Then he must call lc_changed(lc, element_pointer), to finish
+ *     the change.
+ *
+ * NOTE: The user needs to check the lc_number on EACH use, so he recognizes
+ *       any cache set change.
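+ *
+ * A minimal caller sketch (hypothetical; error handling and the caller's
+ * own serialization are elided, and write_transaction() stands in for
+ * whatever housekeeping the user does to persist the change):
+ *
+ *	e = lc_get(lc, enr);
+ *	if (e == NULL)
+ *		back off and retry later (starving, or a change is pending)
+ *	if (e->lc_number != enr) {
+ *		write_transaction(lc, e);	user-supplied housekeeping
+ *		lc_changed(lc, e);
+ *	}
+ *	use the element, then lc_put(lc, e) once done with it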
+ */
+struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
+{
+	struct lc_element *e;
+
+	BUG_ON(!lc);
+	BUG_ON(!lc->nr_elements);
+
+	PARANOIA_ENTRY();
+	if (lc->flags & LC_STARVING) {
+		++lc->starving;
+		RETURN(NULL);
+	}
+
+	e = lc_find(lc, enr);
+	if (e) {
+		++lc->hits;
+		if (e->refcnt++ == 0)
+			lc->used++;
+		list_move(&e->list, &lc->in_use); /* Not evictable... */
+		RETURN(e);
+	}
+
+	++lc->misses;
+
+	/* In case there is nothing available and we cannot kick out
+	 * the LRU element, we have to wait ...
+	 */
+	if (!lc_unused_element_available(lc)) {
+		__set_bit(__LC_STARVING, &lc->flags);
+		RETURN(NULL);
+	}
+
+	/* it was not present in the active set.
+	 * we are going to recycle an unused (or even "free") element.
+	 * user may need to commit a transaction to record that change.
+	 * we serialize on flags & LC_DIRTY */
+	if (test_and_set_bit(__LC_DIRTY, &lc->flags)) {
+		++lc->dirty;
+		RETURN(NULL);
+	}
+
+	e = lc_get_unused_element(lc);
+	BUG_ON(!e);
+
+	clear_bit(__LC_STARVING, &lc->flags);
+	BUG_ON(++e->refcnt != 1);
+	lc->used++;
+
+	lc->changing_element = e;
+	lc->new_number = enr;
+
+	RETURN(e);
+}
+
+/* similar to lc_get,
+ * but only gets a new reference on an existing element.
+ * you either get the requested element, or NULL.
+ * (lc_get() and lc_try_get() will eventually be consolidated
+ * into one function.)
+ */
+struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
+{
+	struct lc_element *e;
+
+	BUG_ON(!lc);
+	BUG_ON(!lc->nr_elements);
+
+	PARANOIA_ENTRY();
+	if (lc->flags & LC_STARVING) {
+		++lc->starving;
+		RETURN(NULL);
+	}
+
+	e = lc_find(lc, enr);
+	if (e) {
+		++lc->hits;
+		if (e->refcnt++ == 0)
+			lc->used++;
+		list_move(&e->list, &lc->in_use); /* Not evictable... */
+	}
+	RETURN(e);
+}
+
+/**
+ * lc_changed - tell @lc that the change has been recorded
+ * @lc: the lru cache to operate on
+ * @e: the element pending label change
+ */
+void lc_changed(struct lru_cache *lc, struct lc_element *e)
+{
+	PARANOIA_ENTRY();
+	BUG_ON(e != lc->changing_element);
+	++lc->changed;
+	e->lc_number = lc->new_number;
+	list_add(&e->list, &lc->in_use);
+	hlist_add_head(&e->colision,
+		lc->slot + lc_hash_fn(lc, lc->new_number));
+	lc->changing_element = NULL;
+	lc->new_number = -1;
+	clear_bit(__LC_DIRTY, &lc->flags);
+	smp_mb__after_clear_bit();
+	RETURN();
+}
+
+
+/**
+ * lc_put - give up refcnt of @e
+ * @lc: the lru cache to operate on
+ * @e: the element to put
+ *
+ * If refcnt reaches zero, the element is moved to the lru list,
+ * and a %LC_STARVING (if set) is cleared.
+ * Returns the new (post-decrement) refcnt.
+ */
+unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
+{
+	BUG_ON(!lc);
+	BUG_ON(!lc->nr_elements);
+	BUG_ON(!e);
+
+	PARANOIA_ENTRY();
+	BUG_ON(e->refcnt == 0);
+	BUG_ON(e == lc->changing_element);
+	if (--e->refcnt == 0) {
+		/* move it to the front of LRU. */
+		list_move(&e->list, &lc->lru);
+		lc->used--;
+		clear_bit(__LC_STARVING, &lc->flags);
+		smp_mb__after_clear_bit();
+	}
+	RETURN(e->refcnt);
+}
+
+
+/**
+ * lc_set - associate index with label
+ * @lc: the lru cache to operate on
+ * @enr: the label to set
+ * @index: the element index to associate label with.
+ *
+ * Used to initialize the active set to some previously recorded state.
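+ *
+ * E.g. to re-activate the set from a previously recorded on-disk log
+ * (hypothetical layout, one recorded label per element slot):
+ *
+ *	for (i = 0; i < lc->nr_elements; i++)
+ *		lc_set(lc, label_recorded_on_disk[i], i);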
+ */
+void lc_set(struct lru_cache *lc, unsigned int enr, int index)
+{
+	struct lc_element *e;
+
+	if (index < 0 || index >= lc->nr_elements)
+		return;
+
+	e = lc_element_by_index(lc, index);
+	e->lc_number = enr;
+
+	hlist_del_init(&e->colision);
+	hlist_add_head(&e->colision, lc->slot + lc_hash_fn(lc, enr));
+	list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru);
+}
+
+/**
+ * lc_seq_dump_details - Dump a complete LRU cache to seq in textual form.
+ * @lc: the lru cache to operate on
+ * @seq: the &struct seq_file pointer to seq_printf into
+ * @utext: user supplied "heading" or other info
+ * @detail: function pointer the user may provide to dump further details
+ * of the object the lc_element is embedded in.
+ */
+void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
+	     void (*detail) (struct seq_file *, struct lc_element *))
+{
+	unsigned int nr_elements = lc->nr_elements;
+	struct lc_element *e;
+	int i;
+
+	seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext);
+	for (i = 0; i < nr_elements; i++) {
+		e = lc_element_by_index(lc, i);
+		if (e->lc_number == LC_FREE) {
+			seq_printf(seq, "\t%2d: FREE\n", i);
+		} else {
+			seq_printf(seq, "\t%2d: %4u %4u ", i,
+				e->lc_number,
+				e->refcnt);
+			detail(seq, e);
+		}
+	}
+}
+
+EXPORT_SYMBOL(lc_create);
+EXPORT_SYMBOL(lc_reset);
+EXPORT_SYMBOL(lc_destroy);
+EXPORT_SYMBOL(lc_set);
+EXPORT_SYMBOL(lc_del);
+EXPORT_SYMBOL(lc_try_get);
+EXPORT_SYMBOL(lc_find);
+EXPORT_SYMBOL(lc_get);
+EXPORT_SYMBOL(lc_put);
+EXPORT_SYMBOL(lc_changed);
+EXPORT_SYMBOL(lc_seq_printf_stats);
+EXPORT_SYMBOL(lc_seq_dump_details);
--
cgit v1.2.3


From 07681060d3c7ed0a2cfced27e3f75a613d4d448c Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Wed, 17 Jun 2009 11:49:24 +0200
Subject: LRU_CACHE is tristate instead of bool

Signed-off-by: Philipp Reisner
Signed-off-by: Lars Ellenberg
---
 lib/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Kconfig b/lib/Kconfig
index 8018c5b70562..4b7a42bc9e28 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -192,6 +192,6 @@ config NLATTR
 	bool
 
 config LRU_CACHE
-	bool
+	tristate
 
 endmenu
--
cgit v1.2.3


From 65b0b44dbf9dfec3f78ded84ee22b0a0a1bf82bf Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Thu, 25 Jun 2009 15:57:22 +0200
Subject: Tracking DRBD mainline (and minor cleanups)

* drbd-8.3: (134 commits)
  Missing pices of the unaligned memory access stuff.
  possible fix for XEN crashes on disconnect
  fix regression: initial sync target hung in WFBitMapT
  fix a comment: there are no more ioctls.
  possible fix for XEN crashes on disconnect
  fix regression: initial sync target hung in WFBitMapT
  ...
  Removed compat code from lru_cache.h
  All STATIC -> static
  DRBD_ENABLE_FAULTS -> CONFIG_DRBD_FAULT_INJECTION

* drbd-8.3:
  Fixed some errors/warnings when compiles without DBG_ALL_SYMBOLS (i.e.
STATIC = static) Fixed a regression introduced with fb51e2eb1fac83839231499333bf683629388484 No longer include drbd_config.h directly, include drbd.h instead Got rid of drbd_config.h Support lru_cache as module Removing the drbd_buildtag.c file * drbd-8.3: Fixes for architectures that does not support unaligned memory accesses fix reading of the AL ring buffer sync handshake: fix detection of "unrelated" data - it was detected as "regular" split-brain * drbd-8.3: Preparing 8.3.2rc2 compat: 2.6.31 -- q->limits.* and accessor functions Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/Kconfig | 32 ++++ drivers/block/drbd/Makefile | 2 +- drivers/block/drbd/drbd_actlog.c | 65 ++++--- drivers/block/drbd/drbd_bitmap.c | 35 ++-- drivers/block/drbd/drbd_buildtag.c | 7 - drivers/block/drbd/drbd_int.h | 65 ++++--- drivers/block/drbd/drbd_main.c | 222 +++++++++++++++-------- drivers/block/drbd/drbd_nl.c | 362 +++++++++++++++++++------------------ drivers/block/drbd/drbd_proc.c | 11 +- drivers/block/drbd/drbd_receiver.c | 235 ++++++++++++++---------- drivers/block/drbd/drbd_req.c | 10 +- drivers/block/drbd/drbd_strings.c | 6 +- drivers/block/drbd/drbd_tracing.c | 10 +- drivers/block/drbd/drbd_worker.c | 134 ++++++++++---- drivers/block/drbd/drbd_wrappers.h | 5 - include/linux/drbd.h | 16 +- include/linux/drbd_config.h | 37 ---- include/linux/drbd_limits.h | 4 + include/linux/drbd_nl.h | 4 +- include/linux/lru_cache.h | 87 +++++---- lib/lru_cache.c | 260 +++++++++++++++++--------- 21 files changed, 951 insertions(+), 658 deletions(-) delete mode 100644 drivers/block/drbd/drbd_buildtag.c delete mode 100644 include/linux/drbd_config.h diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig index b3676771731d..f133a8925cbb 100644 --- a/drivers/block/drbd/Kconfig +++ b/drivers/block/drbd/Kconfig @@ -46,3 +46,35 @@ config DRBD_TRACE Say Y here if you want to be able to trace various events in DRBD. If unsure, say N. + +config DRBD_FAULT_INJECTION + bool "DRBD fault injection" + depends on BLK_DEV_DRBD + help + + Say Y here if you want to simulate IO errors, in order to test DRBD's + behavior. + + The actual simulation of IO errors is done by writing 3 values to + /sys/module/drbd/parameters/ + + enable_faults: bitmask of... + 1 meta data write + 2 read + 4 resync data write + 8 read + 16 data write + 32 data read + 64 read ahead + 128 kmalloc of bitmap + 256 allocation of EE (epoch_entries) + + fault_devs: bitmask of minor numbers + fault_rate: frequency in percent + + Example: Simulate data write errors on /dev/drbd0 with a probability of 5%. + echo 16 > /sys/module/drbd/parameters/enable_faults + echo 1 > /sys/module/drbd/parameters/fault_devs + echo 5 > /sys/module/drbd/parameters/fault_rate + + If unsure, say N. diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile index 9dd069b0ded0..68d1e7ce9aa3 100644 --- a/drivers/block/drbd/Makefile +++ b/drivers/block/drbd/Makefile @@ -1,4 +1,4 @@ -drbd-y := drbd_buildtag.o drbd_bitmap.o drbd_proc.o +drbd-y := drbd_bitmap.o drbd_proc.o drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o drbd-y += drbd_main.o drbd_strings.o drbd_nl.o diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 6b096b1720ea..1e53d16c943c 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -77,7 +77,7 @@ void trace_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, ...) 
va_end(ap); } -STATIC int _drbd_md_sync_page_io(struct drbd_conf *mdev, +static int _drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, struct page *page, sector_t sector, int rw, int size) @@ -133,7 +133,7 @@ STATIC int _drbd_md_sync_page_io(struct drbd_conf *mdev, int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw) { - int hardsect_size, mask, ok; + int logical_block_size, mask, ok; int offset = 0; struct page *iop = mdev->md_io_page; @@ -141,15 +141,15 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, BUG_ON(!bdev->md_bdev); - hardsect_size = drbd_get_hardsect_size(bdev->md_bdev); - if (hardsect_size == 0) - hardsect_size = MD_SECTOR_SIZE; + logical_block_size = bdev_logical_block_size(bdev->md_bdev); + if (logical_block_size == 0) + logical_block_size = MD_SECTOR_SIZE; - /* in case hardsect_size != 512 [ s390 only? ] */ - if (hardsect_size != MD_SECTOR_SIZE) { - mask = (hardsect_size / MD_SECTOR_SIZE) - 1; + /* in case logical_block_size != 512 [ s390 only? ] */ + if (logical_block_size != MD_SECTOR_SIZE) { + mask = (logical_block_size / MD_SECTOR_SIZE) - 1; D_ASSERT(mask == 1 || mask == 3 || mask == 7); - D_ASSERT(hardsect_size == (mask+1) * MD_SECTOR_SIZE); + D_ASSERT(logical_block_size == (mask+1) * MD_SECTOR_SIZE); offset = sector & mask; sector = sector & ~mask; iop = mdev->md_io_tmpp; @@ -161,11 +161,11 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, void *hp = page_address(mdev->md_io_tmpp); ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, - READ, hardsect_size); + READ, logical_block_size); if (unlikely(!ok)) { dev_err(DEV, "drbd_md_sync_page_io(,%llus," - "READ [hardsect_size!=512]) failed!\n", + "READ [logical_block_size!=512]) failed!\n", (unsigned long long)sector); return 0; } @@ -180,14 +180,14 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, current->comm, current->pid, __func__, (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); - ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, hardsect_size); + ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, logical_block_size); if (unlikely(!ok)) { dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n", (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); return 0; } - if (hardsect_size != MD_SECTOR_SIZE && !(rw & WRITE)) { + if (logical_block_size != MD_SECTOR_SIZE && !(rw & WRITE)) { void *p = page_address(mdev->md_io_page); void *hp = page_address(mdev->md_io_tmpp); @@ -378,7 +378,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) * * Returns -1 on IO error, 0 on checksum error and 1 upon success. 
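 *
 * (The transactions live in a fixed-size on-disk ring; drbd_al_read_log()
 * below scans all slots and picks the oldest and newest transaction by
 * comparing transaction numbers with wrap-around-safe signed arithmetic.)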
*/ -STATIC int drbd_al_read_tr(struct drbd_conf *mdev, +static int drbd_al_read_tr(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, struct al_transaction *b, int index) @@ -416,14 +416,14 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) int i; int rv; int mx; - int cnr; int active_extents = 0; int transactions = 0; - int overflow = 0; - int from = -1; - int to = -1; - u32 from_tnr = -1; + int found_valid = 0; + int from = 0; + int to = 0; + u32 from_tnr = 0; u32 to_tnr = 0; + u32 cnr; mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT); @@ -444,22 +444,27 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) } cnr = be32_to_cpu(buffer->tr_number); - if (cnr == -1) - overflow = 1; - - if (cnr < from_tnr && !overflow) { + if (++found_valid == 1) { + from = i; + to = i; + from_tnr = cnr; + to_tnr = cnr; + continue; + } + if ((int)cnr - (int)from_tnr < 0) { + D_ASSERT(from_tnr - cnr + i - from == mx+1); from = i; from_tnr = cnr; } - if (cnr > to_tnr) { + if ((int)cnr - (int)to_tnr > 0) { + D_ASSERT(cnr - to_tnr == i - to); to = i; to_tnr = cnr; } } - if (from == -1 || to == -1) { + if (!found_valid) { dev_warn(DEV, "No usable activity log found.\n"); - mutex_unlock(&mdev->md_io_mutex); return 1; } @@ -524,7 +529,7 @@ cancel: return 1; } -STATIC void atodb_endio(struct bio *bio, int error) +static void atodb_endio(struct bio *bio, int error) { struct drbd_atodb_wait *wc = bio->bi_private; struct drbd_conf *mdev = wc->mdev; @@ -555,7 +560,7 @@ STATIC void atodb_endio(struct bio *bio, int error) #define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL)) /* activity log to on disk bitmap -- prepare bio unless that sector * is already covered by previously prepared bios */ -STATIC int atodb_prepare_unless_covered(struct drbd_conf *mdev, +static int atodb_prepare_unless_covered(struct drbd_conf *mdev, struct bio **bios, unsigned int enr, struct drbd_atodb_wait *wc) __must_hold(local) @@ -803,7 +808,7 @@ void drbd_al_shrink(struct drbd_conf *mdev) wake_up(&mdev->al_wait); } -STATIC int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused) { struct update_odbm_work *udw = (struct update_odbm_work *)w; @@ -840,7 +845,7 @@ STATIC int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused * * TODO will be obsoleted once we have a caching lru of the on disk bitmap */ -STATIC void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, +static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, int count, int success) { struct lc_element *e; diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index d9b59b0611b0..417da6e3cea3 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "drbd_int.h" /* OPAQUE outside this file! @@ -150,7 +151,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev) } /* word offset to long pointer */ -STATIC unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km) +static unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km) { struct page *page; unsigned long page_nr; @@ -197,7 +198,7 @@ void bm_unmap(unsigned long *p_addr) * to be able to report device specific. 
*/ -STATIC void bm_free_pages(struct page **pages, unsigned long number) +static void bm_free_pages(struct page **pages, unsigned long number) { unsigned long i; if (!pages) @@ -215,7 +216,7 @@ STATIC void bm_free_pages(struct page **pages, unsigned long number) } } -STATIC void bm_vk_free(void *ptr, int v) +static void bm_vk_free(void *ptr, int v) { if (v) vfree(ptr); @@ -226,7 +227,7 @@ STATIC void bm_vk_free(void *ptr, int v) /* * "have" and "want" are NUMBER OF PAGES. */ -STATIC struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) +static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) { struct page **old_pages = b->bm_pages; struct page **new_pages, *page; @@ -239,7 +240,11 @@ STATIC struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) if (have == want) return old_pages; - /* Trying kmalloc first, falling back to vmalloc... */ + /* Trying kmalloc first, falling back to vmalloc. + * GFP_KERNEL is ok, as this is done when a lower level disk is + * "attached" to the drbd. Context is receiver thread or cqueue + * thread. As we have no disk yet, we are not in the IO path, + * not even the IO path of the peer. */ bytes = sizeof(struct page *)*want; new_pages = kmalloc(bytes, GFP_KERNEL); if (!new_pages) { @@ -320,7 +325,7 @@ void drbd_bm_cleanup(struct drbd_conf *mdev) * this masks out the remaining bits. * Rerturns the number of bits cleared. */ -STATIC int bm_clear_surplus(struct drbd_bitmap *b) +static int bm_clear_surplus(struct drbd_bitmap *b) { const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1; size_t w = b->bm_bits >> LN2_BPL; @@ -343,7 +348,7 @@ STATIC int bm_clear_surplus(struct drbd_bitmap *b) return cleared; } -STATIC void bm_set_surplus(struct drbd_bitmap *b) +static void bm_set_surplus(struct drbd_bitmap *b) { const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1; size_t w = b->bm_bits >> LN2_BPL; @@ -362,7 +367,7 @@ STATIC void bm_set_surplus(struct drbd_bitmap *b) bm_unmap(p_addr); } -STATIC unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian) +static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian) { unsigned long *p_addr, *bm, offset = 0; unsigned long bits = 0; @@ -420,7 +425,7 @@ void _drbd_bm_recount_bits(struct drbd_conf *mdev, char *file, int line) } /* offset and len in long words.*/ -STATIC void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) +static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) { unsigned long *p_addr, *bm; size_t do_now, end; @@ -752,7 +757,7 @@ static void bm_async_io_complete(struct bio *bio, int error) bio_put(bio); } -STATIC void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int page_nr, int rw) __must_hold(local) +static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int page_nr, int rw) __must_hold(local) { /* we are process context. 
we always get a bio */ struct bio *bio = bio_alloc(GFP_KERNEL, 1); @@ -790,6 +795,8 @@ void bm_cpu_to_lel(struct drbd_bitmap *b) * this may be optimized by using * cpu_to_lel(-1) == -1 and cpu_to_lel(0) == 0; * the following is still not optimal, but better than nothing */ + unsigned int i; + unsigned long *p_addr, *bm; if (b->bm_set == 0) { /* no page at all; avoid swap if all is 0 */ i = b->bm_number_of_pages; @@ -801,12 +808,10 @@ void bm_cpu_to_lel(struct drbd_bitmap *b) i = 0; } for (; i < b->bm_number_of_pages; i++) { - unsigned long *bm; - /* if you'd want to use kmap_atomic, you'd have to disable irq! */ - p_addr = kmap(b->bm_pages[i]); + p_addr = kmap_atomic(b->bm_pages[i], KM_USER0); for (bm = p_addr; bm < p_addr + PAGE_SIZE/sizeof(long); bm++) *bm = cpu_to_lel(*bm); - kunmap(p_addr); + kunmap_atomic(p_addr, KM_USER0); } } # endif @@ -816,7 +821,7 @@ void bm_cpu_to_lel(struct drbd_bitmap *b) /* * bm_rw: read/write the whole bitmap from/to its on disk location. */ -STATIC int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) +static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) { struct drbd_bitmap *b = mdev->bitmap; /* sector_t sector; */ diff --git a/drivers/block/drbd/drbd_buildtag.c b/drivers/block/drbd/drbd_buildtag.c deleted file mode 100644 index 20fe72a104d3..000000000000 --- a/drivers/block/drbd/drbd_buildtag.c +++ /dev/null @@ -1,7 +0,0 @@ -/* automatically generated. DO NOT EDIT. */ -#include -const char *drbd_buildtag(void) -{ - return "GIT-hash: b0abb3832a730d4fbd145013f6f51fc977bba3cc drbd/drbd_int.h" - " build by phil@fat-tyre, 2009-05-15 11:54:26"; -} diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 83f9f33e65ea..a63595d80579 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -106,22 +106,6 @@ extern char usermode_helper[]; struct drbd_conf; -#ifdef DBG_ALL_SYMBOLS -# define STATIC -#else -# define STATIC static -#endif - -/* - * Some Message Macros - *************************/ - -#define DUMPP(A) dev_err(DEV, #A " = %p in %s:%d\n", (A), __FILE__, __LINE__); -#define DUMPLU(A) dev_err(DEV, #A " = %lu in %s:%d\n", (unsigned long)(A), __FILE__, __LINE__); -#define DUMPLLU(A) dev_err(DEV, #A " = %llu in %s:%d\n", (unsigned long long)(A), __FILE__, __LINE__); -#define DUMPLX(A) dev_err(DEV, #A " = %lx in %s:%d\n", (A), __FILE__, __LINE__); -#define DUMPI(A) dev_err(DEV, #A " = %d in %s:%d\n", (int)(A), __FILE__, __LINE__); - /* to shorten dev_warn(DEV, "msg"); and relatives statements */ #define DEV (disk_to_dev(mdev->vdisk)) @@ -139,14 +123,14 @@ struct drbd_conf; /* Defines to control fault insertion */ enum { DRBD_FAULT_MD_WR = 0, /* meta data write */ - DRBD_FAULT_MD_RD, /* read */ - DRBD_FAULT_RS_WR, /* resync */ - DRBD_FAULT_RS_RD, - DRBD_FAULT_DT_WR, /* data */ - DRBD_FAULT_DT_RD, - DRBD_FAULT_DT_RA, /* data read ahead */ - DRBD_FAULT_BM_ALLOC, /* bitmap allocation */ - DRBD_FAULT_AL_EE, /* alloc ee */ + DRBD_FAULT_MD_RD = 1, /* read */ + DRBD_FAULT_RS_WR = 2, /* resync */ + DRBD_FAULT_RS_RD = 3, + DRBD_FAULT_DT_WR = 4, /* data */ + DRBD_FAULT_DT_RD = 5, + DRBD_FAULT_DT_RA = 6, /* data read ahead */ + DRBD_FAULT_BM_ALLOC = 7, /* bitmap allocation */ + DRBD_FAULT_AL_EE = 8, /* alloc ee */ DRBD_FAULT_MAX, }; @@ -332,6 +316,10 @@ static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c) #endif } +#ifndef __packed +#define __packed __attribute__((packed)) +#endif + /* This is the layout for a packet on the wire. * The byteorder is the network byte order. 
* (except block_id and barrier fields. @@ -543,6 +531,7 @@ struct p_compressed_bm { u8 code[0]; } __packed; +/* DCBP: Drbd Compressed Bitmap Packet ... */ static inline enum drbd_bitmap_code DCBP_get_code(struct p_compressed_bm *p) { @@ -795,6 +784,8 @@ enum { * but worker thread is still handling the cleanup. * reconfiguring (nl_disk_conf, nl_net_conf) is dissalowed, * while this is set. */ + RESIZE_PENDING, /* Size change detected locally, waiting for the response from + * the peer, if it changed there as well. */ }; struct drbd_bitmap; /* opaque for drbd_conf */ @@ -946,12 +937,16 @@ struct drbd_conf { unsigned long rs_mark_time; /* skipped because csum was equeal [unit BM_BLOCK_SIZE] */ unsigned long rs_same_csum; + + /* where does the admin want us to start? (sector) */ + sector_t ov_start_sector; + /* where are we now? (sector) */ sector_t ov_position; - /* Start sector of out of sync range. */ + /* Start sector of out of sync range (to merge printk reporting). */ sector_t ov_last_oos_start; /* size of out-of-sync range in sectors. */ sector_t ov_last_oos_size; - unsigned long ov_left; + unsigned long ov_left; /* in bits */ struct crypto_hash *csums_tfm; struct crypto_hash *verify_tfm; @@ -991,7 +986,7 @@ struct drbd_conf { atomic_t pp_in_use; wait_queue_head_t ee_wait; struct page *md_io_page; /* one page buffer for md_io */ - struct page *md_io_tmpp; /* for hardsect_size != 512 [s390 only?] */ + struct page *md_io_tmpp; /* for logical_block_size != 512 */ struct mutex md_io_mutex; /* protects the md_io_buffer */ spinlock_t al_lock; wait_queue_head_t al_wait; @@ -1103,7 +1098,7 @@ extern int drbd_send_protocol(struct drbd_conf *mdev); extern int drbd_send_uuids(struct drbd_conf *mdev); extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val); -extern int drbd_send_sizes(struct drbd_conf *mdev); +extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply); extern int _drbd_send_state(struct drbd_conf *mdev); extern int drbd_send_state(struct drbd_conf *mdev); extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, @@ -1127,8 +1122,6 @@ extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, struct p_data *dp); extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, sector_t sector, int blksize, u64 block_id); -extern int _drbd_send_page(struct drbd_conf *mdev, struct page *page, - int offset, size_t size); extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, struct drbd_epoch_entry *e); extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); @@ -1348,7 +1341,9 @@ extern int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, con /* drbd_main.c */ extern struct kmem_cache *drbd_request_cache; -extern struct kmem_cache *drbd_ee_cache; +extern struct kmem_cache *drbd_ee_cache; /* epoch entries */ +extern struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */ +extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ extern mempool_t *drbd_request_mempool; extern mempool_t *drbd_ee_mempool; @@ -1388,7 +1383,7 @@ extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); /* drbd_worker.c */ extern int drbd_worker(struct drbd_thread *thi); -extern void drbd_alter_sa(struct drbd_conf *mdev, int na); +extern int drbd_alter_sa(struct drbd_conf *mdev, int na); extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side); extern void resume_next_sg(struct drbd_conf 
*mdev); extern void suspend_other_sg(struct drbd_conf *mdev); @@ -1409,7 +1404,7 @@ static inline void ov_oos_print(struct drbd_conf *mdev) } -void drbd_csum(struct drbd_conf *, struct crypto_hash *, struct bio *, void *); +extern void drbd_csum(struct drbd_conf *, struct crypto_hash *, struct bio *, void *); /* worker callbacks */ extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int); extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int); @@ -1704,9 +1699,11 @@ static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev) } } +/* Returns the number of 512 byte sectors of the device */ static inline sector_t drbd_get_capacity(struct block_device *bdev) { - return bdev ? get_capacity(bdev->bd_disk) : 0; + /* return bdev ? get_capacity(bdev->bd_disk) : 0; */ + return bdev ? bdev->bd_inode->i_size >> 9 : 0; } /** diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index ad296842b960..73c6a9da7645 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -26,7 +26,7 @@ #include #include #include - +#include #include #include #include @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -50,7 +49,6 @@ #include #include -#include #include #include "drbd_int.h" #include "drbd_tracing.h" @@ -73,12 +71,12 @@ int drbd_asender(struct drbd_thread *); int drbd_init(void); static int drbd_open(struct block_device *bdev, fmode_t mode); static int drbd_release(struct gendisk *gd, fmode_t mode); -STATIC int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused); -STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, +static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused); +static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); -STATIC int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); -STATIC void md_sync_timer_fn(unsigned long data); -STATIC int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); +static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); +static void md_sync_timer_fn(unsigned long data); +static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); DEFINE_TRACE(drbd_unplug); DEFINE_TRACE(drbd_uuid); @@ -95,6 +93,7 @@ DEFINE_TRACE(drbd_req); MODULE_AUTHOR("Philipp Reisner , " "Lars Ellenberg "); MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION); +MODULE_VERSION(REL_VERSION); MODULE_LICENSE("GPL"); MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (1-255)"); MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR); @@ -110,7 +109,7 @@ module_param(allow_oos, bool, 0); module_param(cn_idx, uint, 0444); module_param(proc_details, int, 0644); -#ifdef DRBD_ENABLE_FAULTS +#ifdef CONFIG_DRBD_FAULT_INJECTION int enable_faults; int fault_rate; static int fault_count; @@ -144,7 +143,9 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0 struct drbd_conf **minor_table; struct kmem_cache *drbd_request_cache; -struct kmem_cache *drbd_ee_cache; +struct kmem_cache *drbd_ee_cache; /* epoch entries */ +struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */ +struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ mempool_t *drbd_request_mempool; mempool_t *drbd_ee_mempool; @@ -161,7 +162,7 @@ wait_queue_head_t drbd_pp_wait; DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5); -STATIC 
struct block_device_operations drbd_ops = { +static struct block_device_operations drbd_ops = { .owner = THIS_MODULE, .open = drbd_open, .release = drbd_release, @@ -198,10 +199,11 @@ int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) * Each &struct drbd_tl_epoch has a circular double linked list of requests * attached. */ -STATIC int tl_init(struct drbd_conf *mdev) +static int tl_init(struct drbd_conf *mdev) { struct drbd_tl_epoch *b; + /* during device minor initialization, we may well use GFP_KERNEL */ b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL); if (!b) return 0; @@ -222,7 +224,7 @@ STATIC int tl_init(struct drbd_conf *mdev) return 1; } -STATIC void tl_cleanup(struct drbd_conf *mdev) +static void tl_cleanup(struct drbd_conf *mdev) { D_ASSERT(mdev->oldest_tle == mdev->newest_tle); D_ASSERT(list_empty(&mdev->out_of_sequence_requests)); @@ -472,7 +474,7 @@ int drbd_io_error(struct drbd_conf *mdev, int force_detach) * @os: old (current) state. * @ns: new (wanted) state. */ -STATIC int cl_wide_st_chg(struct drbd_conf *mdev, +static int cl_wide_st_chg(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns) { return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED && @@ -513,15 +515,15 @@ void drbd_force_state(struct drbd_conf *mdev, drbd_change_state(mdev, CS_HARD, mask, val); } -STATIC int is_valid_state(struct drbd_conf *mdev, union drbd_state ns); -STATIC int is_valid_state_transition(struct drbd_conf *, +static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns); +static int is_valid_state_transition(struct drbd_conf *, union drbd_state, union drbd_state); -STATIC union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, +static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, int *warn_sync_abort); int drbd_send_state_req(struct drbd_conf *, union drbd_state, union drbd_state); -STATIC enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev, +static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val) { union drbd_state os, ns; @@ -565,7 +567,7 @@ STATIC enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev, * Should not be called directly, use drbd_request_state() or * _drbd_request_state(). */ -STATIC int drbd_req_state(struct drbd_conf *mdev, +static int drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val, enum chg_state_flags f) { @@ -658,7 +660,7 @@ int _drbd_request_state(struct drbd_conf *mdev, union drbd_state mask, return rv; } -STATIC void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) +static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) { dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n", name, @@ -705,7 +707,7 @@ void print_st_err(struct drbd_conf *mdev, * @mdev: DRBD device. * @ns: State to consider. 
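 *
 * Returns %SS_SUCCESS if @ns is an admissible state, or one of the SS_*
 * error codes from drbd_state_sw_errors in drbd_strings.c otherwise.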
*/ -STATIC int is_valid_state(struct drbd_conf *mdev, union drbd_state ns) +static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns) { /* See drbd_state_sw_errors in drbd_strings.c */ @@ -740,11 +742,11 @@ STATIC int is_valid_state(struct drbd_conf *mdev, union drbd_state ns) else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT) rv = SS_NO_UP_TO_DATE_DISK; - else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) - rv = SS_BOTH_INCONSISTENT; + else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT) + rv = SS_NO_LOCAL_DISK; - else if (ns.conn > C_CONNECTED && (ns.disk == D_DISKLESS || ns.pdsk == D_DISKLESS)) - rv = SS_SYNCING_DISKLESS; + else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT) + rv = SS_NO_REMOTE_DISK; else if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S || @@ -770,7 +772,7 @@ STATIC int is_valid_state(struct drbd_conf *mdev, union drbd_state ns) * @ns: new state. * @os: old state. */ -STATIC int is_valid_state_transition(struct drbd_conf *mdev, +static int is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, union drbd_state os) { int rv = SS_SUCCESS; @@ -821,7 +823,7 @@ STATIC int is_valid_state_transition(struct drbd_conf *mdev, * When we loose connection, we have to set the state of the peers disk (pdsk) * to D_UNKNOWN. This rule and many more along those lines are in this function. */ -STATIC union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, +static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, int *warn_sync_abort) { enum drbd_fencing_p fp; @@ -948,6 +950,25 @@ STATIC union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state return ns; } +/* helper for __drbd_set_state */ +static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) +{ + if (cs == C_VERIFY_T) { + /* starting online verify from an arbitrary position + * does not fit well into the existion protocol. + * on C_VERIFY_T, we initialize ov_left and friends + * implicitly in receive_DataRequest once the + * first P_OV_REQUEST is received */ + mdev->ov_start_sector = ~(sector_t)0; + } else { + unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector); + if (bit >= mdev->rs_total) + mdev->ov_start_sector = + BM_BIT_TO_SECT(mdev->rs_total - 1); + mdev->ov_position = mdev->ov_start_sector; + } +} + /** * __drbd_set_state() - Set a new DRBD state * @mdev: DRBD device. @@ -1043,6 +1064,15 @@ int __drbd_set_state(struct drbd_conf *mdev, mod_timer(&mdev->resync_timer, jiffies); } + /* aborted verify run. 
log the last position */ + if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && + ns.conn < C_CONNECTED) { + mdev->ov_start_sector = + BM_BIT_TO_SECT(mdev->rs_total - mdev->ov_left); + dev_info(DEV, "Online Verify reached sector %llu\n", + (unsigned long long)mdev->ov_start_sector); + } + if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) { dev_info(DEV, "Syncer continues.\n"); @@ -1068,16 +1098,24 @@ int __drbd_set_state(struct drbd_conf *mdev, if (os.conn == C_CONNECTED && (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) { mdev->ov_position = 0; - mdev->ov_left = mdev->rs_total = mdev->rs_mark_left = drbd_bm_bits(mdev); + if (mdev->agreed_pro_version >= 90) + set_ov_position(mdev, ns.conn); + else + mdev->ov_start_sector = 0; + mdev->ov_left = mdev->rs_total + - BM_SECT_TO_BIT(mdev->ov_position); mdev->rs_start = mdev->rs_mark_time = jiffies; mdev->ov_last_oos_size = 0; mdev->ov_last_oos_start = 0; - if (ns.conn == C_VERIFY_S) + if (ns.conn == C_VERIFY_S) { + dev_info(DEV, "Starting Online Verify from sector %llu\n", + (unsigned long long)mdev->ov_position); mod_timer(&mdev->resync_timer, jiffies); + } } if (get_ldev(mdev)) { @@ -1140,7 +1178,7 @@ int __drbd_set_state(struct drbd_conf *mdev, return rv; } -STATIC int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused) { struct after_state_chg_work *ascw; @@ -1180,7 +1218,7 @@ static void abw_start_sync(struct drbd_conf *mdev, int rv) * @ns: new state. * @flags: Flags */ -STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, +static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags) { enum drbd_fencing_p fp; @@ -1260,7 +1298,7 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { kfree(mdev->p_uuid); /* We expect to receive up-to-date UUIDs soon. */ mdev->p_uuid = NULL; /* ...to not use the old ones in the mean time */ - drbd_send_sizes(mdev); /* to start sync... */ + drbd_send_sizes(mdev, 0); /* to start sync... */ drbd_send_uuids(mdev); drbd_send_state(mdev); } @@ -1327,7 +1365,7 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, (os.user_isp && !ns.user_isp)) resume_next_sg(mdev); - /* Upon network connection, we need to start the received */ + /* Upon network connection, we need to start the receiver */ if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED) drbd_thread_start(&mdev->receiver); @@ -1347,7 +1385,7 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, } -STATIC int drbd_thread_setup(void *arg) +static int drbd_thread_setup(void *arg) { struct drbd_thread *thi = (struct drbd_thread *) arg; struct drbd_conf *mdev = thi->mdev; @@ -1389,7 +1427,7 @@ restart: return retval; } -STATIC void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi, +static void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi, int (*func) (struct drbd_thread *)) { spin_lock_init(&thi->t_lock); @@ -1466,6 +1504,7 @@ int drbd_thread_start(struct drbd_thread *thi) void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) { unsigned long flags; + enum drbd_thread_state ns = restart ? 
Restarting : Exiting; /* may be called from state engine, holding the req lock irqsave */ @@ -1685,7 +1724,9 @@ int drbd_send_protocol(struct drbd_conf *mdev) if (mdev->agreed_pro_version >= 87) size += strlen(mdev->net_conf->integrity_alg) + 1; - p = kmalloc(size, GFP_KERNEL); + /* we must not recurse into our own queue, + * as that is blocked during handshake */ + p = kmalloc(size, GFP_NOIO); if (p == NULL) return 0; @@ -1750,7 +1791,7 @@ int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val) (struct p_header *)&p, sizeof(p)); } -int drbd_send_sizes(struct drbd_conf *mdev) +int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply) { struct p_sizes p; sector_t d_size, u_size; @@ -1772,8 +1813,8 @@ int drbd_send_sizes(struct drbd_conf *mdev) p.d_size = cpu_to_be64(d_size); p.u_size = cpu_to_be64(u_size); - p.c_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); - p.max_segment_size = cpu_to_be32(mdev->rq_queue->max_segment_size); + p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); + p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue)); p.queue_order_type = cpu_to_be32(q_order_type); ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES, @@ -1846,7 +1887,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, int bits; /* may we use this feature? */ - if ((mdev->sync_conf.use_rle_encoding == 0) || + if ((mdev->sync_conf.use_rle == 0) || (mdev->agreed_pro_version < 90)) return 0; @@ -2057,7 +2098,7 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) * @blksize: size in byte, needs to be in big endian byte order * @block_id: Id, big endian byte order */ -STATIC int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, +static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, u64 sector, u32 blksize, u64 block_id) @@ -2179,7 +2220,7 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) * returns FALSE if we should retry, * TRUE if we think connection is dead */ -STATIC int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock) +static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock) { int drop_it; /* long elapsed = (long)(jiffies - mdev->last_received); */ @@ -2223,7 +2264,7 @@ STATIC int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * * As a workaround, we disable sendpage on pages * with page_count == 0 or PageSlab. 
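 * (_drbd_no_send_page() below is that fallback: it kmap()s the page and
 * pushes the data through an ordinary drbd_send() instead.)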
*/ -STATIC int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, +static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, int offset, size_t size) { int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0); @@ -2233,7 +2274,7 @@ STATIC int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, return sent == size; } -int _drbd_send_page(struct drbd_conf *mdev, struct page *page, +static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, int offset, size_t size) { mm_segment_t oldfs = get_fs(); @@ -2527,7 +2568,7 @@ static int drbd_release(struct gendisk *gd, fmode_t mode) return 0; } -STATIC void drbd_unplug_fn(struct request_queue *q) +static void drbd_unplug_fn(struct request_queue *q) { struct drbd_conf *mdev = q->queuedata; @@ -2558,7 +2599,7 @@ STATIC void drbd_unplug_fn(struct request_queue *q) drbd_kick_lo(mdev); } -STATIC void drbd_set_defaults(struct drbd_conf *mdev) +static void drbd_set_defaults(struct drbd_conf *mdev) { mdev->sync_conf.after = DRBD_AFTER_DEF; mdev->sync_conf.rate = DRBD_RATE_DEF; @@ -2697,7 +2738,7 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev) } -STATIC void drbd_destroy_mempools(void) +static void drbd_destroy_mempools(void) { struct page *page; @@ -2718,16 +2759,22 @@ STATIC void drbd_destroy_mempools(void) kmem_cache_destroy(drbd_ee_cache); if (drbd_request_cache) kmem_cache_destroy(drbd_request_cache); + if (drbd_bm_ext_cache) + kmem_cache_destroy(drbd_bm_ext_cache); + if (drbd_al_ext_cache) + kmem_cache_destroy(drbd_al_ext_cache); drbd_ee_mempool = NULL; drbd_request_mempool = NULL; drbd_ee_cache = NULL; drbd_request_cache = NULL; + drbd_bm_ext_cache = NULL; + drbd_al_ext_cache = NULL; return; } -STATIC int drbd_create_mempools(void) +static int drbd_create_mempools(void) { struct page *page; const int number = (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE) * minor_count; @@ -2737,19 +2784,31 @@ STATIC int drbd_create_mempools(void) drbd_request_mempool = NULL; drbd_ee_cache = NULL; drbd_request_cache = NULL; + drbd_bm_ext_cache = NULL; + drbd_al_ext_cache = NULL; drbd_pp_pool = NULL; /* caches */ drbd_request_cache = kmem_cache_create( - "drbd_req_cache", sizeof(struct drbd_request), 0, 0, NULL); + "drbd_req", sizeof(struct drbd_request), 0, 0, NULL); if (drbd_request_cache == NULL) goto Enomem; drbd_ee_cache = kmem_cache_create( - "drbd_ee_cache", sizeof(struct drbd_epoch_entry), 0, 0, NULL); + "drbd_ee", sizeof(struct drbd_epoch_entry), 0, 0, NULL); if (drbd_ee_cache == NULL) goto Enomem; + drbd_bm_ext_cache = kmem_cache_create( + "drbd_bm", sizeof(struct bm_extent), 0, 0, NULL); + if (drbd_bm_ext_cache == NULL) + goto Enomem; + + drbd_al_ext_cache = kmem_cache_create( + "drbd_al", sizeof(struct lc_element), 0, 0, NULL); + if (drbd_al_ext_cache == NULL) + goto Enomem; + /* mempools */ drbd_request_mempool = mempool_create(number, mempool_alloc_slab, mempool_free_slab, drbd_request_cache); @@ -2780,7 +2839,7 @@ Enomem: return -ENOMEM; } -STATIC int drbd_notify_sys(struct notifier_block *this, unsigned long code, +static int drbd_notify_sys(struct notifier_block *this, unsigned long code, void *unused) { /* just so we have it. 
you never know what interessting things we @@ -2790,7 +2849,7 @@ STATIC int drbd_notify_sys(struct notifier_block *this, unsigned long code, return NOTIFY_DONE; } -STATIC struct notifier_block drbd_notifier = { +static struct notifier_block drbd_notifier = { .notifier_call = drbd_notify_sys, }; @@ -2836,7 +2895,7 @@ static void drbd_delete_device(unsigned int minor) ERR_IF (!list_empty(&mdev->data.work.q)) { struct list_head *lp; list_for_each(lp, &mdev->data.work.q) { - DUMPP(lp); + dev_err(DEV, "lp = %p\n", lp); } }; /* end paranoia asserts */ @@ -2876,7 +2935,7 @@ static void drbd_delete_device(unsigned int minor) drbd_free_mdev(mdev); } -STATIC void drbd_cleanup(void) +static void drbd_cleanup(void) { unsigned int i; @@ -2958,7 +3017,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) goto out_no_q; mdev->rq_queue = q; q->queuedata = mdev; - q->max_segment_size = DRBD_MAX_SEGMENT_SIZE; + blk_queue_max_segment_size(q, DRBD_MAX_SEGMENT_SIZE); disk = alloc_disk(1); if (!disk) @@ -2994,7 +3053,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) if (drbd_bm_init(mdev)) goto out_no_bitmap; - /* no need to lock access, we are still initializing the module. */ + /* no need to lock access, we are still initializing this minor device. */ if (!tl_init(mdev)) goto out_no_tl; @@ -3143,10 +3202,12 @@ void drbd_free_bc(struct drbd_backing_dev *ldev) void drbd_free_sock(struct drbd_conf *mdev) { if (mdev->data.socket) { + kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR); sock_release(mdev->data.socket); mdev->data.socket = NULL; } if (mdev->meta.socket) { + kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR); sock_release(mdev->meta.socket); mdev->meta.socket = NULL; } @@ -3344,7 +3405,7 @@ void drbd_md_mark_dirty(struct drbd_conf *mdev) } -STATIC void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) +static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) { int i; @@ -3472,7 +3533,7 @@ int drbd_bmio_clear_n_write(struct drbd_conf *mdev) return rv; } -STATIC int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused) { struct bm_io_work *work = (struct bm_io_work *)w; int rv; @@ -3581,14 +3642,14 @@ int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag) return (bdev->md.flags & flag) != 0; } -STATIC void md_sync_timer_fn(unsigned long data) +static void md_sync_timer_fn(unsigned long data) { struct drbd_conf *mdev = (struct drbd_conf *) data; drbd_queue_work_front(&mdev->data.work, &mdev->md_sync_work); } -STATIC int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused) { dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n"); drbd_md_sync(mdev); @@ -3596,7 +3657,7 @@ STATIC int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused) return 1; } -#ifdef DRBD_ENABLE_FAULTS +#ifdef CONFIG_DRBD_FAULT_INJECTION /* Fault insertion support including random number generator shamelessly * stolen from kernel/rcutorture.c */ struct fault_random_state { @@ -3612,7 +3673,7 @@ struct fault_random_state { * Crude but fast random-number generator. Uses a linear congruential * generator, with occasional help from get_random_bytes(). 
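 * (i.e. the state advances as state = state * A + C in 32-bit arithmetic,
 * with the multiplier and increment constants taken over from rcutorture,
 * and an occasional refresh from get_random_bytes().)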
*/ -STATIC unsigned long +static unsigned long _drbd_fault_random(struct fault_random_state *rsp) { long refresh; @@ -3626,18 +3687,18 @@ _drbd_fault_random(struct fault_random_state *rsp) return swahw32(rsp->state); } -STATIC char * +static char * _drbd_fault_str(unsigned int type) { static char *_faults[] = { - "Meta-data write", - "Meta-data read", - "Resync write", - "Resync read", - "Data write", - "Data read", - "Data read ahead", - "BM allocation", - "EE allocation" + [DRBD_FAULT_MD_WR] = "Meta-data write", + [DRBD_FAULT_MD_RD] = "Meta-data read", + [DRBD_FAULT_RS_WR] = "Resync write", + [DRBD_FAULT_RS_RD] = "Resync read", + [DRBD_FAULT_DT_WR] = "Data write", + [DRBD_FAULT_DT_RD] = "Data read", + [DRBD_FAULT_DT_RA] = "Data read ahead", + [DRBD_FAULT_BM_ALLOC] = "BM allocation", + [DRBD_FAULT_AL_EE] = "EE allocation" }; return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**"; @@ -3665,5 +3726,22 @@ _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) } #endif +const char *drbd_buildtag(void) +{ + /* DRBD built from external sources has here a reference to the + git hash of the source code. */ + + static char buildtag[38] = "\0uilt-in"; + + if (buildtag[0] == 0) { + if (THIS_MODULE != NULL) + sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion); + else + buildtag[0] = 'b'; + } + + return buildtag; +} + module_init(drbd_init) module_exit(drbd_cleanup) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index c6217d6a2465..c3d438ccd408 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -25,34 +25,40 @@ #include #include +#include #include #include #include #include #include -#include -#include #include #include #include "drbd_int.h" #include "drbd_tracing.h" #include "drbd_wrappers.h" +#include #include #include +static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int); +static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *); +static unsigned short *tl_add_int(unsigned short *, enum drbd_tags, const void *); + /* see get_sb_bdev and bd_claim */ static char *drbd_m_holder = "Hands off! this is DRBD's meta data device."; /* Generate the tag_list to struct functions */ #define NL_PACKET(name, number, fields) \ -STATIC int name ## _from_tags(struct drbd_conf *mdev, \ +static int name ## _from_tags(struct drbd_conf *mdev, \ + unsigned short *tags, struct name *arg) __attribute__ ((unused)); \ +static int name ## _from_tags(struct drbd_conf *mdev, \ unsigned short *tags, struct name *arg) \ { \ int tag; \ int dlen; \ \ - while ((tag = *tags++) != TT_END) { \ - dlen = *tags++; \ + while ((tag = get_unaligned(tags++)) != TT_END) { \ + dlen = get_unaligned(tags++); \ switch (tag_number(tag)) { \ fields \ default: \ @@ -67,16 +73,16 @@ STATIC int name ## _from_tags(struct drbd_conf *mdev, \ } #define NL_INTEGER(pn, pr, member) \ case pn: /* D_ASSERT( tag_type(tag) == TT_INTEGER ); */ \ - arg->member = *(int *)(tags); \ - break; + arg->member = get_unaligned((int *)(tags)); \ + break; #define NL_INT64(pn, pr, member) \ case pn: /* D_ASSERT( tag_type(tag) == TT_INT64 ); */ \ - arg->member = *(u64 *)(tags); \ - break; + arg->member = get_unaligned((u64 *)(tags)); \ + break; #define NL_BIT(pn, pr, member) \ case pn: /* D_ASSERT( tag_type(tag) == TT_BIT ); */ \ - arg->member = *(char *)(tags) ? 1 : 0; \ - break; + arg->member = *(char *)(tags) ? 
1 : 0; \ + break; #define NL_STRING(pn, pr, member, len) \ case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \ if (dlen > len) { \ @@ -91,7 +97,10 @@ STATIC int name ## _from_tags(struct drbd_conf *mdev, \ /* Generate the struct to tag_list functions */ #define NL_PACKET(name, number, fields) \ -STATIC unsigned short* \ +static unsigned short* \ +name ## _to_tags(struct drbd_conf *mdev, \ + struct name *arg, unsigned short *tags) __attribute__ ((unused)); \ +static unsigned short* \ name ## _to_tags(struct drbd_conf *mdev, \ struct name *arg, unsigned short *tags) \ { \ @@ -100,23 +109,23 @@ name ## _to_tags(struct drbd_conf *mdev, \ } #define NL_INTEGER(pn, pr, member) \ - *tags++ = pn | pr | TT_INTEGER; \ - *tags++ = sizeof(int); \ - *(int *)tags = arg->member; \ + put_unaligned(pn | pr | TT_INTEGER, tags++); \ + put_unaligned(sizeof(int), tags++); \ + put_unaligned(arg->member, (int *)tags); \ tags = (unsigned short *)((char *)tags+sizeof(int)); #define NL_INT64(pn, pr, member) \ - *tags++ = pn | pr | TT_INT64; \ - *tags++ = sizeof(u64); \ - *(u64 *)tags = arg->member; \ + put_unaligned(pn | pr | TT_INT64, tags++); \ + put_unaligned(sizeof(u64), tags++); \ + put_unaligned(arg->member, (u64 *)tags); \ tags = (unsigned short *)((char *)tags+sizeof(u64)); #define NL_BIT(pn, pr, member) \ - *tags++ = pn | pr | TT_BIT; \ - *tags++ = sizeof(char); \ + put_unaligned(pn | pr | TT_BIT, tags++); \ + put_unaligned(sizeof(char), tags++); \ *(char *)tags = arg->member; \ tags = (unsigned short *)((char *)tags+sizeof(char)); #define NL_STRING(pn, pr, member, len) \ - *tags++ = pn | pr | TT_STRING; \ - *tags++ = arg->member ## _len; \ + put_unaligned(pn | pr | TT_STRING, tags++); \ + put_unaligned(arg->member ## _len, tags++); \ memcpy(tags, arg->member, arg->member ## _len); \ tags = (unsigned short *)((char *)tags + arg->member ## _len); #include "linux/drbd_nl.h" @@ -126,16 +135,42 @@ void drbd_nl_send_reply(struct cn_msg *, int); int drbd_khelper(struct drbd_conf *mdev, char *cmd) { - char mb[12]; + char *envp[] = { "HOME=/", + "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + NULL, /* Will be set to address family */ + NULL, /* Will be set to address */ + NULL }; + + char mb[12], af[20], ad[60], *afs; char *argv[] = {usermode_helper, cmd, mb, NULL }; int ret; - static char *envp[] = { "HOME=/", - "TERM=linux", - "PATH=/sbin:/usr/sbin:/bin:/usr/bin", - NULL }; snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev)); + if (get_net_conf(mdev)) { + switch (((struct sockaddr *)mdev->net_conf->peer_addr)->sa_family) { + case AF_INET6: + afs = "ipv6"; + snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI6", + &((struct sockaddr_in6 *)mdev->net_conf->peer_addr)->sin6_addr); + break; + case AF_INET: + afs = "ipv4"; + snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4", + &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr); + break; + default: + afs = "ssocks"; + snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4", + &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr); + } + snprintf(af, 20, "DRBD_PEER_AF=%s", afs); + envp[3]=af; + envp[4]=ad; + put_net_conf(mdev); + } + dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb); drbd_bcast_ev_helper(mdev, cmd); @@ -354,7 +389,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) } -STATIC int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { struct primary primary_args; @@ -371,7 +406,7 @@ 
STATIC int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, return 0; } -STATIC int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { reply->ret_code = drbd_set_role(mdev, R_SECONDARY, 0); @@ -381,7 +416,7 @@ STATIC int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp /* initializes the md.*_offset members, so we are able to find * the on disk meta data */ -STATIC void drbd_md_set_sector_offsets(struct drbd_conf *mdev, +static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) { sector_t md_size_sect = 0; @@ -533,15 +568,12 @@ enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_ho md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev) || prev_size != mdev->ldev->md.md_size_sect; - if (md_moved) { - dev_warn(DEV, "Moving meta-data.\n"); - /* assert: (flexible) internal meta data */ - } - if (la_size_changed || md_moved) { drbd_al_shrink(mdev); /* All extents inactive. */ - dev_info(DEV, "Writing the whole bitmap, size changed\n"); - rv = drbd_bitmap_io(mdev, &drbd_bm_write, "size changed"); + dev_info(DEV, "Writing the whole bitmap, %s\n", + la_size_changed && md_moved ? "size changed and md moved" : + la_size_changed ? "size changed" : "md moved"); + rv = drbd_bitmap_io(mdev, &drbd_bm_write, "size changed"); /* does drbd_resume_io() ! */ drbd_md_mark_dirty(mdev); } @@ -607,7 +639,7 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) * failed, and 0 on success. You should call drbd_md_sync() after you called * this function. */ -STATIC int drbd_check_al_size(struct drbd_conf *mdev) +static int drbd_check_al_size(struct drbd_conf *mdev) { struct lru_cache *n, *t; struct lc_element *e; @@ -623,8 +655,8 @@ STATIC int drbd_check_al_size(struct drbd_conf *mdev) in_use = 0; t = mdev->act_log; - n = lc_create("act_log", mdev->sync_conf.al_extents, - sizeof(struct lc_element), 0); + n = lc_create("act_log", drbd_al_ext_cache, + mdev->sync_conf.al_extents, sizeof(struct lc_element), 0); if (n == NULL) { dev_err(DEV, "Cannot allocate act_log lru!\n"); @@ -659,31 +691,25 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu { struct request_queue * const q = mdev->rq_queue; struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; - /* unsigned int old_max_seg_s = q->max_segment_size; */ int max_segments = mdev->ldev->dc.max_bio_bvecs; if (b->merge_bvec_fn && !mdev->ldev->dc.use_bmbv) max_seg_s = PAGE_SIZE; - max_seg_s = min(b->max_sectors * b->hardsect_size, max_seg_s); + max_seg_s = min(queue_max_sectors(b) * queue_logical_block_size(b), max_seg_s); - q->max_sectors = max_seg_s >> 9; - if (max_segments) { - q->max_phys_segments = max_segments; - q->max_hw_segments = max_segments; - } else { - q->max_phys_segments = MAX_PHYS_SEGMENTS; - q->max_hw_segments = MAX_HW_SEGMENTS; - } - q->max_segment_size = max_seg_s; - q->hardsect_size = 512; - q->seg_boundary_mask = PAGE_SIZE-1; + blk_queue_max_sectors(q, max_seg_s >> 9); + blk_queue_max_phys_segments(q, max_segments ? max_segments : MAX_PHYS_SEGMENTS); + blk_queue_max_hw_segments(q, max_segments ? 
max_segments : MAX_HW_SEGMENTS); + blk_queue_max_segment_size(q, max_seg_s); + blk_queue_logical_block_size(q, 512); + blk_queue_segment_boundary(q, PAGE_SIZE-1); blk_queue_stack_limits(q, b); if (b->merge_bvec_fn) dev_warn(DEV, "Backing device's merge_bvec_fn() = %p\n", b->merge_bvec_fn); - dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", q->max_segment_size); + dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q)); if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", @@ -725,7 +751,7 @@ static void drbd_reconfig_done(struct drbd_conf *mdev) /* does always return 0; * interesting return code is in reply->ret_code */ -STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { enum drbd_ret_codes retcode; @@ -738,7 +764,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp union drbd_state ns, os; int rv; int cp_discovered = 0; - int hardsect_size; + int logical_block_size; drbd_reconfig_start(mdev); @@ -748,14 +774,13 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp goto fail; } - nbc = kmalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); + /* allocation not in the IO path, cqueue thread context */ + nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); if (!nbc) { retcode = ERR_NOMEM; goto fail; } - memset(&nbc->md, 0, sizeof(struct drbd_md)); - memset(&nbc->dc, 0, sizeof(struct disk_conf)); nbc->dc.disk_size = DRBD_DISK_SIZE_SECT_DEF; nbc->dc.on_io_error = DRBD_ON_IO_ERROR_DEF; nbc->dc.fencing = DRBD_FENCING_DEF; @@ -766,9 +791,6 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp goto fail; } - nbc->lo_file = NULL; - nbc->md_file = NULL; - if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { retcode = ERR_MD_IDX_INVALID; goto fail; @@ -817,18 +839,24 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp goto fail; } - resync_lru = lc_create("resync", 61, sizeof(struct bm_extent), + resync_lru = lc_create("resync", drbd_bm_ext_cache, + 61, sizeof(struct bm_extent), offsetof(struct bm_extent, lce)); if (!resync_lru) { retcode = ERR_NOMEM; goto release_bdev_fail; } + /* meta_dev_idx >= 0: external fixed size, + * possibly multiple drbd sharing one meta device. + * TODO in that case, paranoia check that [md_bdev, meta_dev_idx] is + * not yet used by some other drbd minor! + * (if you use drbd.conf + drbdadm, + * that should check it for you already; but if you don't, or someone + * fooled it, we need to double check here) */ nbc->md_bdev = inode2->i_bdev; - if (bd_claim(nbc->md_bdev, - (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL || - nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT) ? - (void *)mdev : (void *) drbd_m_holder)) { + if (bd_claim(nbc->md_bdev, (nbc->dc.meta_dev_idx < 0) ? 
(void *)mdev + : (void *) drbd_m_holder)) { retcode = ERR_BDCLAIM_MD_DISK; goto release_bdev_fail; } @@ -936,19 +964,19 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp goto force_diskless_dec; } - /* allocate a second IO page if hardsect_size != 512 */ - hardsect_size = drbd_get_hardsect_size(nbc->md_bdev); - if (hardsect_size == 0) - hardsect_size = MD_SECTOR_SIZE; + /* allocate a second IO page if logical_block_size != 512 */ + logical_block_size = bdev_logical_block_size(nbc->md_bdev); + if (logical_block_size == 0) + logical_block_size = MD_SECTOR_SIZE; - if (hardsect_size != MD_SECTOR_SIZE) { + if (logical_block_size != MD_SECTOR_SIZE) { if (!mdev->md_io_tmpp) { struct page *page = alloc_page(GFP_NOIO); if (!page) goto force_diskless_dec; - dev_warn(DEV, "Meta data's bdev hardsect_size = %d != %d\n", - hardsect_size, MD_SECTOR_SIZE); + dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n", + logical_block_size, MD_SECTOR_SIZE); dev_warn(DEV, "Workaround engaged (has performace impact).\n"); mdev->md_io_tmpp = page; @@ -1122,14 +1150,14 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp return 0; } -STATIC int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS)); return 0; } -STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { int i, ns; @@ -1154,6 +1182,7 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, goto fail; } + /* allocation not in the IO path, cqueue thread context */ new_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); if (!new_conf) { retcode = ERR_NOMEM; @@ -1168,6 +1197,7 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, new_conf->max_buffers = DRBD_MAX_BUFFERS_DEF; new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF; new_conf->sndbuf_size = DRBD_SNDBUF_SIZE_DEF; + new_conf->rcvbuf_size = DRBD_RCVBUF_SIZE_DEF; new_conf->ko_count = DRBD_KO_COUNT_DEF; new_conf->after_sb_0p = DRBD_AFTER_SB_0P_DEF; new_conf->after_sb_1p = DRBD_AFTER_SB_1P_DEF; @@ -1184,7 +1214,7 @@ STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } if (new_conf->two_primaries - && (new_conf->wire_protocol != DRBD_PROT_C)) { + && (new_conf->wire_protocol != DRBD_PROT_C)) { retcode = ERR_NOT_PROTO_C; goto fail; }; @@ -1366,7 +1396,7 @@ fail: return 0; } -STATIC int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { int retcode; @@ -1427,7 +1457,7 @@ void resync_after_online_grow(struct drbd_conf *mdev) _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE); } -STATIC int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { struct resize rs; @@ -1472,10 +1502,11 @@ STATIC int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } if (mdev->state.conn == C_CONNECTED && (dd != unchanged || ldsc)) { - drbd_send_uuids(mdev); - drbd_send_sizes(mdev); if (dd == grew) - 
resync_after_online_grow(mdev); + set_bit(RESIZE_PENDING, &mdev->flags); + + drbd_send_uuids(mdev); + drbd_send_sizes(mdev, 1); } fail: @@ -1483,14 +1514,13 @@ STATIC int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, return 0; } -STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { int retcode = NO_ERROR; int err; int ovr; /* online verify running */ int rsr; /* re-sync running */ - struct drbd_conf *odev; struct crypto_hash *verify_tfm = NULL; struct crypto_hash *csums_tfm = NULL; struct syncer_conf sc; @@ -1510,23 +1540,6 @@ STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n goto fail; } - if (sc.after != -1) { - if (sc.after < -1 || minor_to_mdev(sc.after) == NULL) { - retcode = ERR_SYNC_AFTER; - goto fail; - } - odev = minor_to_mdev(sc.after); /* check against loops in */ - while (1) { - if (odev == mdev) { - retcode = ERR_SYNC_AFTER_CYCLE; - goto fail; - } - if (odev->sync_conf.after == -1) - break; /* no cycles. */ - odev = minor_to_mdev(odev->sync_conf.after); - } - } - /* re-sync running */ rsr = ( mdev->state.conn == C_SYNC_SOURCE || mdev->state.conn == C_SYNC_TARGET || @@ -1576,7 +1589,8 @@ STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n } } - if (sc.cpu_mask[0] != 0) { + /* silently ignore cpu mask on UP kernel */ + if (NR_CPUS > 1 && sc.cpu_mask[0] != 0) { err = __bitmap_parse(sc.cpu_mask, 32, 0, (unsigned long *)&n_cpu_mask, NR_CPUS); if (err) { dev_warn(DEV, "__bitmap_parse() failed with %d\n", err); @@ -1594,8 +1608,16 @@ STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n } #undef AL_MAX + /* most sanity checks done, try to assign the new sync-after + * dependency. need to hold the global lock in there, + * to avoid a race in the dependency loop check. */ + retcode = drbd_alter_sa(mdev, sc.after); + if (retcode != NO_ERROR) + goto fail; + + /* ok, assign the rest of it as well. 
+ * lock against receive_SyncParam() */ spin_lock(&mdev->peer_seq_lock); - /* lock against receive_SyncParam() */ mdev->sync_conf = sc; if (!rsr) { @@ -1630,8 +1652,6 @@ STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n if (mdev->state.conn >= C_CONNECTED) drbd_send_sync_param(mdev, &sc); - drbd_alter_sa(mdev, sc.after); - if (!cpus_equal(mdev->cpu_mask, n_cpu_mask)) { mdev->cpu_mask = n_cpu_mask; mdev->cpu_mask = drbd_calc_cpu_mask(mdev); @@ -1648,7 +1668,7 @@ fail: return 0; } -STATIC int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { int retcode; @@ -1674,7 +1694,7 @@ STATIC int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl return 0; } -STATIC int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { @@ -1683,7 +1703,7 @@ STATIC int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re return 0; } -STATIC int drbd_nl_pause_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_pause_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { int retcode = NO_ERROR; @@ -1695,7 +1715,7 @@ STATIC int drbd_nl_pause_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl return 0; } -STATIC int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { int retcode = NO_ERROR; @@ -1707,7 +1727,7 @@ STATIC int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n return 0; } -STATIC int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { reply->ret_code = drbd_request_state(mdev, NS(susp, 1)); @@ -1715,21 +1735,21 @@ STATIC int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl return 0; } -STATIC int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { reply->ret_code = drbd_request_state(mdev, NS(susp, 0)); return 0; } -STATIC int drbd_nl_outdate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_outdate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { reply->ret_code = drbd_request_state(mdev, NS(disk, D_OUTDATED)); return 0; } -STATIC int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { unsigned short *tl; @@ -1747,12 +1767,12 @@ STATIC int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl } tl = syncer_conf_to_tags(mdev, &mdev->sync_conf, tl); - *tl++ = TT_END; /* Close the tag list */ + put_unaligned(TT_END, tl++); /* Close the tag list */ return (int)((char *)tl - (char *)reply->tag_list); } -STATIC int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { unsigned short *tl = 
reply->tag_list; @@ -1766,19 +1786,16 @@ STATIC int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp if (s.conn >= C_SYNC_SOURCE && s.conn <= C_PAUSED_SYNC_T) { if (get_ldev(mdev)) { drbd_get_syncer_progress(mdev, &rs_left, &res); - *tl++ = T_sync_progress; - *tl++ = sizeof(int); - memcpy(tl, &res, sizeof(int)); - tl = (unsigned short *)((char *)tl + sizeof(int)); + tl = tl_add_int(tl, T_sync_progress, &res); put_ldev(mdev); } } - *tl++ = TT_END; /* Close the tag list */ + put_unaligned(TT_END, tl++); /* Close the tag list */ return (int)((char *)tl - (char *)reply->tag_list); } -STATIC int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { unsigned short *tl; @@ -1786,18 +1803,11 @@ STATIC int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp tl = reply->tag_list; if (get_ldev(mdev)) { - /* This is a hand crafted add tag ;) */ - *tl++ = T_uuids; - *tl++ = UI_SIZE*sizeof(u64); - memcpy(tl, mdev->ldev->md.uuid, UI_SIZE*sizeof(u64)); - tl = (unsigned short *)((char *)tl + UI_SIZE*sizeof(u64)); - *tl++ = T_uuids_flags; - *tl++ = sizeof(int); - memcpy(tl, &mdev->ldev->md.flags, sizeof(int)); - tl = (unsigned short *)((char *)tl + sizeof(int)); + tl = tl_add_blob(tl, T_uuids, mdev->ldev->md.uuid, UI_SIZE*sizeof(u64)); + tl = tl_add_int(tl, T_uuids_flags, &mdev->ldev->md.flags); put_ldev(mdev); } - *tl++ = TT_END; /* Close the tag list */ + put_unaligned(TT_END, tl++); /* Close the tag list */ return (int)((char *)tl - (char *)reply->tag_list); } @@ -1808,7 +1818,7 @@ STATIC int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp * @nlp: Netlink/connector packet from drbdsetup * @reply: Reply packet for drbdsetup */ -STATIC int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { unsigned short *tl; @@ -1819,26 +1829,31 @@ STATIC int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_r rv = mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED : test_bit(USE_DEGR_WFC_T, &mdev->flags) ? 
UT_DEGRADED : UT_DEFAULT; - /* This is a hand crafted add tag ;) */ - *tl++ = T_use_degraded; - *tl++ = sizeof(char); - *((char *)tl) = rv; - tl = (unsigned short *)((char *)tl + sizeof(char)); - *tl++ = TT_END; + tl = tl_add_blob(tl, T_use_degraded, &rv, sizeof(rv)); + put_unaligned(TT_END, tl++); /* Close the tag list */ return (int)((char *)tl - (char *)reply->tag_list); } -STATIC int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { - reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); + /* default to resume from last known position, if possible */ + struct start_ov args = + { .start_sector = mdev->ov_start_sector }; + if (!start_ov_from_tags(mdev, nlp->tag_list, &args)) { + reply->ret_code = ERR_MANDATORY_TAG; + return 0; + } + /* w_make_ov_request expects position to be aligned */ + mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT; + reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); return 0; } -STATIC int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { int retcode = NO_ERROR; @@ -1865,7 +1880,7 @@ STATIC int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) { dev_info(DEV, "Preparing to skip initial sync\n"); skip_initial_sync = 1; - } else if (mdev->state.conn >= C_CONNECTED) { + } else if (mdev->state.conn != C_STANDALONE) { retcode = ERR_CONNECTED; goto out_dec; } @@ -1899,7 +1914,7 @@ out: return 0; } -STATIC struct drbd_conf *ensure_mdev(struct drbd_nl_cfg_req *nlp) +static struct drbd_conf *ensure_mdev(struct drbd_nl_cfg_req *nlp) { struct drbd_conf *mdev; @@ -1971,7 +1986,7 @@ static struct cn_handler_struct cnd_table[] = { [ P_new_c_uuid ] = { &drbd_nl_new_c_uuid, 0 }, }; -STATIC void drbd_connector_callback(void *data) +static void drbd_connector_callback(void *data) { struct cn_msg *req = data; struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data; @@ -2012,6 +2027,7 @@ STATIC void drbd_connector_callback(void *data) reply_size += cm->reply_body_size; + /* allocation not in the IO path, cqueue thread context */ cn_reply = kmalloc(reply_size, GFP_KERNEL); if (!cn_reply) { retcode = ERR_NOMEM; @@ -2050,18 +2066,13 @@ static atomic_t drbd_nl_seq = ATOMIC_INIT(2); /* two. */ static unsigned short * __tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, - int len, int nul_terminated) + unsigned short len, int nul_terminated) { - int l = tag_descriptions[tag_number(tag)].max_len; - l = (len < l) ? len : l; - *tl++ = tag; - *tl++ = len; + unsigned short l = tag_descriptions[tag_number(tag)].max_len; + len = (len < l) ? len : l; + put_unaligned(tag, tl++); + put_unaligned(len, tl++); memcpy(tl, data, len); - /* TODO - * maybe we need to add some padding to the data stream. - * otherwise we may get strange effects on architectures - * that require certain data types to be strictly aligned, - * because now the next "unsigned short" may be misaligned. 
*/ tl = (unsigned short*)((char*)tl + len); if (nul_terminated) *((char*)tl - 1) = 0; @@ -2083,17 +2094,16 @@ tl_add_str(unsigned short *tl, enum drbd_tags tag, const char *str) static unsigned short * tl_add_int(unsigned short *tl, enum drbd_tags tag, const void *val) { + put_unaligned(tag, tl++); switch(tag_type(tag)) { case TT_INTEGER: - *tl++ = tag; - *tl++ = sizeof(int); - *(int*)tl = *(int*)val; + put_unaligned(sizeof(int), tl++); + put_unaligned(*(int *)val, (int *)tl++); tl = (unsigned short*)((char*)tl+sizeof(int)); break; case TT_INT64: - *tl++ = tag; - *tl++ = sizeof(u64); - *(u64*)tl = *(u64*)val; + put_unaligned(sizeof(u64), tl++); + put_unaligned(*(u64 *)val, (u64 *)tl++); tl = (unsigned short*)((char*)tl+sizeof(u64)); break; default: @@ -2117,7 +2127,8 @@ void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state) /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */ tl = get_state_to_tags(mdev, (struct get_state *)&state, tl); - *tl++ = TT_END; /* Close the tag list */ + + put_unaligned(TT_END, tl++); /* Close the tag list */ cn_reply->id.idx = CN_IDX_DRBD; cn_reply->id.val = CN_VAL_DRBD; @@ -2146,16 +2157,11 @@ void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name) struct drbd_nl_cfg_reply *reply = (struct drbd_nl_cfg_reply *)cn_reply->data; unsigned short *tl = reply->tag_list; - int str_len; /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */ - str_len = strlen(helper_name)+1; - *tl++ = T_helper; - *tl++ = str_len; - memcpy(tl, helper_name, str_len); - tl = (unsigned short *)((char *)tl + str_len); - *tl++ = TT_END; /* Close the tag list */ + tl = tl_add_str(tl, T_helper, helper_name); + put_unaligned(TT_END, tl++); /* Close the tag list */ cn_reply->id.idx = CN_IDX_DRBD; cn_reply->id.val = CN_VAL_DRBD; @@ -2193,12 +2199,15 @@ void drbd_bcast_ee(struct drbd_conf *mdev, /* aparently we have to memcpy twice, first to prepare the data for the * struct cn_msg, then within cn_netlink_send from the cn_msg to the * netlink skb. */ + /* receiver thread context, which is not in the writeout path (of this node), + * but may be in the writeout path of the _other_ node. + * GFP_NOIO to avoid potential "distributed deadlock". 
*/ cn_reply = kmalloc( sizeof(struct cn_msg)+ sizeof(struct drbd_nl_cfg_reply)+ sizeof(struct dump_ee_tag_len_struct)+ - sizeof(short int) - , GFP_KERNEL); + sizeof(short int), + GFP_NOIO); if (!cn_reply) { dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, sector %llu, size %u\n", @@ -2215,8 +2224,8 @@ void drbd_bcast_ee(struct drbd_conf *mdev, tl = tl_add_int(tl, T_ee_sector, &e->sector); tl = tl_add_int(tl, T_ee_block_id, &e->block_id); - *tl++ = T_ee_data; - *tl++ = e->size; + put_unaligned(T_ee_data, tl++); + put_unaligned(e->size, tl++); __bio_for_each_segment(bvec, e->private_bio, i, 0) { void *d = kmap(bvec->bv_page); @@ -2224,7 +2233,7 @@ void drbd_bcast_ee(struct drbd_conf *mdev, kunmap(bvec->bv_page); tl=(unsigned short*)((char*)tl + bvec->bv_len); } - *tl++ = TT_END; /* Close the tag list */ + put_unaligned(TT_END, tl++); /* Close the tag list */ cn_reply->id.idx = CN_IDX_DRBD; cn_reply->id.val = CN_VAL_DRBD; @@ -2263,11 +2272,8 @@ void drbd_bcast_sync_progress(struct drbd_conf *mdev) drbd_get_syncer_progress(mdev, &rs_left, &res); put_ldev(mdev); - *tl++ = T_sync_progress; - *tl++ = sizeof(int); - memcpy(tl, &res, sizeof(int)); - tl = (unsigned short *)((char *)tl + sizeof(int)); - *tl++ = TT_END; /* Close the tag list */ + tl = tl_add_int(tl, T_sync_progress, &res); + put_unaligned(TT_END, tl++); /* Close the tag list */ cn_reply->id.idx = CN_IDX_DRBD; cn_reply->id.val = CN_VAL_DRBD; diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index b59b9d9f078c..432a7dd39f7c 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -32,11 +32,10 @@ #include #include #include -#include #include #include "drbd_int.h" -STATIC int drbd_proc_open(struct inode *inode, struct file *file); +static int drbd_proc_open(struct inode *inode, struct file *file); struct proc_dir_entry *drbd_proc; @@ -55,7 +54,7 @@ struct file_operations drbd_proc_fops = { * [=====>..............] 
33.5% (23456/123456) * finish: 2:20:20 speed: 6,345 (6,456) K/sec */ -STATIC void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) +static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) { unsigned long db, dt, dbdt, rt, rs_left; unsigned int res; @@ -134,7 +133,7 @@ STATIC void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) seq_printf(seq, " K/sec\n"); } -STATIC void resync_dump_detail(struct seq_file *seq, struct lc_element *e) +static void resync_dump_detail(struct seq_file *seq, struct lc_element *e) { struct bm_extent *bme = lc_entry(e, struct bm_extent, lce); @@ -144,7 +143,7 @@ STATIC void resync_dump_detail(struct seq_file *seq, struct lc_element *e) ); } -STATIC int drbd_seq_show(struct seq_file *seq, void *v) +static int drbd_seq_show(struct seq_file *seq, void *v) { int i, hole = 0; const char *sn; @@ -259,7 +258,7 @@ STATIC int drbd_seq_show(struct seq_file *seq, void *v) return 0; } -STATIC int drbd_proc_open(struct inode *inode, struct file *file) +static int drbd_proc_open(struct inode *inode, struct file *file) { return single_open(file, drbd_seq_show, PDE(inode)->data); } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 24dc84698de7..b222b24ddc51 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -30,11 +30,11 @@ #include #include +#include #include #include #include #include -#include #include #include #include @@ -47,7 +47,6 @@ #include #include #include -#include #include "drbd_int.h" #include "drbd_tracing.h" #include "drbd_req.h" @@ -65,11 +64,11 @@ enum finish_epoch { FE_RECYCLED, }; -STATIC int drbd_do_handshake(struct drbd_conf *mdev); -STATIC int drbd_do_auth(struct drbd_conf *mdev); +static int drbd_do_handshake(struct drbd_conf *mdev); +static int drbd_do_auth(struct drbd_conf *mdev); -STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); -STATIC int e_end_block(struct drbd_conf *, struct drbd_work *, int); +static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); +static int e_end_block(struct drbd_conf *, struct drbd_work *, int); static struct drbd_epoch *previous_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch) { @@ -93,7 +92,7 @@ static struct drbd_epoch *previous_epoch(struct drbd_conf *mdev, struct drbd_epo * allocation would go beyond the max_buffers setting, this function sleeps * until DRBD frees a page somewhere else. */ -STATIC struct page *drbd_pp_alloc(struct drbd_conf *mdev, gfp_t gfp_mask) +static struct page *drbd_pp_alloc(struct drbd_conf *mdev, gfp_t gfp_mask) { unsigned long flags = 0; struct page *page; @@ -162,7 +161,7 @@ STATIC struct page *drbd_pp_alloc(struct drbd_conf *mdev, gfp_t gfp_mask) return page; } -STATIC void drbd_pp_free(struct drbd_conf *mdev, struct page *page) +static void drbd_pp_free(struct drbd_conf *mdev, struct page *page) { unsigned long flags = 0; int free_it; @@ -260,10 +259,10 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, } /* dump more of the bio. 
*/ - DUMPI(bio->bi_max_vecs); - DUMPI(bio->bi_vcnt); - DUMPI(bio->bi_size); - DUMPI(bio->bi_phys_segments); + dev_err(DEV, "bio->bi_max_vecs = %d\n", bio->bi_max_vecs); + dev_err(DEV, "bio->bi_vcnt = %d\n", bio->bi_vcnt); + dev_err(DEV, "bio->bi_size = %d\n", bio->bi_size); + dev_err(DEV, "bio->bi_phys_segments = %d\n", bio->bi_phys_segments); goto fail2; break; @@ -339,7 +338,7 @@ int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) } -STATIC void reclaim_net_ee(struct drbd_conf *mdev) +static void reclaim_net_ee(struct drbd_conf *mdev) { struct drbd_epoch_entry *e; struct list_head *le, *tle; @@ -368,7 +367,7 @@ STATIC void reclaim_net_ee(struct drbd_conf *mdev) * Grab done_ee, call all callbacks, free the entries. * The callbacks typically send out ACKs. */ -STATIC int drbd_process_done_ee(struct drbd_conf *mdev) +static int drbd_process_done_ee(struct drbd_conf *mdev) { LIST_HEAD(work_list); struct drbd_epoch_entry *e, *t; @@ -458,7 +457,7 @@ void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) /* see also kernel_accept; which is only present since 2.6.18. * also we want to log which part of it failed, exactly */ -STATIC int drbd_accept(struct drbd_conf *mdev, const char **what, +static int drbd_accept(struct drbd_conf *mdev, const char **what, struct socket *sock, struct socket **newsock) { struct sock *sk = sock->sk; @@ -488,7 +487,7 @@ out: return err; } -STATIC int drbd_recv_short(struct drbd_conf *mdev, struct socket *sock, +static int drbd_recv_short(struct drbd_conf *mdev, struct socket *sock, void *buf, size_t size, int flags) { mm_segment_t oldfs; @@ -511,7 +510,7 @@ STATIC int drbd_recv_short(struct drbd_conf *mdev, struct socket *sock, return rv; } -STATIC int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) +static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) { mm_segment_t oldfs; struct kvec iov = { @@ -564,7 +563,7 @@ STATIC int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) return rv; } -STATIC struct socket *drbd_try_connect(struct drbd_conf *mdev) +static struct socket *drbd_try_connect(struct drbd_conf *mdev) { const char *what; struct socket *sock; @@ -640,7 +639,7 @@ out: return sock; } -STATIC struct socket *drbd_wait_for_connect(struct drbd_conf *mdev) +static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev) { int timeo, err; struct socket *s_estab = NULL, *s_listen; @@ -687,7 +686,7 @@ out: return s_estab; } -STATIC int drbd_send_fp(struct drbd_conf *mdev, +static int drbd_send_fp(struct drbd_conf *mdev, struct socket *sock, enum drbd_packets cmd) { struct p_header *h = (struct p_header *) &mdev->data.sbuf.header; @@ -695,7 +694,7 @@ STATIC int drbd_send_fp(struct drbd_conf *mdev, return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0); } -STATIC enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) +static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) { struct p_header *h = (struct p_header *) &mdev->data.sbuf.header; int rr; @@ -740,7 +739,7 @@ static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock) * no point in trying again, please go standalone. * -2 We do not have a network config... 
*/ -STATIC int drbd_connect(struct drbd_conf *mdev) +static int drbd_connect(struct drbd_conf *mdev) { struct socket *s, *sock, *msock; int try, h, ok; @@ -856,8 +855,12 @@ retry: if (mdev->net_conf->sndbuf_size) { sock->sk->sk_sndbuf = mdev->net_conf->sndbuf_size; - sock->sk->sk_rcvbuf = mdev->net_conf->sndbuf_size; - sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK; + sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK; + } + + if (mdev->net_conf->rcvbuf_size) { + sock->sk->sk_rcvbuf = mdev->net_conf->rcvbuf_size; + sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK; } /* NOT YET ... @@ -906,15 +909,16 @@ retry: drbd_send_protocol(mdev); drbd_send_sync_param(mdev, &mdev->sync_conf); - drbd_send_sizes(mdev); + drbd_send_sizes(mdev, 0); drbd_send_uuids(mdev); drbd_send_state(mdev); clear_bit(USE_DEGR_WFC_T, &mdev->flags); + clear_bit(RESIZE_PENDING, &mdev->flags); return 1; } -STATIC int drbd_recv_header(struct drbd_conf *mdev, struct p_header *h) +static int drbd_recv_header(struct drbd_conf *mdev, struct p_header *h) { int r; @@ -937,7 +941,7 @@ STATIC int drbd_recv_header(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -STATIC enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch) +static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch) { int rv; @@ -956,7 +960,7 @@ STATIC enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d return drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE); } -STATIC int w_flush(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +static int w_flush(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { struct flush_work *fw = (struct flush_work *)w; struct drbd_epoch *epoch = fw->epoch; @@ -978,7 +982,7 @@ STATIC int w_flush(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * @epoch: Epoch object. * @ev: Epoch event. */ -STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, +static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch, enum epoch_event ev) { @@ -1173,7 +1177,7 @@ int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __relea return 1; } -STATIC int receive_Barrier(struct drbd_conf *mdev, struct p_header *h) +static int receive_Barrier(struct drbd_conf *mdev, struct p_header *h) { int rv, issue_flush; struct p_barrier *p = (struct p_barrier *)h; @@ -1219,7 +1223,9 @@ STATIC int receive_Barrier(struct drbd_conf *mdev, struct p_header *h) break; } - epoch = kmalloc(sizeof(struct drbd_epoch), GFP_KERNEL); + /* receiver context, in the writeout path of the other node. + * avoid potential distributed deadlock */ + epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO); if (!epoch) { dev_warn(DEV, "Allocation of an epoch failed, slowing down\n"); issue_flush = !test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags); @@ -1256,7 +1262,7 @@ STATIC int receive_Barrier(struct drbd_conf *mdev, struct p_header *h) /* used from receive_RSDataReply (recv_resync_read) * and from receive_Data */ -STATIC struct drbd_epoch_entry * +static struct drbd_epoch_entry * read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __must_hold(local) { struct drbd_epoch_entry *e; @@ -1319,7 +1325,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ /* drbd_drain_block() just takes a data block * out of the socket input buffer, and discards it. 
*/ -STATIC int drbd_drain_block(struct drbd_conf *mdev, int data_size) +static int drbd_drain_block(struct drbd_conf *mdev, int data_size) { struct page *page; int rr, rv = 1; @@ -1352,7 +1358,7 @@ static void maybe_kick_lo(struct drbd_conf *mdev) drbd_kick_lo(mdev); } -STATIC int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, +static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, sector_t sector, int data_size) { struct bio_vec *bvec; @@ -1407,7 +1413,7 @@ STATIC int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, /* e_end_resync_block() is called via * drbd_process_done_ee() by asender only */ -STATIC int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) { struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; sector_t sector = e->sector; @@ -1430,7 +1436,7 @@ STATIC int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u return ok; } -STATIC int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local) +static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local) { struct drbd_epoch_entry *e; @@ -1463,7 +1469,7 @@ STATIC int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si return TRUE; } -STATIC int receive_DataReply(struct drbd_conf *mdev, struct p_header *h) +static int receive_DataReply(struct drbd_conf *mdev, struct p_header *h) { struct drbd_request *req; sector_t sector; @@ -1503,7 +1509,7 @@ STATIC int receive_DataReply(struct drbd_conf *mdev, struct p_header *h) return ok; } -STATIC int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h) +static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h) { sector_t sector; unsigned int header_size, data_size; @@ -1541,7 +1547,7 @@ STATIC int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h) /* e_end_block() is called via drbd_process_done_ee(). 
* this means this function only runs in the asender thread */ -STATIC int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) { struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; sector_t sector = e->sector; @@ -1590,7 +1596,7 @@ STATIC int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) return ok; } -STATIC int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused) { struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; int ok = 1; @@ -1662,7 +1668,7 @@ static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) } /* mirrored write */ -STATIC int receive_Data(struct drbd_conf *mdev, struct p_header *h) +static int receive_Data(struct drbd_conf *mdev, struct p_header *h) { sector_t sector; struct drbd_epoch_entry *e; @@ -1918,7 +1924,7 @@ out_interrupted: return FALSE; } -STATIC int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) +static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) { sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); @@ -1992,7 +1998,7 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) case P_CSUM_RS_REQUEST: fault_type = DRBD_FAULT_RS_RD; digest_size = h->length - brps ; - di = kmalloc(sizeof(*di) + digest_size, GFP_KERNEL); + di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO); if (!di) { put_ldev(mdev); drbd_free_ee(mdev, e); @@ -2030,6 +2036,18 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) break; case P_OV_REQUEST: + if (mdev->state.conn >= C_CONNECTED && + mdev->state.conn != C_VERIFY_T) + dev_warn(DEV, "ASSERT FAILED: got P_OV_REQUEST while being %s\n", + conns_to_name(mdev->state.conn)); + if (mdev->ov_start_sector == ~(sector_t)0 && + mdev->agreed_pro_version >= 90) { + mdev->ov_start_sector = sector; + mdev->ov_position = sector; + mdev->ov_left = mdev->rs_total - BM_SECT_TO_BIT(sector); + dev_info(DEV, "Online Verify start sector: %llu\n", + (unsigned long long)sector); + } e->w.cb = w_e_end_ov_req; fault_type = DRBD_FAULT_RS_RD; /* Eventually this should become asynchrously. Currently it @@ -2068,7 +2086,7 @@ STATIC int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -STATIC int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) +static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) { int self, peer, rv = -100; unsigned long ch_self, ch_peer; @@ -2140,7 +2158,7 @@ STATIC int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) return rv; } -STATIC int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) +static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) { int self, peer, hg, rv = -100; @@ -2173,6 +2191,10 @@ STATIC int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) hg = drbd_asb_recover_0p(mdev); if (hg == -1 && mdev->state.role == R_PRIMARY) { self = drbd_set_role(mdev, R_SECONDARY, 0); + /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, + * we might be here in C_WF_REPORT_PARAMS which is transient. + * we do not need to wait for the after state change work either. 
*/ + self = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); if (self != SS_SUCCESS) { drbd_khelper(mdev, "pri-lost-after-sb"); } else { @@ -2186,7 +2208,7 @@ STATIC int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) return rv; } -STATIC int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) +static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) { int self, peer, hg, rv = -100; @@ -2211,7 +2233,10 @@ STATIC int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) case ASB_CALL_HELPER: hg = drbd_asb_recover_0p(mdev); if (hg == -1) { - self = drbd_set_role(mdev, R_SECONDARY, 0); + /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, + * we might be here in C_WF_REPORT_PARAMS which is transient. + * we do not need to wait for the after state change work either. */ + self = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); if (self != SS_SUCCESS) { drbd_khelper(mdev, "pri-lost-after-sb"); } else { @@ -2225,7 +2250,7 @@ STATIC int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) return rv; } -STATIC void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid, +static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid, u64 bits, u64 flags) { if (!uuid) { @@ -2252,7 +2277,7 @@ STATIC void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid, -100 after split brain, disconnect -1000 unrelated data */ -STATIC int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local) +static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local) { u64 self, peer; int i, j; @@ -2326,7 +2351,7 @@ STATIC int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l *rule_nr = 10; for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) { - self = mdev->p_uuid[i] & ~((u64)1); + self = mdev->ldev->md.uuid[i] & ~((u64)1); for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) { peer = mdev->p_uuid[j] & ~((u64)1); if (self == peer) @@ -2340,7 +2365,7 @@ STATIC int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l /* drbd_sync_handshake() returns the new conn state on success, or CONN_MASK (-1) on failure. 
*/ -STATIC enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role, +static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role, enum drbd_disk_state peer_disk) __must_hold(local) { int hg, rule_nr; @@ -2465,7 +2490,7 @@ STATIC enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } /* returns 1 if invalid */ -STATIC int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self) +static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self) { /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */ if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) || @@ -2485,7 +2510,7 @@ STATIC int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self) return 1; } -STATIC int receive_protocol(struct drbd_conf *mdev, struct p_header *h) +static int receive_protocol(struct drbd_conf *mdev, struct p_header *h) { struct p_protocol *p = (struct p_protocol *)h; int header_size, data_size; @@ -2577,7 +2602,7 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, alg, name, PTR_ERR(tfm)); return tfm; } - if (crypto_tfm_alg_type(crypto_hash_tfm(tfm)) != CRYPTO_ALG_TYPE_DIGEST) { + if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) { crypto_free_hash(tfm); dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name); return ERR_PTR(-EINVAL); @@ -2585,7 +2610,7 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, return tfm; } -STATIC int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) +static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) { int ok = TRUE; struct p_rs_param_89 *p = (struct p_rs_param_89 *)h; @@ -2656,8 +2681,10 @@ STATIC int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) } verify_tfm = drbd_crypto_alloc_digest_safe(mdev, p->verify_alg, "verify-alg"); - if (IS_ERR(verify_tfm)) + if (IS_ERR(verify_tfm)) { + verify_tfm = NULL; goto disconnect; + } } if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) { @@ -2668,8 +2695,10 @@ STATIC int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) } csums_tfm = drbd_crypto_alloc_digest_safe(mdev, p->csums_alg, "csums-alg"); - if (IS_ERR(csums_tfm)) + if (IS_ERR(csums_tfm)) { + csums_tfm = NULL; goto disconnect; + } } @@ -2694,12 +2723,16 @@ STATIC int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) return ok; disconnect: + /* just for completeness: actually not needed, + * as this is not reached if csums_tfm was ok. 
*/ + crypto_free_hash(csums_tfm); + /* but free the verify_tfm again, if csums_tfm did not work out */ crypto_free_hash(verify_tfm); drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); return FALSE; } -STATIC void drbd_setup_order_type(struct drbd_conf *mdev, int peer) +static void drbd_setup_order_type(struct drbd_conf *mdev, int peer) { /* sorry, we currently have no working implementation * of distributed TCQ */ @@ -2718,7 +2751,7 @@ static void warn_if_differ_considerably(struct drbd_conf *mdev, (unsigned long long)a, (unsigned long long)b); } -STATIC int receive_sizes(struct drbd_conf *mdev, struct p_header *h) +static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) { struct p_sizes *p = (struct p_sizes *)h; enum determine_dev_size dd = unchanged; @@ -2815,7 +2848,7 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct p_header *h) } max_seg_s = be32_to_cpu(p->max_segment_size); - if (max_seg_s != mdev->rq_queue->max_segment_size) + if (max_seg_s != queue_max_segment_size(mdev->rq_queue)) drbd_setup_queue_param(mdev, max_seg_s); drbd_setup_order_type(mdev, be32_to_cpu(p->queue_order_type)); @@ -2827,9 +2860,10 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct p_header *h) drbd_get_capacity(mdev->this_bdev) || ldsc) { /* we have different sizes, probabely peer * needs to know my new size... */ - drbd_send_sizes(mdev); + drbd_send_sizes(mdev, 0); } - if (dd == grew && mdev->state.conn == C_CONNECTED) { + if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) || + (dd == grew && mdev->state.conn == C_CONNECTED)) { if (mdev->state.pdsk >= D_INCONSISTENT && mdev->state.disk >= D_INCONSISTENT) resync_after_online_grow(mdev); @@ -2841,7 +2875,7 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -STATIC int receive_uuids(struct drbd_conf *mdev, struct p_header *h) +static int receive_uuids(struct drbd_conf *mdev, struct p_header *h) { struct p_uuids *p = (struct p_uuids *)h; u64 *p_uuid; @@ -2851,7 +2885,7 @@ STATIC int receive_uuids(struct drbd_conf *mdev, struct p_header *h) if (drbd_recv(mdev, h->payload, h->length) != h->length) return FALSE; - p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_KERNEL); + p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO); for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++) p_uuid[i] = be64_to_cpu(p->uuid[i]); @@ -2903,7 +2937,7 @@ STATIC int receive_uuids(struct drbd_conf *mdev, struct p_header *h) * convert_state() - Converts the peer's view of the cluster state to our point of view * @ps: The state as seen by the peer. 
*/ -STATIC union drbd_state convert_state(union drbd_state ps) +static union drbd_state convert_state(union drbd_state ps) { union drbd_state ms; @@ -2929,7 +2963,7 @@ STATIC union drbd_state convert_state(union drbd_state ps) return ms; } -STATIC int receive_req_state(struct drbd_conf *mdev, struct p_header *h) +static int receive_req_state(struct drbd_conf *mdev, struct p_header *h) { struct p_req_state *p = (struct p_req_state *)h; union drbd_state mask, val; @@ -2959,7 +2993,7 @@ STATIC int receive_req_state(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -STATIC int receive_state(struct drbd_conf *mdev, struct p_header *h) +static int receive_state(struct drbd_conf *mdev, struct p_header *h) { struct p_state *p = (struct p_state *)h; enum drbd_conns nconn, oconn; @@ -2993,12 +3027,21 @@ STATIC int receive_state(struct drbd_conf *mdev, struct p_header *h) get_ldev_if_state(mdev, D_NEGOTIATING)) { int cr; /* consider resync */ + /* if we established a new connection */ cr = (oconn < C_CONNECTED); + /* if we had an established connection + * and one of the nodes newly attaches a disk */ cr |= (oconn == C_CONNECTED && (peer_state.disk == D_NEGOTIATING || mdev->state.disk == D_NEGOTIATING)); - cr |= test_bit(CONSIDER_RESYNC, &mdev->flags); /* peer forced */ - cr |= (oconn == C_CONNECTED && peer_state.conn > C_CONNECTED); + /* if we have both been inconsistent, and the peer has been + * forced to be UpToDate with --overwrite-data */ + cr |= test_bit(CONSIDER_RESYNC, &mdev->flags); + /* if we had been plain connected, and the admin requested to + * start a sync by "invalidate" or "invalidate-remote" */ + cr |= (oconn == C_CONNECTED && + (peer_state.conn >= C_STARTING_SYNC_S && + peer_state.conn <= C_WF_BITMAP_T)); if (cr) nconn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk); @@ -3058,7 +3101,7 @@ STATIC int receive_state(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -STATIC int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h) +static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h) { struct p_rs_uuid *p = (struct p_rs_uuid *)h; @@ -3233,7 +3276,7 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev, in order to be agnostic to the 32 vs 64 bits issue. returns 0 on failure, 1 if we suceessfully received it. 
*/ -STATIC int receive_bitmap(struct drbd_conf *mdev, struct p_header *h) +static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h) { struct bm_xfer_ctx c; void *buffer; @@ -3321,7 +3364,7 @@ STATIC int receive_bitmap(struct drbd_conf *mdev, struct p_header *h) return ok; } -STATIC int receive_skip(struct drbd_conf *mdev, struct p_header *h) +static int receive_skip(struct drbd_conf *mdev, struct p_header *h) { /* TODO zero copy sink :) */ static char sink[128]; @@ -3340,7 +3383,7 @@ STATIC int receive_skip(struct drbd_conf *mdev, struct p_header *h) return size == 0; } -STATIC int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h) +static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h) { if (mdev->state.disk >= D_INCONSISTENT) drbd_kick_lo(mdev); @@ -3383,7 +3426,7 @@ static drbd_cmd_handler_f drbd_default_handler[] = { static drbd_cmd_handler_f *drbd_cmd_handler = drbd_default_handler; static drbd_cmd_handler_f *drbd_opt_cmd_handler; -STATIC void drbdd(struct drbd_conf *mdev) +static void drbdd(struct drbd_conf *mdev) { drbd_cmd_handler_f handler; struct p_header *header = &mdev->data.rbuf.header; @@ -3421,7 +3464,7 @@ STATIC void drbdd(struct drbd_conf *mdev) } } -STATIC void drbd_fail_pending_reads(struct drbd_conf *mdev) +static void drbd_fail_pending_reads(struct drbd_conf *mdev) { struct hlist_head *slot; struct hlist_node *pos; @@ -3454,7 +3497,7 @@ STATIC void drbd_fail_pending_reads(struct drbd_conf *mdev) spin_unlock_irq(&mdev->req_lock); } -STATIC void drbd_disconnect(struct drbd_conf *mdev) +static void drbd_disconnect(struct drbd_conf *mdev) { struct drbd_work prev_work_done; enum drbd_fencing_p fp; @@ -3611,7 +3654,7 @@ STATIC void drbd_disconnect(struct drbd_conf *mdev) * * for now, they are expected to be zero, but ignored. */ -STATIC int drbd_send_handshake(struct drbd_conf *mdev) +static int drbd_send_handshake(struct drbd_conf *mdev) { /* ASSERT current == mdev->receiver ... 
*/ struct p_handshake *p = &mdev->data.sbuf.handshake; @@ -3761,7 +3804,7 @@ int drbd_do_auth(struct drbd_conf *mdev) goto fail; } - peers_ch = kmalloc(p.length, GFP_KERNEL); + peers_ch = kmalloc(p.length, GFP_NOIO); if (peers_ch == NULL) { dev_err(DEV, "kmalloc of peers_ch failed\n"); rv = 0; @@ -3777,7 +3820,7 @@ int drbd_do_auth(struct drbd_conf *mdev) } resp_size = crypto_hash_digestsize(mdev->cram_hmac_tfm); - response = kmalloc(resp_size, GFP_KERNEL); + response = kmalloc(resp_size, GFP_NOIO); if (response == NULL) { dev_err(DEV, "kmalloc of response failed\n"); rv = 0; @@ -3823,7 +3866,7 @@ int drbd_do_auth(struct drbd_conf *mdev) goto fail; } - right_response = kmalloc(resp_size, GFP_KERNEL); + right_response = kmalloc(resp_size, GFP_NOIO); if (response == NULL) { dev_err(DEV, "kmalloc of right_response failed\n"); rv = 0; @@ -3854,7 +3897,7 @@ int drbd_do_auth(struct drbd_conf *mdev) } #endif -STATIC int drbdd_init(struct drbd_thread *thi) +int drbdd_init(struct drbd_thread *thi) { struct drbd_conf *mdev = thi->mdev; unsigned int minor = mdev_to_minor(mdev); @@ -3892,7 +3935,7 @@ STATIC int drbdd_init(struct drbd_thread *thi) /* ********* acknowledge sender ******** */ -STATIC int got_RqSReply(struct drbd_conf *mdev, struct p_header *h) +static int got_RqSReply(struct drbd_conf *mdev, struct p_header *h) { struct p_req_state_reply *p = (struct p_req_state_reply *)h; @@ -3910,13 +3953,13 @@ STATIC int got_RqSReply(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -STATIC int got_Ping(struct drbd_conf *mdev, struct p_header *h) +static int got_Ping(struct drbd_conf *mdev, struct p_header *h) { return drbd_send_ping_ack(mdev); } -STATIC int got_PingAck(struct drbd_conf *mdev, struct p_header *h) +static int got_PingAck(struct drbd_conf *mdev, struct p_header *h) { /* restore idle timeout */ mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; @@ -3924,7 +3967,7 @@ STATIC int got_PingAck(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -STATIC int got_IsInSync(struct drbd_conf *mdev, struct p_header *h) +static int got_IsInSync(struct drbd_conf *mdev, struct p_header *h) { struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); @@ -3969,7 +4012,7 @@ static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, return NULL; } -STATIC int got_BlockAck(struct drbd_conf *mdev, struct p_header *h) +static int got_BlockAck(struct drbd_conf *mdev, struct p_header *h) { struct drbd_request *req; struct p_block_ack *p = (struct p_block_ack *)h; @@ -4021,7 +4064,7 @@ STATIC int got_BlockAck(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -STATIC int got_NegAck(struct drbd_conf *mdev, struct p_header *h) +static int got_NegAck(struct drbd_conf *mdev, struct p_header *h) { struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); @@ -4055,7 +4098,7 @@ STATIC int got_NegAck(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -STATIC int got_NegDReply(struct drbd_conf *mdev, struct p_header *h) +static int got_NegDReply(struct drbd_conf *mdev, struct p_header *h) { struct drbd_request *req; struct p_block_ack *p = (struct p_block_ack *)h; @@ -4080,7 +4123,7 @@ STATIC int got_NegDReply(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -STATIC int got_NegRSDReply(struct drbd_conf *mdev, struct p_header *h) +static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header *h) { sector_t sector; int size; @@ -4103,7 +4146,7 @@ STATIC int got_NegRSDReply(struct drbd_conf 
*mdev, struct p_header *h) return TRUE; } -STATIC int got_BarrierAck(struct drbd_conf *mdev, struct p_header *h) +static int got_BarrierAck(struct drbd_conf *mdev, struct p_header *h) { struct p_barrier_ack *p = (struct p_barrier_ack *)h; @@ -4112,7 +4155,7 @@ STATIC int got_BarrierAck(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -STATIC int got_OVResult(struct drbd_conf *mdev, struct p_header *h) +static int got_OVResult(struct drbd_conf *mdev, struct p_header *h) { struct p_block_ack *p = (struct p_block_ack *)h; struct drbd_work *w; @@ -4133,12 +4176,13 @@ STATIC int got_OVResult(struct drbd_conf *mdev, struct p_header *h) dec_rs_pending(mdev); if (--mdev->ov_left == 0) { - w = kmalloc(sizeof(*w), GFP_KERNEL); + w = kmalloc(sizeof(*w), GFP_NOIO); if (w) { w->cb = w_ov_finished; drbd_queue_work_front(&mdev->data.work, w); } else { dev_err(DEV, "kmalloc(w) failed."); + ov_oos_print(mdev); drbd_resync_finished(mdev); } } @@ -4165,16 +4209,18 @@ static struct asender_cmd *get_asender_cmd(int cmd) [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck }, [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply }, [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply}, + [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult }, [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, + [P_MAX_CMD] = { 0, NULL }, }; - if (cmd > P_MAX_CMD) + if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) return NULL; return &asender_tbl[cmd]; } -STATIC int drbd_asender(struct drbd_thread *thi) +int drbd_asender(struct drbd_thread *thi) { struct drbd_conf *mdev = thi->mdev; struct p_header *h = &mdev->meta.rbuf.header; @@ -4285,7 +4331,6 @@ STATIC int drbd_asender(struct drbd_thread *thi) expect = cmd->pkt_size; ERR_IF(len != expect-sizeof(struct p_header)) { trace_drbd_packet(mdev, mdev->meta.socket, 1, (void *)h, __FILE__, __LINE__); - DUMPI(expect); goto reconnect; } } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 5c4039ad052e..d2b941cbc0a0 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -121,8 +121,8 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const list_empty(&req->w.list))) { /* DEBUG ASSERT only; if this triggers, we * probably corrupt the worker list here */ - DUMPP(req->w.list.next); - DUMPP(req->w.list.prev); + dev_err(DEV, "req->w.list.next = %p\n", req->w.list.next); + dev_err(DEV, "req->w.list.prev = %p\n", req->w.list.prev); } req->w.cb = w_io_error; drbd_queue_work(&mdev->data.work, &req->w); @@ -326,7 +326,7 @@ void _req_may_be_done(struct drbd_request *req, int error) * second hlist_for_each_entry becomes a noop. This is even simpler than to * grab a reference on the net_conf, and check for the two_primaries flag... */ -STATIC int _req_conflicts(struct drbd_request *req) +static int _req_conflicts(struct drbd_request *req) { struct drbd_conf *mdev = req->mdev; const sector_t sector = req->sector; @@ -689,7 +689,7 @@ void _req_mod(struct drbd_request *req, enum drbd_req_event what, int error) * since size may be bigger than BM_BLOCK_SIZE, * we may need to check several bits. 
*/ -STATIC int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size) +static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size) { unsigned long sbnr, ebnr; sector_t esector, nr_sectors; @@ -713,7 +713,7 @@ STATIC int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr); } -STATIC int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) +static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) { const int rw = bio_rw(bio); const int size = bio->bi_size; diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c index b230693f35e6..09922d2d5bf9 100644 --- a/drivers/block/drbd/drbd_strings.c +++ b/drivers/block/drbd/drbd_strings.c @@ -71,13 +71,13 @@ static const char *drbd_disk_s_names[] = { static const char *drbd_state_sw_errors[] = { [-SS_TWO_PRIMARIES] = "Multiple primaries not allowed by config", [-SS_NO_UP_TO_DATE_DISK] = "Refusing to be Primary without at least one UpToDate disk", - [-SS_BOTH_INCONSISTENT] = "Refusing to be inconsistent on both nodes", - [-SS_SYNCING_DISKLESS] = "Refusing to be syncing and diskless", + [-SS_NO_LOCAL_DISK] = "Can not resync without local disk", + [-SS_NO_REMOTE_DISK] = "Can not resync without remote disk", [-SS_CONNECTED_OUTDATES] = "Refusing to be Outdated while Connected", [-SS_PRIMARY_NOP] = "Refusing to be Primary while peer is not outdated", [-SS_RESYNC_RUNNING] = "Can not start OV/resync since it is already active", [-SS_ALREADY_STANDALONE] = "Can not disconnect a StandAlone device", - [-SS_CW_FAILED_BY_PEER] = "State changed was refused by peer node", + [-SS_CW_FAILED_BY_PEER] = "State change was refused by peer node", [-SS_IS_DISKLESS] = "Device is diskless, the requesed operation requires a disk", [-SS_DEVICE_IN_USE] = "Device is held open by someone", [-SS_NO_NET_CONFIG] = "Have no net/connection configuration", diff --git a/drivers/block/drbd/drbd_tracing.c b/drivers/block/drbd/drbd_tracing.c index b467e92dda76..f2827209ca34 100644 --- a/drivers/block/drbd/drbd_tracing.c +++ b/drivers/block/drbd/drbd_tracing.c @@ -71,7 +71,7 @@ enum dbg_print_flags { }; /* Macro stuff */ -STATIC char *nl_packet_name(int packet_type) +static char *nl_packet_name(int packet_type) { /* Generate packet type strings */ #define NL_PACKET(name, number, fields) \ @@ -371,7 +371,7 @@ static void probe_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt static void probe_drbd_bio(struct drbd_conf *mdev, const char *pfx, struct bio *bio, int complete, struct drbd_request *r) { -#ifdef CONFIG_LBD +#if defined(CONFIG_LBDAF) || defined(CONFIG_LBD) #define SECTOR_FORMAT "%Lx" #else #define SECTOR_FORMAT "%lx" @@ -387,7 +387,7 @@ static void probe_drbd_bio(struct drbd_conf *mdev, const char *pfx, struct bio * const int rw = bio->bi_rw; const int biorw = (rw & (RW_MASK|RWA_MASK)); const int biobarrier = (rw & (1< #include #include - +#include #include #include #include #include -#include #include #include #include @@ -40,14 +39,13 @@ #include #include -#include #include "drbd_int.h" #include "drbd_req.h" #include "drbd_tracing.h" #define SLEEP_TIME (HZ/10) -STATIC int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel); +static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel); @@ -293,7 +291,7 @@ int w_resync_inactive(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return 1; /* Simply ignore this! 
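The drbd_state_sw_errors[] hunk above relies on the state return codes being small negative integers: negating the code yields a dense array index. A reduced sketch of that designated-initializer trick, with invented names:

/* Illustrative only -- not DRBD code. */
enum demo_err { DEMO_TWO_PRIMARIES = -1, DEMO_NO_DISK = -2 };

static const char * const demo_err_names[] = {
        [-DEMO_TWO_PRIMARIES] = "multiple primaries not allowed",
        [-DEMO_NO_DISK]       = "no disk available",
};

static const char *demo_err_str(enum demo_err e)
{
        /* callers pass only negative codes covered by the table */
        return demo_err_names[-e];
}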
*/ } -STATIC void drbd_csum(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest) +void drbd_csum(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest) { struct hash_desc desc; struct scatterlist sg; @@ -313,7 +311,7 @@ STATIC void drbd_csum(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bi crypto_hash_final(&desc, digest); } -STATIC int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +static int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; int digest_size; @@ -329,7 +327,7 @@ STATIC int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel if (likely(drbd_bio_uptodate(e->private_bio))) { digest_size = crypto_hash_digestsize(mdev->csums_tfm); - digest = kmalloc(digest_size, GFP_KERNEL); + digest = kmalloc(digest_size, GFP_NOIO); if (digest) { drbd_csum(mdev, mdev->csums_tfm, e->private_bio, digest); @@ -359,7 +357,7 @@ STATIC int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) -STATIC int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) +static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) { struct drbd_epoch_entry *e; @@ -421,9 +419,9 @@ int w_make_resync_request(struct drbd_conf *mdev, unsigned long bit; sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); - int max_segment_size = mdev->rq_queue->max_segment_size; - int number, i, size; - int align; + int max_segment_size = queue_max_segment_size(mdev->rq_queue); + int number, i, size, pe, mx; + int align, queued, sndbuf; if (unlikely(cancel)) return 1; @@ -446,15 +444,40 @@ int w_make_resync_request(struct drbd_conf *mdev, mdev->resync_work.cb = w_resync_inactive; return 1; } - /* All goto requeses have to happend after this block: get_ldev() */ - number = SLEEP_TIME*mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ); + number = SLEEP_TIME * mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ); + pe = atomic_read(&mdev->rs_pending_cnt); - if (atomic_read(&mdev->rs_pending_cnt) > number) - goto requeue; - number -= atomic_read(&mdev->rs_pending_cnt); + mutex_lock(&mdev->data.mutex); + if (mdev->data.socket) + mx = mdev->data.socket->sk->sk_rcvbuf / sizeof(struct p_block_req); + else + mx = 1; + mutex_unlock(&mdev->data.mutex); + + /* For resync rates >160MB/sec, allow more pending RS requests */ + if (number > mx) + mx = number; + + /* Limit the number of pending RS requests to no more than the peer's receive buffer */ + if ((pe + number) > mx) { + number = mx - pe; + } for (i = 0; i < number; i++) { + /* Stop generating RS requests when half of the send buffer is filled */ + mutex_lock(&mdev->data.mutex); + if (mdev->data.socket) { + queued = mdev->data.socket->sk->sk_wmem_queued; + sndbuf = mdev->data.socket->sk->sk_sndbuf; + } else { + queued = 1; + sndbuf = 0; + } + mutex_unlock(&mdev->data.mutex); + if (queued > sndbuf / 2) + goto requeue; + next_sector: size = BM_BLOCK_SIZE; bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo); @@ -589,6 +612,11 @@ int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel) sector = mdev->ov_position; for (i = 0; i < number; i++) { + if (sector >= capacity) { + mdev->resync_work.cb = w_resync_inactive; + return 1; + } + size = BM_BLOCK_SIZE; if (drbd_try_rs_begin_io(mdev, sector)) { @@ -605,11 +633,6 @@ int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int
cancel) return 0; } sector += BM_SECT_PER_BIT; - if (sector >= capacity) { - mdev->resync_work.cb = w_resync_inactive; - - return 1; - } } mdev->ov_position = sector; @@ -628,7 +651,7 @@ int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return 1; } -STATIC int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +static int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { kfree(w); @@ -766,6 +789,7 @@ out: mdev->rs_total = 0; mdev->rs_failed = 0; mdev->rs_paused = 0; + mdev->ov_start_sector = 0; if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) { dev_warn(DEV, "Writing the whole bitmap, due to failed kmalloc\n"); @@ -911,7 +935,7 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) if (mdev->csums_tfm) { digest_size = crypto_hash_digestsize(mdev->csums_tfm); D_ASSERT(digest_size == di->digest_size); - digest = kmalloc(digest_size, GFP_KERNEL); + digest = kmalloc(digest_size, GFP_NOIO); } if (digest) { drbd_csum(mdev, mdev->csums_tfm, e->private_bio, digest); @@ -967,13 +991,15 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) goto out; digest_size = crypto_hash_digestsize(mdev->verify_tfm); - digest = kmalloc(digest_size, GFP_KERNEL); + /* FIXME if this allocation fails, online verify will not terminate! */ + digest = kmalloc(digest_size, GFP_NOIO); if (digest) { drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest); + inc_rs_pending(mdev); ok = drbd_send_drequest_csum(mdev, e->sector, e->size, digest, digest_size, P_OV_REPLY); - if (ok) - inc_rs_pending(mdev); + if (!ok) + dec_rs_pending(mdev); kfree(digest); } @@ -1021,7 +1047,7 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) if (likely(drbd_bio_uptodate(e->private_bio))) { digest_size = crypto_hash_digestsize(mdev->verify_tfm); - digest = kmalloc(digest_size, GFP_KERNEL); + digest = kmalloc(digest_size, GFP_NOIO); if (digest) { drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest); @@ -1157,7 +1183,7 @@ int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return ok; } -STATIC int _drbd_may_sync_now(struct drbd_conf *mdev) +static int _drbd_may_sync_now(struct drbd_conf *mdev) { struct drbd_conf *odev = mdev; @@ -1180,7 +1206,7 @@ STATIC int _drbd_may_sync_now(struct drbd_conf *mdev) * * Called from process context only (admin command and after_state_ch). */ -STATIC int _drbd_pause_after(struct drbd_conf *mdev) +static int _drbd_pause_after(struct drbd_conf *mdev) { struct drbd_conf *odev; int i, rv = 0; @@ -1205,7 +1231,7 @@ STATIC int _drbd_pause_after(struct drbd_conf *mdev) * * Called from process context only (admin command and worker). 
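The w_make_resync_request() changes above replace the old "requeue if too much is pending" heuristic with explicit pacing against the peer's receive buffer and the local send buffer. A back-of-envelope model of the batch computation, with illustrative parameters (DRBD's SLEEP_TIME is HZ/10 and BM_BLOCK_SIZE is 4 KiB):

/*
 * Sketch, not DRBD code: one worker wakeup may issue rate/40 requests
 * (rate in KiB/s), clamped so that requests in flight never exceed
 * what the peer's receive buffer can absorb.
 */
static int resync_batch(int rate_kib_s, int pending, int rcvbuf, int req_size)
{
        int number = (HZ / 10) * rate_kib_s / ((4096 / 1024) * HZ);
        int mx = rcvbuf / req_size;     /* requests the peer can buffer */

        if (number > mx)                /* very fast resync rates */
                mx = number;
        if (pending + number > mx)
                number = mx - pending;
        return number;                  /* e.g. 10000 KiB/s -> 250, then clamped */
}

On top of that, the loop body above checks sk_wmem_queued against sk_sndbuf/2 on each iteration, so request generation also stops as soon as half the send buffer is queued.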
*/ -STATIC int _drbd_resume_next(struct drbd_conf *mdev) +static int _drbd_resume_next(struct drbd_conf *mdev) { struct drbd_conf *odev; int i, rv = 0; @@ -1240,19 +1266,46 @@ void suspend_other_sg(struct drbd_conf *mdev) write_unlock_irq(&global_state_lock); } -void drbd_alter_sa(struct drbd_conf *mdev, int na) +static int sync_after_error(struct drbd_conf *mdev, int o_minor) { - int changes; + struct drbd_conf *odev; - write_lock_irq(&global_state_lock); - mdev->sync_conf.after = na; + if (o_minor == -1) + return NO_ERROR; + if (o_minor < -1 || minor_to_mdev(o_minor) == NULL) + return ERR_SYNC_AFTER; + + /* check for loops */ + odev = minor_to_mdev(o_minor); + while (1) { + if (odev == mdev) + return ERR_SYNC_AFTER_CYCLE; - do { - changes = _drbd_pause_after(mdev); - changes |= _drbd_resume_next(mdev); - } while (changes); + /* dependency chain ends here, no cycles. */ + if (odev->sync_conf.after == -1) + return NO_ERROR; + /* follow the dependency chain */ + odev = minor_to_mdev(odev->sync_conf.after); + } +} + +int drbd_alter_sa(struct drbd_conf *mdev, int na) +{ + int changes; + int retcode; + + write_lock_irq(&global_state_lock); + retcode = sync_after_error(mdev, na); + if (retcode == NO_ERROR) { + mdev->sync_conf.after = na; + do { + changes = _drbd_pause_after(mdev); + changes |= _drbd_resume_next(mdev); + } while (changes); + } write_unlock_irq(&global_state_lock); + return retcode; } /** @@ -1268,6 +1321,11 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) union drbd_state ns; int r; + if (mdev->state.conn >= C_SYNC_SOURCE) { + dev_err(DEV, "Resync already running!\n"); + return; + } + trace_drbd_resync(mdev, TRACE_LVL_SUMMARY, "Resync starting: side=%s\n", side == C_SYNC_TARGET ? "SyncTarget" : "SyncSource"); diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h index 724fb44aad06..f93fa111ce50 100644 --- a/drivers/block/drbd/drbd_wrappers.h +++ b/drivers/block/drbd/drbd_wrappers.h @@ -7,11 +7,6 @@ /* see get_sb_bdev and bd_claim */ extern char *drbd_sec_holder; -static inline sector_t drbd_get_hardsect_size(struct block_device *bdev) -{ - return bdev->bd_disk->queue->hardsect_size; -} - /* sets the number of 512 byte sectors of our virtual device */ static inline void drbd_set_my_capacity(struct drbd_conf *mdev, sector_t size) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 250002101e4e..dc478c648e37 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -26,7 +26,6 @@ #ifndef DRBD_H #define DRBD_H #include - #include #ifdef __KERNEL__ @@ -53,6 +52,13 @@ #endif +extern const char *drbd_buildtag(void); +#define REL_VERSION "8.3.2rc2" +#define API_VERSION 88 +#define PRO_VERSION_MIN 86 +#define PRO_VERSION_MAX 90 + + enum drbd_io_error_p { EP_PASS_ON, /* FIXME should the better be named "Ignore"? */ EP_CALL_HELPER, @@ -171,8 +177,8 @@ enum drbd_conns { C_WF_CONNECTION, C_WF_REPORT_PARAMS, /* we have a socket */ C_CONNECTED, /* we have introduced each other */ - C_STARTING_SYNC_S, /* starting full sync by IOCTL. */ - C_STARTING_SYNC_T, /* stariing full sync by IOCTL. */ + C_STARTING_SYNC_S, /* starting full sync by admin request. */ + C_STARTING_SYNC_T, /* starting full sync by admin request.
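sync_after_error() above validates a new "resync after" dependency before it is stored. Because every device has at most one predecessor, a straight walk along the chain suffices. The shape of the check, reduced to its core (next_after() is a hypothetical stand-in for following odev->sync_conf.after via minor_to_mdev()):

/* Illustrative only. */
static int would_create_cycle(int self, int after, int (*next_after)(int))
{
        while (after != -1) {
                if (after == self)
                        return 1;               /* proposed edge closes a loop */
                after = next_after(after);      /* follow the chain */
        }
        return 0;                               /* chain ended without a cycle */
}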
*/ C_WF_BITMAP_S, C_WF_BITMAP_T, C_WF_SYNC_UUID, @@ -249,8 +255,8 @@ enum drbd_state_ret_codes { SS_UNKNOWN_ERROR = 0, /* Used to sleep longer in _drbd_request_state */ SS_TWO_PRIMARIES = -1, SS_NO_UP_TO_DATE_DISK = -2, - SS_BOTH_INCONSISTENT = -4, - SS_SYNCING_DISKLESS = -5, + SS_NO_LOCAL_DISK = -4, + SS_NO_REMOTE_DISK = -5, SS_CONNECTED_OUTDATES = -6, SS_PRIMARY_NOP = -7, SS_RESYNC_RUNNING = -8, diff --git a/include/linux/drbd_config.h b/include/linux/drbd_config.h deleted file mode 100644 index 06a750ed58bf..000000000000 --- a/include/linux/drbd_config.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - drbd_config.h - DRBD's compile time configuration. - - drbd is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - drbd is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with drbd; see the file COPYING. If not, write to - the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef DRBD_CONFIG_H -#define DRBD_CONFIG_H - -extern const char *drbd_buildtag(void); - -#define REL_VERSION "8.3.1" -#define API_VERSION 88 -#define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 90 - -#ifndef __CHECKER__ /* for a sparse run, we need all STATICs */ -#define DBG_ALL_SYMBOLS /* no static functs, improves quality of OOPS traces */ -#endif - -/* Enable fault insertion code */ -#define DRBD_ENABLE_FAULTS - -#endif diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 2fafc2b9cdb1..9d067ce46960 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -72,6 +72,10 @@ #define DRBD_SNDBUF_SIZE_MAX (10<<20) #define DRBD_SNDBUF_SIZE_DEF (2*65535) +#define DRBD_RCVBUF_SIZE_MIN 0 +#define DRBD_RCVBUF_SIZE_MAX (10<<20) +#define DRBD_RCVBUF_SIZE_DEF (2*65535) + /* @4k PageSize -> 128kB - 512MB */ #define DRBD_MAX_BUFFERS_MIN 32 #define DRBD_MAX_BUFFERS_MAX 131072 diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index cc99f3ecd8ca..db5721ad50d1 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -55,6 +55,7 @@ NL_PACKET(net_conf, 5, NL_INTEGER( 26, T_MAY_IGNORE, after_sb_2p) NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict) NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo) + NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size) /* 59 addr_family was available in GIT, never released */ NL_BIT( 60, T_MANDATORY, mind_af) NL_BIT( 27, T_MAY_IGNORE, want_lose) @@ -77,7 +78,7 @@ NL_PACKET(syncer_conf, 8, NL_STRING( 52, T_MAY_IGNORE, verify_alg, SHARED_SECRET_MAX) NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32) NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX) - NL_BIT( 65, T_MAY_IGNORE, use_rle_encoding) + NL_BIT( 65, T_MAY_IGNORE, use_rle) ) NL_PACKET(invalidate, 9, ) @@ -121,6 +122,7 @@ NL_PACKET(dump_ee, 24, ) NL_PACKET(start_ov, 25, + NL_INT64( 66, T_MAY_IGNORE, start_sector) ) NL_PACKET(new_c_uuid, 26, diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 69e2455b00be..3a2b2d9b0472 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -27,6 +27,10 @@ #define LRU_CACHE_H #include +#include +#include +#include /* for memset */ +#include /* This header file (and its .c file; kernel-doc of functions see 
there) @@ -142,22 +146,29 @@ write intent log information, three of which are mentioned here. * an element is said to be "in the active set", * if either on "in_use" or "lru", i.e. lc_number != LC_FREE. * - * DRBD currently only uses 61 elements on the resync lru_cache (total memory - * usage 2 pages), and up to 3833 elements on the act_log lru_cache, totalling - * ~215 kB for 64bit architechture, ~53 pages. + * DRBD currently (May 2009) only uses 61 elements on the resync lru_cache + * (total memory usage 2 pages), and up to 3833 elements on the act_log + * lru_cache, totalling ~215 kB for 64bit architecture, ~53 pages. * * We usually do not actually free these objects again, but only "recycle" * them, as the change "index: -old_label, +LC_FREE" would need a transaction - * as well. Which also means that using a kmem_cache or even mempool to - * allocate the objects from wastes some resources. But it would avoid high - * order page allocations in kmalloc, so we may change to a kmem_cache backed - * allocation of the elements in the near future. + * as well. Which also means that using a kmem_cache to allocate the objects + * from wastes some resources. + * But it avoids high order page allocations in kmalloc. */ struct lc_element { struct hlist_node colision; struct list_head list; /* LRU list or free list */ - unsigned int refcnt; - unsigned int lc_number; + unsigned refcnt; + /* back "pointer" into ts_cache->element[index], + * for paranoia, and for "ts_element_to_index" */ + unsigned lc_index; + /* if we want to track a larger set of objects, + * it needs to become arch independent u64 */ + unsigned lc_number; + + /* special label when on free list */ +#define LC_FREE (~0U) }; struct lru_cache { @@ -166,16 +177,25 @@ struct lru_cache { struct list_head free; struct list_head in_use; - /* size of tracked objects */ + /* the pre-created kmem cache to allocate the objects from */ + struct kmem_cache *lc_cache; + + /* size of tracked objects, used to memset(,0,) them in lc_reset */ size_t element_size; /* offset of struct lc_element member in the tracked object */ size_t element_off; /* number of elements (indices) */ unsigned int nr_elements; + /* Arbitrary limit on maximum tracked objects. Practical limit is much + * lower due to allocation failures, probably. For typical use cases, + * nr_elements should be a few thousand at most. + * This also limits the maximum value of ts_element.ts_index, allowing the + * 8 high bits of .ts_index to be overloaded with flags in the future. */ +#define LC_MAX_ACTIVE (1<<24) /* statistics */ - unsigned int used; + unsigned used; /* number of elements currently on in_use list */ unsigned long hits, misses, starving, dirty, changed; /* see below: flag-bits for lru_cache */ @@ -190,8 +210,9 @@ struct lru_cache { void *lc_private; const char *name; - struct hlist_head slot[0]; - /* hash colision chains here, then element storage.
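With the struct changes above, a tracked object embeds its struct lc_element, and the cache keeps a real back pointer (lc_index) instead of deriving the index by pointer arithmetic. A sketch of how a user lays out its object under the new scheme; struct my_ext and its fields are invented:

/* Illustrative only. */
struct my_ext {
        struct lc_element lce;  /* embedded bookkeeping, at e_off */
        u64 private_data;       /* user payload */
};

static struct my_ext *to_my_ext(struct lc_element *e)
{
        return container_of(e, struct my_ext, lce);     /* == lc_entry() */
}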
*/ + /* nr_elements there */ + struct hlist_head *lc_slot; + struct lc_element **lc_element; }; @@ -217,8 +238,8 @@ enum { #define LC_DIRTY (1<<__LC_DIRTY) #define LC_STARVING (1<<__LC_STARVING) -extern struct lru_cache *lc_create(const char *name, unsigned int e_count, - size_t e_size, size_t e_off); +extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned e_count, size_t e_size, size_t e_off); extern void lc_reset(struct lru_cache *lc); extern void lc_destroy(struct lru_cache *lc); extern void lc_set(struct lru_cache *lc, unsigned int enr, int index); @@ -236,15 +257,22 @@ extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc); extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext, void (*detail) (struct seq_file *, struct lc_element *)); -/* This can be used to stop lc_get from changing the set of active elements. - * Note that the reference counts and order on the lru list may still change. - * returns true if we aquired the lock. +/** + * lc_try_lock - can be used to stop lc_get() from changing the tracked set + * @lc: the lru cache to operate on + * + * Note that the reference counts and order on the active and lru lists may + * still change. Returns true if we acquired the lock. */ static inline int lc_try_lock(struct lru_cache *lc) { return !test_and_set_bit(__LC_DIRTY, &lc->flags); } +/** + * lc_unlock - unlock @lc, allow lc_get() to change the set again + * @lc: the lru cache to operate on + */ static inline void lc_unlock(struct lru_cache *lc) { clear_bit(__LC_DIRTY, &lc->flags); @@ -257,29 +285,10 @@ static inline int lc_is_used(struct lru_cache *lc, unsigned int enr) return e && e->refcnt; } -#define LC_FREE (-1U) - #define lc_entry(ptr, type, member) \ container_of(ptr, type, member) -static inline struct lc_element * -lc_element_by_index(struct lru_cache *lc, unsigned int i) -{ - BUG_ON(i >= lc->nr_elements); - return (struct lc_element *)( - ((char *)(lc->slot + lc->nr_elements)) + - i * lc->element_size - + lc->element_off); -} - -static inline size_t lc_index_of(struct lru_cache *lc, struct lc_element *e) -{ - size_t i = ((char *)(e) - lc->element_off - - ((char *)(lc->slot + lc->nr_elements))) - / lc->element_size; - BUG_ON(i >= lc->nr_elements); - BUG_ON(e != lc_element_by_index(lc, i)); - return i; -} +extern struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i); +extern unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e); #endif diff --git a/lib/lru_cache.c b/lib/lru_cache.c index f8632f1f7f7c..ab11a710b6e2 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -30,78 +30,134 @@ #include /* for seq_printf */ #include -/* this is developers aid only! */ -#define PARANOIA_ENTRY() BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)) -#define PARANOIA_LEAVE() do { clear_bit(__LC_PARANOIA, &lc->flags); smp_mb__after_clear_bit(); } while (0) -#define RETURN(x...) do { PARANOIA_LEAVE(); return x ; } while (0) +MODULE_AUTHOR("Philipp Reisner , " + "Lars Ellenberg "); +MODULE_DESCRIPTION("lru_cache - Track sets of hot objects"); +MODULE_LICENSE("GPL"); + +/* this is a developer's aid only. + * it catches concurrent access (lack of locking on the user's part) */ +#define PARANOIA_ENTRY() do { \ + BUG_ON(!lc); \ + BUG_ON(!lc->nr_elements); \ + BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)); \ +} while (0) + +#define RETURN(x...)
do { \ + clear_bit(__LC_PARANOIA, &lc->flags); \ + smp_mb__after_clear_bit(); return x ; } while (0) + +/* BUG() if e is not one of the elements tracked by lc */ +#define PARANOIA_LC_ELEMENT(lc, e) do { \ + struct lru_cache *lc_ = (lc); \ + struct lc_element *e_ = (e); \ + unsigned i = e_->lc_index; \ + BUG_ON(i >= lc_->nr_elements); \ + BUG_ON(lc_->lc_element[i] != e_); } while (0) -static size_t size_of_lc(unsigned int e_count, size_t e_size) -{ - return sizeof(struct lru_cache) - + e_count * (e_size + sizeof(struct hlist_head)); -} - -static void lc_init(struct lru_cache *lc, - const size_t bytes, const char *name, - const unsigned int e_count, const size_t e_size, - const size_t e_off) +/** + * lc_create - prepares to track objects in an active set + * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details + * @cache: kmem cache from which the tracked objects are allocated + * @e_count: number of elements allowed to be active simultaneously + * @e_size: size of the tracked objects + * @e_off: offset to the &struct lc_element member in a tracked object + * + * Returns a pointer to a newly initialized struct lru_cache on success, + * or NULL on (allocation) failure. + */ +struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned e_count, size_t e_size, size_t e_off) { + struct hlist_head *slot = NULL; + struct lc_element **element = NULL; + struct lru_cache *lc; struct lc_element *e; - unsigned int i; + unsigned cache_obj_size = kmem_cache_size(cache); + unsigned i; - BUG_ON(!e_count); + WARN_ON(cache_obj_size < e_size); + if (cache_obj_size < e_size) + return NULL; + + /* e_count too big; would probably fail the allocation below anyways. + * for typical use cases, e_count should be a few thousand at most. */ + if (e_count > LC_MAX_ACTIVE) + return NULL; + + slot = kzalloc(e_count * sizeof(struct hlist_head*), GFP_KERNEL); + if (!slot) + goto out_fail; + element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL); + if (!element) + goto out_fail; + + lc = kzalloc(sizeof(*lc), GFP_KERNEL); + if (!lc) + goto out_fail; - memset(lc, 0, bytes); INIT_LIST_HEAD(&lc->in_use); INIT_LIST_HEAD(&lc->lru); INIT_LIST_HEAD(&lc->free); + + lc->name = name; lc->element_size = e_size; - lc->element_off = e_off; - lc->nr_elements = e_count; - lc->new_number = -1; - lc->name = name; + lc->element_off = e_off; + lc->nr_elements = e_count; + lc->new_number = LC_FREE; + lc->lc_cache = cache; + lc->lc_element = element; + lc->lc_slot = slot; + + /* preallocate all objects */ for (i = 0; i < e_count; i++) { - e = lc_element_by_index(lc, i); + void *p = kmem_cache_alloc(cache, GFP_KERNEL); + if (!p) + break; + memset(p, 0, lc->element_size); + e = p + e_off; + e->lc_index = i; e->lc_number = LC_FREE; list_add(&e->list, &lc->free); - /* memset(,0,) did the rest of init for us */ + element[i] = e; + } + if (i == e_count) + return lc; + + /* else: could not allocate all elements, give up */ + for (i--; i; i--) { + void *p = element[i]; + kmem_cache_free(cache, p - e_off); } + kfree(lc); +out_fail: + kfree(element); + kfree(slot); + return NULL; } -/** - * lc_create - prepares to track objects in an active set - * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump - * @e_count: number of elements allowed to be active simultaneously - * @e_size: size of the tracked objects - * @e_off: offset to the &struct lc_element member in a tracked object - * - * Returns a pointer to a newly initialized struct lru_cache on success, - * or NULL on (allocation) failure.
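Hypothetical usage of the new lc_create() signature documented above, reusing the invented struct my_ext from the earlier sketch: the caller now owns a kmem_cache sized for its extended object, and lc_create() preallocates all e_count objects from it up front, unwinding cleanly if any allocation fails.

/* Illustrative only. */
static struct lru_cache *my_cache_setup(void)
{
        struct kmem_cache *kc;

        kc = kmem_cache_create("my_ext", sizeof(struct my_ext), 0, 0, NULL);
        if (!kc)
                return NULL;
        return lc_create("my_set", kc, 61, sizeof(struct my_ext),
                         offsetof(struct my_ext, lce));
}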
- */ -struct lru_cache *lc_create(const char *name, unsigned int e_count, - size_t e_size, size_t e_off) +void lc_free_by_index(struct lru_cache *lc, unsigned i) { - struct lru_cache *lc; - size_t bytes; - - BUG_ON(!e_count); - BUG_ON(e_size < sizeof(struct lc_element)); - BUG_ON(e_size - sizeof(struct lc_element) < e_off); - e_size = ALIGN(e_size, sizeof(void *)); - e_size = max(sizeof(struct lc_element), e_size); - bytes = size_of_lc(e_count, e_size); - lc = kmalloc(bytes, GFP_KERNEL); - if (lc) - lc_init(lc, bytes, name, e_count, e_size, e_off); - return lc; + void *p = lc->lc_element[i]; + WARN_ON(!p); + if (p) { + p -= lc->element_off; + kmem_cache_free(lc->lc_cache, p); + } } /** * lc_destroy - frees memory allocated by lc_create() - * @lc: the lru cache to operate on + * @lc: the lru cache to destroy */ void lc_destroy(struct lru_cache *lc) { + unsigned i; + if (!lc) + return; + for (i = 0; i < lc->nr_elements; i++) + lc_free_by_index(lc, i); + kfree(lc->lc_element); + kfree(lc->lc_slot); kfree(lc); } @@ -114,14 +170,38 @@ void lc_destroy(struct lru_cache *lc) */ void lc_reset(struct lru_cache *lc) { - lc_init(lc, size_of_lc(lc->nr_elements, lc->element_size), lc->name, - lc->nr_elements, lc->element_size, lc->element_off); + unsigned i; + + INIT_LIST_HEAD(&lc->in_use); + INIT_LIST_HEAD(&lc->lru); + INIT_LIST_HEAD(&lc->free); + lc->used = 0; + lc->hits = 0; + lc->misses = 0; + lc->starving = 0; + lc->dirty = 0; + lc->changed = 0; + lc->flags = 0; + lc->changing_element = NULL; + lc->new_number = LC_FREE; + memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); + + for (i = 0; i < lc->nr_elements; i++) { + struct lc_element *e = lc->lc_element[i]; + void *p = e; + p -= lc->element_off; + memset(p, 0, lc->element_size); + /* re-init it */ + e->lc_index = i; + e->lc_number = LC_FREE; + list_add(&e->list, &lc->free); + } } /** - * lc_seq_printf_stats - print stats about @ts into @seq + * lc_seq_printf_stats - print stats about @lc into @seq * @seq: the seq_file to print into - * @ts: the tracked set to print statistics of + * @lc: the lru cache to print statistics of */ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) { @@ -138,9 +218,9 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed); } -static unsigned int lc_hash_fn(struct lru_cache *lc, unsigned int enr) +static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) { - return enr % lc->nr_elements; + return lc->lc_slot + (enr % lc->nr_elements); } @@ -159,7 +239,8 @@ struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) struct lc_element *e; BUG_ON(!lc); - hlist_for_each_entry(e, n, lc->slot + lc_hash_fn(lc, enr), colision) { + BUG_ON(!lc->nr_elements); + hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { if (e->lc_number == enr) return e; } @@ -178,6 +259,8 @@ static struct lc_element *lc_evict(struct lru_cache *lc) n = lc->lru.prev; e = list_entry(n, struct lc_element, list); + PARANOIA_LC_ELEMENT(lc, e); + list_del(&e->list); hlist_del(&e->colision); return e; @@ -194,14 +277,12 @@ static struct lc_element *lc_evict(struct lru_cache *lc) void lc_del(struct lru_cache *lc, struct lc_element *e) { PARANOIA_ENTRY(); - BUG_ON(e < lc_element_by_index(lc, 0)); - BUG_ON(e > lc_element_by_index(lc, lc->nr_elements-1)); + PARANOIA_LC_ELEMENT(lc, e); BUG_ON(e->refcnt); - list_del(&e->list); - hlist_del_init(&e->colision); + e->lc_number = LC_FREE; - e->refcnt = 0; - 
list_add(&e->list, &lc->free); + hlist_del_init(&e->colision); + list_move(&e->list, &lc->free); RETURN(); } @@ -243,11 +324,11 @@ static int lc_unused_element_available(struct lru_cache *lc) * * Return values: * NULL - * The cache was marked %TS_STARVING, + * The cache was marked %LC_STARVING, * or the requested label was not in the active set * and a changing transaction is still pending (@lc was marked %LC_DIRTY). - * Or no unused or free element could be recycled (@ts will be marked as - * %TS_STARVING, blocking further ts_get() operations). + * Or no unused or free element could be recycled (@lc will be marked as + * %LC_STARVING, blocking further lc_get() operations). * * pointer to the element with the REQUESTED element number. * In this case, it can be used right away @@ -269,9 +350,6 @@ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) { struct lc_element *e; - BUG_ON(!lc); - BUG_ON(!lc->nr_elements); - PARANOIA_ENTRY(); if (lc->flags & LC_STARVING) { ++lc->starving; @@ -328,9 +406,6 @@ struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) { struct lc_element *e; - BUG_ON(!lc); - BUG_ON(!lc->nr_elements); - PARANOIA_ENTRY(); if (lc->flags & LC_STARVING) { ++lc->starving; @@ -356,13 +431,13 @@ void lc_changed(struct lru_cache *lc, struct lc_element *e) { PARANOIA_ENTRY(); BUG_ON(e != lc->changing_element); + PARANOIA_LC_ELEMENT(lc, e); ++lc->changed; e->lc_number = lc->new_number; list_add(&e->list, &lc->in_use); - hlist_add_head(&e->colision, - lc->slot + lc_hash_fn(lc, lc->new_number)); + hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); lc->changing_element = NULL; - lc->new_number = -1; + lc->new_number = LC_FREE; clear_bit(__LC_DIRTY, &lc->flags); smp_mb__after_clear_bit(); RETURN(); @@ -375,16 +450,13 @@ void lc_changed(struct lru_cache *lc, struct lc_element *e) * @e: the element to put * * If refcnt reaches zero, the element is moved to the lru list, - * and a %TS_STARVING (if set) is cleared. + * and a %LC_STARVING (if set) is cleared. * Returns the new (post-decrement) refcnt. */ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) { - BUG_ON(!lc); - BUG_ON(!lc->nr_elements); - BUG_ON(!e); - PARANOIA_ENTRY(); + PARANOIA_LC_ELEMENT(lc, e); BUG_ON(e->refcnt == 0); BUG_ON(e == lc->changing_element); if (--e->refcnt == 0) { @@ -397,6 +469,29 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) RETURN(e->refcnt); } +/** + * lc_element_by_index + * @lc: the lru cache to operate on + * @i: the index of the element to return + */ +struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i) +{ + BUG_ON(i >= lc->nr_elements); + BUG_ON(lc->lc_element[i] == NULL); + BUG_ON(lc->lc_element[i]->lc_index != i); + return lc->lc_element[i]; +} + +/** + * lc_index_of + * @lc: the lru cache to operate on + * @e: the element to query for its index position in lc->element + */ +unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_LC_ELEMENT(lc, e); + return e->lc_index; +} /** * lc_set - associate index with label @@ -417,7 +512,7 @@ void lc_set(struct lru_cache *lc, unsigned int enr, int index) e->lc_number = enr; hlist_del_init(&e->colision); - hlist_add_head(&e->colision, lc->slot + lc_hash_fn(lc, enr)); + hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); list_move(&e->list, e->refcnt ? 
&lc->in_use : &lc->lru); } @@ -443,8 +538,7 @@ void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext seq_printf(seq, "\t%2d: FREE\n", i); } else { seq_printf(seq, "\t%2d: %4u %4u ", i, - e->lc_number, - e->refcnt); + e->lc_number, e->refcnt); detail(seq, e); } } @@ -460,5 +554,7 @@ EXPORT_SYMBOL(lc_find); EXPORT_SYMBOL(lc_get); EXPORT_SYMBOL(lc_put); EXPORT_SYMBOL(lc_changed); +EXPORT_SYMBOL(lc_element_by_index); +EXPORT_SYMBOL(lc_index_of); EXPORT_SYMBOL(lc_seq_printf_stats); EXPORT_SYMBOL(lc_seq_dump_details); -- cgit v1.2.3 From d2843e09d25504e548ecf473d136636b040c9e6f Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 6 Jul 2009 17:18:59 +0200 Subject: Following DRBD mainline. Now at 8.3.2 * drbd-8.3: Preparing the 8.3.2 release Fix a wrong use of kmap_atomic(..., KM_IRQ1) in the bitmap code [bugz 238] Added some missing statics Remove an incorrect ASSERT Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- MAINTAINERS | 13 +++++++++++++ drivers/block/drbd/drbd_bitmap.c | 16 ++++++++-------- drivers/block/drbd/drbd_worker.c | 8 +++++--- include/linux/drbd.h | 2 +- 4 files changed, 27 insertions(+), 12 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index fa2a16def17a..a3513789fef5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1939,6 +1939,19 @@ S: Maintained F: drivers/scsi/dpt* F: drivers/scsi/dpt/ +DRBD DRIVER +P: Philipp Reisner +P: Lars Ellenberg +M: drbd-dev@lists.linbit.com +L: drbd-user@lists.linbit.com +W: http://www.drbd.org +T: git git://git.drbd.org/linux-2.6-drbd.git drbd +T: git git://git.drbd.org/drbd-8.3.git +S: Supported +F: drivers/block/drbd/ +F: lib/lru_cache.c +F: Documentation/blockdev/drbd/ + DRIVER CORE, KOBJECTS, AND SYSFS P: Greg Kroah-Hartman M: gregkh@suse.de diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 417da6e3cea3..2858ef5f5c0c 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -164,17 +164,17 @@ static unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset return (unsigned long *) kmap_atomic(page, km); } -unsigned long * bm_map_paddr(struct drbd_bitmap *b, unsigned long offset) +static unsigned long * bm_map_paddr(struct drbd_bitmap *b, unsigned long offset) { return __bm_map_paddr(b, offset, KM_IRQ1); } -void __bm_unmap(unsigned long *p_addr, const enum km_type km) +static void __bm_unmap(unsigned long *p_addr, const enum km_type km) { kunmap_atomic(p_addr, km); }; -void bm_unmap(unsigned long *p_addr) +static void bm_unmap(unsigned long *p_addr) { return __bm_unmap(p_addr, KM_IRQ1); } @@ -367,7 +367,7 @@ static void bm_set_surplus(struct drbd_bitmap *b) bm_unmap(p_addr); } -static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian) +static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian, const enum km_type km) { unsigned long *p_addr, *bm, offset = 0; unsigned long bits = 0; @@ -375,7 +375,7 @@ static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endia while (offset < b->bm_words) { i = do_now = min_t(size_t, b->bm_words-offset, LWPP); - p_addr = bm_map_paddr(b, offset); + p_addr = __bm_map_paddr(b, offset, km); bm = p_addr + MLPP(offset); while (i--) { #ifndef __LITTLE_ENDIAN @@ -384,7 +384,7 @@ static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endia #endif bits += hweight_long(*bm++); } - bm_unmap(p_addr); + __bm_unmap(p_addr, km); offset += do_now; } @@ -393,12 +393,12 @@ static unsigned long
__bm_count_bits(struct drbd_bitmap *b, const int swap_endia static unsigned long bm_count_bits(struct drbd_bitmap *b) { - return __bm_count_bits(b, 0); + return __bm_count_bits(b, 0, KM_IRQ1); } static unsigned long bm_count_bits_swap_endian(struct drbd_bitmap *b) { - return __bm_count_bits(b, 1); + return __bm_count_bits(b, 1, KM_USER0); } void _drbd_bm_recount_bits(struct drbd_conf *mdev, char *file, int line) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 29c5bba88998..d93a93dd4829 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1410,10 +1410,12 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) return; } - if (ns.conn == C_SYNC_TARGET) { - D_ASSERT(!test_bit(STOP_SYNC_TIMER, &mdev->flags)); + /* ns.conn may already be != mdev->state.conn, + * we may have been paused in between, or become paused until + * the timer triggers. + * No matter, that is handled in resync_timer_fn() */ + if (ns.conn == C_SYNC_TARGET) mod_timer(&mdev->resync_timer, jiffies); - } drbd_md_sync(mdev); } diff --git a/include/linux/drbd.h b/include/linux/drbd.h index dc478c648e37..8f1e2b3617df 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,7 +53,7 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.2rc2" +#define REL_VERSION "8.3.2" #define API_VERSION 88 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 90 -- cgit v1.2.3 From 40fa7e2a773f972b644e3758230672a5f110d8ee Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 24 Jul 2009 13:05:19 +0200 Subject: Improvements to the bitmap code * Removed all calls to drbd_bm_recount_bits() * add missing recv_cnt accounting for diskless read * more efficiently set large areas of bitmap Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 125 ++++++++++++++++++++++++------------- drivers/block/drbd/drbd_int.h | 5 +- drivers/block/drbd/drbd_receiver.c | 6 +- drivers/block/drbd/drbd_worker.c | 4 -- 4 files changed, 87 insertions(+), 53 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 2858ef5f5c0c..3ebaa97666be 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -401,29 +401,6 @@ static unsigned long bm_count_bits_swap_endian(struct drbd_bitmap *b) return __bm_count_bits(b, 1, KM_USER0); } -void _drbd_bm_recount_bits(struct drbd_conf *mdev, char *file, int line) -{ - struct drbd_bitmap *b = mdev->bitmap; - unsigned long flags, bits; - - ERR_IF(!b) return; - - /* IMO this should be inside drbd_bm_lock/unlock. - * Unfortunately it is used outside of the locks. - * And I'm not yet sure where we need to place the - * lock/unlock correctly. - */ - - spin_lock_irqsave(&b->bm_lock, flags); - bits = bm_count_bits(b); - if (bits != b->bm_set) { - dev_err(DEV, "bm_set was %lu, corrected to %lu. %s:%d\n", - b->bm_set, bits, file, line); - b->bm_set = bits; - } - spin_unlock_irqrestore(&b->bm_lock, flags); -} - /* offset and len in long words.*/ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) { @@ -1031,10 +1008,10 @@ unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_f * for val != 0, we change 0 -> 1, return code positiv * for val == 0, we change 1 -> 0, return code negative * wants bitnr, not sector. + * expected to be called for only a few bits (e - s about BITS_PER_LONG). * Must hold bitmap lock already. 
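The km_type plumbing above exists because atomic kmap slots are per-CPU and per-type: a mapping must be released with the same slot it was taken with, and a slot must not be reused reentrantly. Passing km down lets bm_count_bits() keep using KM_IRQ1 (safe against IRQ-context users of the bitmap) while the endian-swap path, which only runs in process context, uses KM_USER0. A reduced sketch with an invented helper:

/* Illustrative only. */
static unsigned long count_first_word(struct page *pg, const enum km_type km)
{
        unsigned long *p = kmap_atomic(pg, km); /* map with the chosen slot */
        unsigned long bits = hweight_long(p[0]);
        kunmap_atomic(p, km);                   /* must match that slot */
        return bits;
}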
*/ - int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, - const unsigned long e, int val, const enum km_type km) + unsigned long e, int val, const enum km_type km) { struct drbd_bitmap *b = mdev->bitmap; unsigned long *p_addr = NULL; @@ -1042,23 +1019,24 @@ int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, unsigned long last_page_nr = -1UL; int c = 0; + if (e >= b->bm_bits) { + dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n", + s, e, b->bm_bits); + e = b->bm_bits ? b->bm_bits -1 : 0; + } for (bitnr = s; bitnr <= e; bitnr++) { - ERR_IF (bitnr >= b->bm_bits) { - dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); - } else { - unsigned long offset = bitnr>>LN2_BPL; - unsigned long page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3); - if (page_nr != last_page_nr) { - if (p_addr) - __bm_unmap(p_addr, km); - p_addr = __bm_map_paddr(b, offset, km); - last_page_nr = page_nr; - } - if (val) - c += (0 == __test_and_set_bit(bitnr & BPP_MASK, p_addr)); - else - c -= (0 != __test_and_clear_bit(bitnr & BPP_MASK, p_addr)); + unsigned long offset = bitnr>>LN2_BPL; + unsigned long page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3); + if (page_nr != last_page_nr) { + if (p_addr) + __bm_unmap(p_addr, km); + p_addr = __bm_map_paddr(b, offset, km); + last_page_nr = page_nr; } + if (val) + c += (0 == __test_and_set_bit(bitnr & BPP_MASK, p_addr)); + else + c -= (0 != __test_and_clear_bit(bitnr & BPP_MASK, p_addr)); } if (p_addr) __bm_unmap(p_addr, km); @@ -1102,12 +1080,73 @@ int drbd_bm_clear_bits(struct drbd_conf *mdev, const unsigned long s, const unsi return -bm_change_bits_to(mdev, s, e, 0); } +static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b, + int page_nr, int first_word, int last_word) +{ + int i; + int bits; + unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_USER0); + for (i = first_word; i < last_word; i++) { + bits = hweight_long(paddr[i]); + paddr[i] = ~0UL; + b->bm_set += BITS_PER_LONG - bits; + } + kunmap_atomic(paddr, KM_USER0); +} + /* the same thing, but without taking the spin_lock_irqsave. * you must first drbd_bm_lock(). */ -int _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) +void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) { - /* WARN_ON(!bm_is_locked(b)); */ - return __bm_change_bits_to(mdev, s, e, 1, KM_USER0); + /* s <= sl <= el <= e */ + /* first set_bit from the first bit (s) + * up to the next long boundary (sl), + * then assign full words including the last long boundary (el), + * then set_bit up to the last bit (e). + * do not use memset, because we have need to account for changes, + * so we need to loop over the words with hweight() anyways. + */ + unsigned long sl = ALIGN(s,BITS_PER_LONG); + unsigned long el = (e & ~((unsigned long)BITS_PER_LONG-1)) -1; + int first_page; + int last_page; + int page_nr; + int first_word; + int last_word; + + if (e - s <= 3*BITS_PER_LONG) { + /* don't bother; el and sl may even be wrong. 
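bm_set_full_words_within_one_page() above is the fast path; its trick is that the bitmap keeps a cached population count (bm_set), so each word's old bits must be counted before the word is overwritten. Reduced to a single word (illustrative helper, not the patch's code):

static void set_full_word(unsigned long *word, unsigned long *bm_set)
{
        int old = hweight_long(*word);  /* bits that were already set */
        *word = ~0UL;                   /* set all BITS_PER_LONG bits */
        *bm_set += BITS_PER_LONG - old; /* account only the new ones */
}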
*/ + __bm_change_bits_to(mdev, s, e, 1, KM_USER0); + return; + } + + /* difference is large enough that we can trust sl and el */ + + /* bits filling the current long */ + if (sl) + __bm_change_bits_to(mdev, s, sl-1, 1, KM_USER0); + + first_page = sl >> (3 + PAGE_SHIFT); + last_page = el >> (3 + PAGE_SHIFT); + + /* MLPP: modulo longs per page */ + /* LWPP: long words per page */ + first_word = MLPP(sl >> LN2_BPL); + last_word = LWPP; + + /* first and full pages, unless first page == last page */ + for (page_nr = first_page; page_nr < last_page; page_nr++) { + bm_set_full_words_within_one_page(mdev->bitmap, page_nr, first_word, last_word); + cond_resched(); + first_word = 0; + } + + /* last page (respectively only page, for first page == last page) */ + last_word = MLPP(el >> LN2_BPL); + bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word); + + /* possibly trailing bits */ + __bm_change_bits_to(mdev, el+1, e, 1, KM_USER0); } /* returns bit state diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a63595d80579..27ea07013935 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1306,7 +1306,7 @@ extern int drbd_bm_set_bits( extern int drbd_bm_clear_bits( struct drbd_conf *mdev, unsigned long s, unsigned long e); /* bm_set_bits variant for use while holding drbd_bm_lock */ -extern int _drbd_bm_set_bits(struct drbd_conf *mdev, +extern void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e); extern int drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr); extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr); @@ -1334,9 +1334,6 @@ extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, extern void drbd_bm_lock(struct drbd_conf *mdev, char *why); extern void drbd_bm_unlock(struct drbd_conf *mdev); -extern void _drbd_bm_recount_bits(struct drbd_conf *mdev, char *file, int line); -#define drbd_bm_recount_bits(mdev) \ - _drbd_bm_recount_bits(mdev, __FILE__, __LINE__) extern int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e); /* drbd_main.c */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b222b24ddc51..27cd2dd25b83 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1381,6 +1381,10 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, data_size -= dgs; + /* optimistically update recv_cnt. if receiving fails below, + * we disconnect anyways, and counters will be reset. */ + mdev->recv_cnt += data_size>>9; + bio = req->master_bio; D_ASSERT(sector == bio->bi_sector); @@ -2484,8 +2488,6 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } } - drbd_bm_recount_bits(mdev); - return rv; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d93a93dd4829..fac03aea6bf7 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -796,8 +796,6 @@ out: drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished"); } - drbd_bm_recount_bits(mdev); - if (khelper_cmd) drbd_khelper(mdev, khelper_cmd); @@ -1329,8 +1327,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) trace_drbd_resync(mdev, TRACE_LVL_SUMMARY, "Resync starting: side=%s\n", side == C_SYNC_TARGET ? 
"SyncTarget" : "SyncSource"); - drbd_bm_recount_bits(mdev); - /* In case a previous resync run was aborted by an IO error/detach on the peer. */ drbd_rs_cancel_all(mdev); -- cgit v1.2.3 From 8f37d2cb4b18588b631b795e84cadb7210f1b594 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 24 Jul 2009 16:52:36 +0200 Subject: set bits: optimize for complete last word, fix off-by-one-word corner case Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 3ebaa97666be..474025a6d3eb 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1080,6 +1080,8 @@ int drbd_bm_clear_bits(struct drbd_conf *mdev, const unsigned long s, const unsi return -bm_change_bits_to(mdev, s, e, 0); } +/* sets all bits in full words, + * from first_word up to, but not including, last_word */ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b, int page_nr, int first_word, int last_word) { @@ -1094,20 +1096,21 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b, kunmap_atomic(paddr, KM_USER0); } -/* the same thing, but without taking the spin_lock_irqsave. - * you must first drbd_bm_lock(). */ +/* same thing as drbd_bm_set_bits, but without taking the spin_lock_irqsave. + * you must first drbd_bm_lock(). + * Sets bits from s to e _inclusive_. */ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) { - /* s <= sl <= el <= e */ - /* first set_bit from the first bit (s) + /* First set_bit from the first bit (s) * up to the next long boundary (sl), - * then assign full words including the last long boundary (el), - * then set_bit up to the last bit (e). - * do not use memset, because we have need to account for changes, + * then assign full words up to the last long boundary (el), + * then set_bit up to and including the last bit (e). + * + * Do not use memset, because we have need to account for changes, * so we need to loop over the words with hweight() anyways. 
*/ unsigned long sl = ALIGN(s,BITS_PER_LONG); - unsigned long el = (e & ~((unsigned long)BITS_PER_LONG-1)) -1; + unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1); int first_page; int last_page; int page_nr; @@ -1146,7 +1149,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word); /* possibly trailing bits */ - __bm_change_bits_to(mdev, el+1, e, 1, KM_USER0); + __bm_change_bits_to(mdev, el, e, 1, KM_USER0); } /* returns bit state @@ -1185,7 +1188,7 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) return i; } -/* returns number of bits set */ +/* returns number of bits set in the range [s, e] */ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) { unsigned long flags; -- cgit v1.2.3 From e19ad91c068b351d2af1617264a6ef58aa1fbc08 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 24 Jul 2009 17:01:26 +0200 Subject: Kbuild: added a default of "m" Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/Kconfig | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig index f133a8925cbb..8b14ea2c43f8 100644 --- a/drivers/block/drbd/Kconfig +++ b/drivers/block/drbd/Kconfig @@ -9,6 +9,7 @@ config BLK_DEV_DRBD tristate "DRBD Distributed Replicated Block Device support" depends on PROC_FS && INET && CONNECTOR select LRU_CACHE + default m help NOTE: In order to authenticate connections you have to select @@ -35,17 +36,18 @@ config BLK_DEV_DRBD For automatic failover you need a cluster manager (e.g. heartbeat). See also: http://www.drbd.org/, http://www.linux-ha.org - If unsure, say N. + If unsure, say M. config DRBD_TRACE tristate "DRBD tracing" depends on BLK_DEV_DRBD select TRACEPOINTS + default m help Say Y here if you want to be able to trace various events in DRBD. - If unsure, say N. + If unsure, say M. config DRBD_FAULT_INJECTION bool "DRBD fault injection" -- cgit v1.2.3 From aedbcc28fb824c43ca7a0f9ac7240afff29c3966 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 27 Jul 2009 10:58:47 +0200 Subject: drbd-8.3: drbd_strings: symbol clashes, drbd_bitmap: assert drbd_strings: Renamed the functions to the drbd_ namespace; export them from drbd.ko to drbd_trace.ko drbd_bitmap: set bits: do not trigger assert for aligned very last bit.
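A worked example of the corner case this commit addresses, assuming BITS_PER_LONG == 64 and a range ending exactly on a word boundary, e = 127 (not from the patch text):

/*
 *   old: el = (127 & ~63UL) - 1 = 63, so the trailing loop set bits
 *        64..127 one by one and the last complete word never took
 *        the fast word path.
 *   new: el = (127 + 1) & ~63UL = 128, so full words cover 64..127;
 *        the trailing call would then start at 128 > e, which is why
 *        the hunk below adds the "if (el <= e)" guard.
 */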
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/Makefile | 2 +- drivers/block/drbd/drbd_bitmap.c | 16 +++++++++++----- drivers/block/drbd/drbd_int.h | 6 +++--- drivers/block/drbd/drbd_main.c | 36 +++++++++++++++++++++--------------- drivers/block/drbd/drbd_proc.c | 10 +++++----- drivers/block/drbd/drbd_receiver.c | 10 +++++----- drivers/block/drbd/drbd_strings.c | 8 ++++---- drivers/block/drbd/drbd_tracing.c | 8 ++++---- drivers/block/drbd/drbd_worker.c | 4 ++-- include/linux/drbd.h | 8 ++++---- 10 files changed, 60 insertions(+), 48 deletions(-) diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile index 68d1e7ce9aa3..7d86ef8a8b40 100644 --- a/drivers/block/drbd/Makefile +++ b/drivers/block/drbd/Makefile @@ -2,7 +2,7 @@ drbd-y := drbd_bitmap.o drbd_proc.o drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o drbd-y += drbd_main.o drbd_strings.o drbd_nl.o -drbd_trace-y := drbd_tracing.o drbd_strings.o +drbd_trace-y := drbd_tracing.o obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o obj-$(CONFIG_DRBD_TRACE) += drbd_trace.o diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 474025a6d3eb..5b7c763e59d7 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1096,8 +1096,9 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b, kunmap_atomic(paddr, KM_USER0); } -/* same thing as drbd_bm_set_bits, but without taking the spin_lock_irqsave. - * you must first drbd_bm_lock(). +/* Same thing as drbd_bm_set_bits, but without taking the spin_lock_irqsave. + * You must first drbd_bm_lock(). + * Can be called to set the whole bitmap in one go. * Sets bits from s to e _inclusive_. */ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e) { @@ -1106,7 +1107,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi * then assign full words up to the last long boundary (el), * then set_bit up to and including the last bit (e). * - * Do not use memset, because we have need to account for changes, + * Do not use memset, because we must account for changes, * so we need to loop over the words with hweight() anyways. */ unsigned long sl = ALIGN(s,BITS_PER_LONG); @@ -1148,8 +1149,13 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi last_word = MLPP(el >> LN2_BPL); bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word); - /* possibly trailing bits */ - __bm_change_bits_to(mdev, el, e, 1, KM_USER0); + /* possibly trailing bits. + * example: (e & 63) == 63, el will be e+1. 
+ * if that even was the very last bit, + * it would trigger an assert in __bm_change_bits_to() + */ + if (el <= e) + __bm_change_bits_to(mdev, el, e, 1, KM_USER0); } /* returns bit state diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 27ea07013935..aa35a48e106c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1486,8 +1486,8 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo); /* drbd_proc.c */ extern struct proc_dir_entry *drbd_proc; extern struct file_operations drbd_proc_fops; -extern const char *conns_to_name(enum drbd_conns s); -extern const char *roles_to_name(enum drbd_role s); +extern const char *drbd_conn_str(enum drbd_conns s); +extern const char *drbd_role_str(enum drbd_role s); /* drbd_actlog.c */ extern void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector); @@ -2002,7 +2002,7 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, */ smp_rmb(); dev_warn(DEV, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n", - conns_to_name(mdev->state.conn), + drbd_conn_str(mdev->state.conn), *bits_left, mdev->rs_total, mdev->rs_failed); *per_mil_done = 0; } else { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 73c6a9da7645..7bde52d02f60 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -664,11 +664,11 @@ static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) { dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n", name, - conns_to_name(ns.conn), - roles_to_name(ns.role), - roles_to_name(ns.peer), - disks_to_name(ns.disk), - disks_to_name(ns.pdsk), + drbd_conn_str(ns.conn), + drbd_role_str(ns.role), + drbd_role_str(ns.peer), + drbd_disk_str(ns.disk), + drbd_disk_str(ns.pdsk), ns.susp ? 's' : 'r', ns.aftr_isp ? 'a' : '-', ns.peer_isp ? 'p' : '-', @@ -681,25 +681,25 @@ void print_st_err(struct drbd_conf *mdev, { if (err == SS_IN_TRANSIENT_STATE) return; - dev_err(DEV, "State change failed: %s\n", set_st_err_name(err)); + dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err)); print_st(mdev, " state", os); print_st(mdev, "wanted", ns); } -#define peers_to_name roles_to_name -#define pdsks_to_name disks_to_name +#define drbd_peer_str drbd_role_str +#define drbd_pdsk_str drbd_disk_str -#define susps_to_name(A) ((A) ? "1" : "0") -#define aftr_isps_to_name(A) ((A) ? "1" : "0") -#define peer_isps_to_name(A) ((A) ? "1" : "0") -#define user_isps_to_name(A) ((A) ? "1" : "0") +#define drbd_susp_str(A) ((A) ? "1" : "0") +#define drbd_aftr_isp_str(A) ((A) ? "1" : "0") +#define drbd_peer_isp_str(A) ((A) ? "1" : "0") +#define drbd_user_isp_str(A) ((A) ? "1" : "0") #define PSC(A) \ ({ if (ns.A != os.A) { \ pbp += sprintf(pbp, #A "( %s -> %s ) ", \ - A##s_to_name(os.A), \ - A##s_to_name(ns.A)); \ + drbd_##A##_str(os.A), \ + drbd_##A##_str(ns.A)); \ } }) /** @@ -1006,7 +1006,7 @@ int __drbd_set_state(struct drbd_conf *mdev, if (is_valid_state(mdev, os) == rv) { dev_err(DEV, "Considering state change from bad state. 
" "Error would be: '%s'\n", - set_st_err_name(rv)); + drbd_set_st_err_str(rv)); print_st(mdev, "old", os); print_st(mdev, "new", ns); rv = is_valid_state_transition(mdev, ns, os); @@ -3745,3 +3745,9 @@ const char *drbd_buildtag(void) module_init(drbd_init) module_exit(drbd_cleanup) + +/* For drbd_tracing: */ +EXPORT_SYMBOL(drbd_conn_str); +EXPORT_SYMBOL(drbd_role_str); +EXPORT_SYMBOL(drbd_disk_str); +EXPORT_SYMBOL(drbd_set_st_err_str); diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 432a7dd39f7c..98fcb7450c76 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -190,7 +190,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "\n"); } - sn = conns_to_name(mdev->state.conn); + sn = drbd_conn_str(mdev->state.conn); if (mdev->state.conn == C_STANDALONE && mdev->state.disk == D_DISKLESS && @@ -202,10 +202,10 @@ static int drbd_seq_show(struct seq_file *seq, void *v) " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u " "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c", i, sn, - roles_to_name(mdev->state.role), - roles_to_name(mdev->state.peer), - disks_to_name(mdev->state.disk), - disks_to_name(mdev->state.pdsk), + drbd_role_str(mdev->state.role), + drbd_role_str(mdev->state.peer), + drbd_disk_str(mdev->state.disk), + drbd_disk_str(mdev->state.pdsk), (mdev->net_conf == NULL ? ' ' : (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')), mdev->state.susp ? 's' : 'r', diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 27cd2dd25b83..f884baf961fd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2043,7 +2043,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) if (mdev->state.conn >= C_CONNECTED && mdev->state.conn != C_VERIFY_T) dev_warn(DEV, "ASSERT FAILED: got P_OV_REQUEST while being %s\n", - conns_to_name(mdev->state.conn)); + drbd_conn_str(mdev->state.conn)); if (mdev->ov_start_sector == ~(sector_t)0 && mdev->agreed_pro_version >= 90) { mdev->ov_start_sector = sector; @@ -3014,7 +3014,7 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h) real_peer_disk = peer_state.disk; if (peer_state.disk == D_NEGOTIATING) { real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT; - dev_info(DEV, "real peer disk state = %s\n", disks_to_name(real_peer_disk)); + dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk)); } spin_lock_irq(&mdev->req_lock); @@ -3354,7 +3354,7 @@ static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h) /* admin may have requested C_DISCONNECTING, * other threads may have noticed network errors */ dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n", - conns_to_name(mdev->state.conn)); + drbd_conn_str(mdev->state.conn)); } ok = TRUE; @@ -3511,7 +3511,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) return; if (mdev->state.conn >= C_WF_CONNECTION) dev_err(DEV, "ASSERT FAILED cstate = %s, expected < WFConnection\n", - conns_to_name(mdev->state.conn)); + drbd_conn_str(mdev->state.conn)); /* asender does not clean up anything. 
it must not interfere, either */ drbd_thread_stop(&mdev->asender); @@ -3948,7 +3948,7 @@ static int got_RqSReply(struct drbd_conf *mdev, struct p_header *h) } else { set_bit(CL_ST_CHG_FAIL, &mdev->flags); dev_err(DEV, "Requested state change failed by peer: %s (%d)\n", - set_st_err_name(retcode), retcode); + drbd_set_st_err_str(retcode), retcode); } wake_up(&mdev->state_wait); diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c index 09922d2d5bf9..b39f1b3a0cbd 100644 --- a/drivers/block/drbd/drbd_strings.c +++ b/drivers/block/drbd/drbd_strings.c @@ -89,23 +89,23 @@ static const char *drbd_state_sw_errors[] = { [-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted", }; -const char *conns_to_name(enum drbd_conns s) +const char *drbd_conn_str(enum drbd_conns s) { /* enums are unsigned... */ return s > C_PAUSED_SYNC_T ? "TOO_LARGE" : drbd_conn_s_names[s]; } -const char *roles_to_name(enum drbd_role s) +const char *drbd_role_str(enum drbd_role s) { return s > R_SECONDARY ? "TOO_LARGE" : drbd_role_s_names[s]; } -const char *disks_to_name(enum drbd_disk_state s) +const char *drbd_disk_str(enum drbd_disk_state s) { return s > D_UP_TO_DATE ? "TOO_LARGE" : drbd_disk_s_names[s]; } -const char *set_st_err_name(enum drbd_state_ret_codes err) +const char *drbd_set_st_err_str(enum drbd_state_ret_codes err) { return err <= SS_AFTER_LAST_ERROR ? "TOO_SMALL" : err > SS_TWO_PRIMARIES ? "TOO_LARGE" diff --git a/drivers/block/drbd/drbd_tracing.c b/drivers/block/drbd/drbd_tracing.c index f2827209ca34..86509cc77634 100644 --- a/drivers/block/drbd/drbd_tracing.c +++ b/drivers/block/drbd/drbd_tracing.c @@ -484,19 +484,19 @@ static void probe_drbd_req(struct drbd_request *req, enum drbd_req_event what, c req->epoch, (unsigned long long)req->sector, req->size, - conns_to_name(mdev->state.conn)); + drbd_conn_str(mdev->state.conn)); } } -#define peers_to_name roles_to_name -#define pdsks_to_name disks_to_name +#define drbd_peer_str drbd_role_str +#define drbd_pdsk_str drbd_disk_str #define PSM(A) \ do { \ if (mask.A) { \ int i = snprintf(p, len, " " #A "( %s )", \ - A##s_to_name(val.A)); \ + drbd_##A##_str(val.A)); \ if (i >= len) \ return op; \ p += i; \ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index fac03aea6bf7..a6d86e37de30 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -433,7 +433,7 @@ int w_make_resync_request(struct drbd_conf *mdev, if (mdev->state.conn != C_SYNC_TARGET) dev_err(DEV, "%s in w_make_resync_request\n", - conns_to_name(mdev->state.conn)); + drbd_conn_str(mdev->state.conn)); if (!get_ldev(mdev)) { /* Since we only need to access mdev->rsync a @@ -1397,7 +1397,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) if (r == SS_SUCCESS) { dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", - conns_to_name(ns.conn), + drbd_conn_str(ns.conn), (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10), (unsigned long) mdev->rs_total); diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 8f1e2b3617df..3a25433a809e 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -275,10 +275,10 @@ enum drbd_state_ret_codes { }; /* from drbd_strings.c */ -extern const char *conns_to_name(enum drbd_conns); -extern const char *roles_to_name(enum drbd_role); -extern const char *disks_to_name(enum drbd_disk_state); -extern const char *set_st_err_name(enum drbd_state_ret_codes); +extern const char *drbd_conn_str(enum drbd_conns); +extern 
const char *drbd_role_str(enum drbd_role);
+extern const char *drbd_disk_str(enum drbd_disk_state);
+extern const char *drbd_set_st_err_str(enum drbd_state_ret_codes);

 #define SHARED_SECRET_MAX 64
-- cgit v1.2.3

From cf7fead8179d58b6df9cd9110c327a3ac338c705 Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Mon, 27 Jul 2009 16:59:20 +0200
Subject: buildfix: We failed to build if CONFIG_MODULES is not set

Signed-off-by: Philipp Reisner
Signed-off-by: Lars Ellenberg
---
 drivers/block/drbd/drbd_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 7bde52d02f60..ed3362b3ba02 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -3734,9 +3734,11 @@ const char *drbd_buildtag(void)
 	static char buildtag[38] = "\0uilt-in";

 	if (buildtag[0] == 0) {
+#ifdef CONFIG_MODULES
 		if (THIS_MODULE != NULL)
 			sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
 		else
+#endif
 			buildtag[0] = 'b';
 	}
-- cgit v1.2.3

From feb2ddbd7682a9b68dc93f5076323fb913a187b6 Mon Sep 17 00:00:00 2001
From: Philipp Reisner
Date: Mon, 27 Jul 2009 19:12:09 +0200
Subject: follow cpumask api change

Signed-off-by: Philipp Reisner
Signed-off-by: Lars Ellenberg
---
 drivers/block/drbd/drbd_int.h  |  4 ++--
 drivers/block/drbd/drbd_main.c | 34 ++++++++++++++--------------------
 drivers/block/drbd/drbd_nl.c   | 22 ++++++++++++++--------
 3 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index aa35a48e106c..5813d7d3b3b0 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1081,10 +1081,10 @@ extern int drbd_thread_start(struct drbd_thread *thi);
 extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait);
 #ifdef CONFIG_SMP
 extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev);
-extern cpumask_t drbd_calc_cpu_mask(struct drbd_conf *mdev);
+extern void drbd_calc_cpu_mask(struct drbd_conf *mdev);
 #else
 #define drbd_thread_current_set_cpu(A) ({})
-#define drbd_calc_cpu_mask(A) CPU_MASK_ALL
+#define drbd_calc_cpu_mask(A) ({})
 #endif
 extern void drbd_free_resources(struct drbd_conf *mdev);
 extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index ed3362b3ba02..20f4d40e8a4a 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1545,25 +1545,23 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
  * Forces all threads of a device onto the same CPU. This is benificial for
  * DRBD's performance. May be overwritten by user's configuration.
  */
-cpumask_t drbd_calc_cpu_mask(struct drbd_conf *mdev)
+void drbd_calc_cpu_mask(struct drbd_conf *mdev)
 {
-	int sv, cpu;
-	cpumask_t av_cpu_m;
+	int ord, cpu;

-	if (cpus_weight(mdev->cpu_mask))
-		return mdev->cpu_mask;
-
-	av_cpu_m = cpu_online_map;
-	sv = mdev_to_minor(mdev) % cpus_weight(av_cpu_m);
+	/* user override. */
+	if (cpumask_weight(&mdev->cpu_mask))
+		return;

-	for_each_cpu_mask(cpu, av_cpu_m) {
-		if (sv-- == 0)
-			return cpumask_of_cpu(cpu);
+	ord = mdev_to_minor(mdev) % cpumask_weight(cpu_online_mask);
+	for_each_online_cpu(cpu) {
+		if (ord-- == 0) {
+			cpumask_set_cpu(cpu, &mdev->cpu_mask);
+			return;
+		}
 	}
-
-	/* some kernel versions "forget" to add the (cpumask_t) typecast
-	 * to that macro, which results in "parse error before '{'" ;-> */
-	return (cpumask_t) CPU_MASK_ALL; /* Never reached.
*/ + /* should not be reached */ + cpumask_setall(&mdev->cpu_mask); } /** @@ -1586,11 +1584,7 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) if (!thi->reset_cpu_mask) return; thi->reset_cpu_mask = 0; - /* preempt_disable(); - Thas was a kernel that warned about a call to smp_processor_id() while preemt - was not disabled. It seems that this was fixed in manline. */ - set_cpus_allowed(p, mdev->cpu_mask); - /* preempt_enable(); */ + set_cpus_allowed_ptr(p, &mdev->cpu_mask); } #endif diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index c3d438ccd408..936ec73e917b 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1524,16 +1524,20 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n struct crypto_hash *verify_tfm = NULL; struct crypto_hash *csums_tfm = NULL; struct syncer_conf sc; - cpumask_t n_cpu_mask = CPU_MASK_NONE; + cpumask_var_t new_cpu_mask; - memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf)); + if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) { + retcode = ERR_NOMEM; + goto fail; + } if (nlp->flags & DRBD_NL_SET_DEFAULTS) { memset(&sc, 0, sizeof(struct syncer_conf)); sc.rate = DRBD_RATE_DEF; sc.after = DRBD_AFTER_DEF; sc.al_extents = DRBD_AL_EXTENTS_DEF; - } + } else + memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf)); if (!syncer_conf_from_tags(mdev, nlp->tag_list, &sc)) { retcode = ERR_MANDATORY_TAG; @@ -1590,8 +1594,9 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n } /* silently ignore cpu mask on UP kernel */ - if (NR_CPUS > 1 && sc.cpu_mask[0] != 0) { - err = __bitmap_parse(sc.cpu_mask, 32, 0, (unsigned long *)&n_cpu_mask, NR_CPUS); + if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) { + err = __bitmap_parse(sc.cpu_mask, 32, 0, + cpumask_bits(new_cpu_mask), nr_cpu_ids); if (err) { dev_warn(DEV, "__bitmap_parse() failed with %d\n", err); retcode = ERR_CPU_MASK_PARSE; @@ -1652,9 +1657,9 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n if (mdev->state.conn >= C_CONNECTED) drbd_send_sync_param(mdev, &sc); - if (!cpus_equal(mdev->cpu_mask, n_cpu_mask)) { - mdev->cpu_mask = n_cpu_mask; - mdev->cpu_mask = drbd_calc_cpu_mask(mdev); + if (!cpumask_equal(&mdev->cpu_mask, new_cpu_mask)) { + cpumask_copy(&mdev->cpu_mask, new_cpu_mask); + drbd_calc_cpu_mask(mdev); mdev->receiver.reset_cpu_mask = 1; mdev->asender.reset_cpu_mask = 1; mdev->worker.reset_cpu_mask = 1; @@ -1662,6 +1667,7 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); fail: + free_cpumask_var(new_cpu_mask); crypto_free_hash(csums_tfm); crypto_free_hash(verify_tfm); reply->ret_code = retcode; -- cgit v1.2.3 From 0efb18396cebbde3b60231382cb434ecba065407 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 28 Jul 2009 16:14:55 +0200 Subject: Getting rid of a compilation warning for the linux-next tree Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 936ec73e917b..1e87ad034024 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1992,9 +1992,8 @@ static struct cn_handler_struct cnd_table[] = { [ P_new_c_uuid ] = { &drbd_nl_new_c_uuid, 0 }, }; -static void drbd_connector_callback(void *data) +static void drbd_connector_callback(struct cn_msg *req) { 
-	struct cn_msg *req = data;
 	struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data;
 	struct cn_handler_struct *cm;
 	struct cn_msg *cn_reply;
@@ -2016,7 +2015,7 @@ static void drbd_connector_callback(void *data)
 		goto fail;
 	}

-	trace_drbd_netlink(data, 1);
+	trace_drbd_netlink(req, 1);

 	if (nlp->packet_type >= P_nl_after_last_packet) {
 		retcode = ERR_PACKET_NR;
-- cgit v1.2.3

From 17aaa4ae9d0ed9d85e77033ebd825de61c8d17ed Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Wed, 29 Jul 2009 12:53:11 +0200
Subject: drbd: use cpumask_var_t in struct drbd_conf

Any code which can be compiled on x86 should try to avoid cpumask_t
(or even struct cpumask) declarations; we are heading towards struct
cpumask being undefined if CONFIG_CPUMASK_OFFSTACK. The code is the
same for CONFIG_CPUMASK_OFFSTACK=n.

Signed-off-by: Rusty Russell
Signed-off-by: Philipp Reisner
Signed-off-by: Lars Ellenberg
---
 drivers/block/drbd/drbd_int.h  |  2 +-
 drivers/block/drbd/drbd_main.c | 13 +++++++++----
 drivers/block/drbd/drbd_nl.c   |  4 ++--
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 5813d7d3b3b0..10fa153d0f1f 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1006,7 +1006,7 @@ struct drbd_conf {
 	spinlock_t peer_seq_lock;
 	unsigned int minor;
 	unsigned long comm_bm_set; /* communicated number of set bits. */
-	cpumask_t cpu_mask;
+	cpumask_var_t cpu_mask;
 	struct bm_io_work bm_io_work;
 	u64 ed_uuid; /* UUID of the exposed data */
 	struct mutex state_mutex;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 20f4d40e8a4a..9c85a4b595f2 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1550,18 +1550,18 @@ void drbd_calc_cpu_mask(struct drbd_conf *mdev)
 	int ord, cpu;

 	/* user override.
*/ - if (cpumask_weight(&mdev->cpu_mask)) + if (cpumask_weight(mdev->cpu_mask)) return; ord = mdev_to_minor(mdev) % cpumask_weight(cpu_online_mask); for_each_online_cpu(cpu) { if (ord-- == 0) { - cpumask_set_cpu(cpu, &mdev->cpu_mask); + cpumask_set_cpu(cpu, mdev->cpu_mask); return; } } /* should not be reached */ - cpumask_setall(&mdev->cpu_mask); + cpumask_setall(mdev->cpu_mask); } /** @@ -1584,7 +1584,7 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) if (!thi->reset_cpu_mask) return; thi->reset_cpu_mask = 0; - set_cpus_allowed_ptr(p, &mdev->cpu_mask); + set_cpus_allowed_ptr(p, mdev->cpu_mask); } #endif @@ -3001,6 +3001,8 @@ struct drbd_conf *drbd_new_device(unsigned int minor) mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL); if (!mdev) return NULL; + if (!zalloc_cpumask_var(&mdev->cpu_mask, GFP_KERNEL)) + goto out_no_cpumask; mdev->minor = minor; @@ -3079,6 +3081,8 @@ out_no_io_page: out_no_disk: blk_cleanup_queue(q); out_no_q: + free_cpumask_var(mdev->cpu_mask); +out_no_cpumask: kfree(mdev); return NULL; } @@ -3095,6 +3099,7 @@ void drbd_free_mdev(struct drbd_conf *mdev) __free_page(mdev->md_io_page); put_disk(mdev->vdisk); blk_cleanup_queue(mdev->rq_queue); + free_cpumask_var(mdev->cpu_mask); kfree(mdev); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 1e87ad034024..e3b09af69d8b 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1657,8 +1657,8 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n if (mdev->state.conn >= C_CONNECTED) drbd_send_sync_param(mdev, &sc); - if (!cpumask_equal(&mdev->cpu_mask, new_cpu_mask)) { - cpumask_copy(&mdev->cpu_mask, new_cpu_mask); + if (!cpumask_equal(mdev->cpu_mask, new_cpu_mask)) { + cpumask_copy(mdev->cpu_mask, new_cpu_mask); drbd_calc_cpu_mask(mdev); mdev->receiver.reset_cpu_mask = 1; mdev->asender.reset_cpu_mask = 1; -- cgit v1.2.3 From 17f2ded9030bde2d52e623496e86b403bf25aa90 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 29 Jul 2009 12:55:40 +0200 Subject: drbd: fixups for block api changes bio_barrier() and bio_sync() are going away. Signed-off-by: Stephen Rothwell Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 2 +- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/drbd/drbd_req.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 1e53d16c943c..38518691f356 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -117,7 +117,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, /* check for unsupported barrier op. * would rather check on EOPNOTSUPP, but that is not reliable. 
* don't try again for ANY return value != 0 */ - if (unlikely(bio_barrier(bio) && !ok)) { + if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER) && !ok)) { /* Try again with no barrier */ dev_warn(DEV, "Barriers not supported on meta data device - disabling\n"); set_bit(MD_NO_BARRIER, &mdev->flags); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 9c85a4b595f2..030f546967dc 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2370,9 +2370,9 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) /* NOTE: no need to check if barriers supported here as we would * not pass the test in make_request_common in that case */ - if (bio_barrier(req->master_bio)) + if (bio_rw_flagged(req->master_bio, BIO_RW_BARRIER)) dp_flags |= DP_HARDBARRIER; - if (bio_sync(req->master_bio)) + if (bio_rw_flagged(req->master_bio, BIO_RW_SYNCIO)) dp_flags |= DP_RW_SYNC; if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn <= C_PAUSED_SYNC_T) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d2b941cbc0a0..67609ec26a6d 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1020,7 +1020,7 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio) * because of those XXX, this is not yet enabled, * i.e. in drbd_init_set_defaults we set the NO_BARRIER_SUPP bit. */ - if (unlikely(bio_barrier(bio) && test_bit(NO_BARRIER_SUPP, &mdev->flags))) { + if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER) && test_bit(NO_BARRIER_SUPP, &mdev->flags))) { /* dev_warn(DEV, "Rejecting barrier request as underlying device does not support\n"); */ bio_endio(bio, -EOPNOTSUPP); return 0; -- cgit v1.2.3
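
The three cpumask patches above follow one conversion recipe: turn the on-stack
cpumask_t into a cpumask_var_t, allocate it with zalloc_cpumask_var() before
first use, touch it only through the pointer-based helpers (cpumask_weight(),
cpumask_set_cpu(), cpumask_copy(), set_cpus_allowed_ptr()), and release it with
free_cpumask_var(). The sketch below shows that lifecycle in a minimal module.
It is not part of the series: the "cpumask_demo" name and the pick_cpu() helper
are invented for illustration, while the cpumask calls themselves are the real
kernel API the patches use.

/* cpumask_demo.c - minimal sketch of the cpumask_var_t lifecycle,
 * modeled on drbd_calc_cpu_mask() above. Illustrative only: the
 * module and pick_cpu() are made up; the cpumask API is real. */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpumask.h>
#include <linux/gfp.h>

static cpumask_var_t demo_mask;

/* Spread instance number "minor" round-robin over the online CPUs,
 * the same policy drbd_calc_cpu_mask() applies per drbd minor. */
static void pick_cpu(unsigned int minor)
{
	int ord, cpu;

	if (cpumask_weight(demo_mask))
		return;	/* a mask is already configured; honor it */

	ord = minor % cpumask_weight(cpu_online_mask);
	for_each_online_cpu(cpu) {
		if (ord-- == 0) {
			cpumask_set_cpu(cpu, demo_mask);
			return;
		}
	}
	cpumask_setall(demo_mask);	/* should not be reached */
}

static int __init cpumask_demo_init(void)
{
	/* CONFIG_CPUMASK_OFFSTACK=y: kmalloc the mask and zero it.
	 * CONFIG_CPUMASK_OFFSTACK=n: just zero an embedded array. */
	if (!zalloc_cpumask_var(&demo_mask, GFP_KERNEL))
		return -ENOMEM;

	pick_cpu(0);
	pr_info("cpumask_demo: %u cpu(s) in mask\n",
		cpumask_weight(demo_mask));
	return 0;
}

static void __exit cpumask_demo_exit(void)
{
	free_cpumask_var(demo_mask);
}

module_init(cpumask_demo_init);
module_exit(cpumask_demo_exit);
MODULE_LICENSE("GPL");

The point of cpumask_var_t, per the commit message, is that nothing changes for
CONFIG_CPUMASK_OFFSTACK=n, where the type degenerates to a one-element array and
the alloc/free pair costs nothing; only with =y does the mask move off the
stack, which is what keeps large-NR_CPUS kernels from overflowing it.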