author    Philipp Reisner <philipp.reisner@linbit.com>  2009-05-15 12:59:47 +0200
committer Philipp Reisner <philipp.reisner@linbit.com>  2009-07-29 10:32:31 +0200
commit    27fabf42b3c043c104861650fe413cff8db8bd45 (patch)
tree      0e488f2cb779d4bcfcdf48e12afa2b4f5e1247eb
parent    38544ea16e86b0bac2a00e76de6cc15c826d7a10 (diff)
Mostly cleanups, triggered by reviews
Moved lru_cache.c to ~linux/lib
lru_cache - documentation update, type cast removal
Using kmalloc() first, trying vmalloc() as fallback for the page anchors of the bitmap
Making everything kernel-doc compliant
rename mdev->bc to mdev->ldev (to match the recent change to get_ldev/put_ldev)
make drbd thread t_lock irqsave - lockdep complained, and lockdep is right (theoretically)

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
-rw-r--r--  drivers/block/drbd/Kconfig                                     |    1
-rw-r--r--  drivers/block/drbd/Makefile                                    |    2
-rw-r--r--  drivers/block/drbd/drbd_actlog.c                               |  174
-rw-r--r--  drivers/block/drbd/drbd_bitmap.c                               |   99
-rw-r--r--  drivers/block/drbd/drbd_buildtag.c                             |    4
-rw-r--r--  drivers/block/drbd/drbd_int.h                                  |   96
-rw-r--r--  drivers/block/drbd/drbd_main.c                                 |  390
-rw-r--r--  drivers/block/drbd/drbd_nl.c                                   |  113
-rw-r--r--  drivers/block/drbd/drbd_proc.c                                 |    9
-rw-r--r--  drivers/block/drbd/drbd_receiver.c                             |   93
-rw-r--r--  drivers/block/drbd/drbd_req.c                                  |    6
-rw-r--r--  drivers/block/drbd/drbd_tracing.c                              |   40
-rw-r--r--  drivers/block/drbd/drbd_worker.c                               |   52
-rw-r--r--  drivers/block/drbd/lru_cache.h                                 |  116
-rw-r--r--  include/linux/lru_cache.h                                      |  285
-rw-r--r--  lib/Kconfig                                                    |    3
-rw-r--r--  lib/Makefile                                                   |    2
-rw-r--r--  lib/lru_cache.c (renamed from drivers/block/drbd/lru_cache.c)  |  170
18 files changed, 1036 insertions, 619 deletions
diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig
index 7ad8c2a3c2f3..b3676771731d 100644
--- a/drivers/block/drbd/Kconfig
+++ b/drivers/block/drbd/Kconfig
@@ -8,6 +8,7 @@ comment "DRBD disabled because PROC_FS, INET or CONNECTOR not selected"
config BLK_DEV_DRBD
tristate "DRBD Distributed Replicated Block Device support"
depends on PROC_FS && INET && CONNECTOR
+ select LRU_CACHE
help
NOTE: In order to authenticate connections you have to select
diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile
index f0f805cd2051..9dd069b0ded0 100644
--- a/drivers/block/drbd/Makefile
+++ b/drivers/block/drbd/Makefile
@@ -1,6 +1,6 @@
drbd-y := drbd_buildtag.o drbd_bitmap.o drbd_proc.o
drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
-drbd-y += lru_cache.o drbd_main.o drbd_strings.o drbd_nl.o
+drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
drbd_trace-y := drbd_tracing.o drbd_strings.o
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index f1318e57f375..6b096b1720ea 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -200,13 +200,13 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
{
struct lc_element *al_ext;
- struct bm_extent *bm_ext;
+ struct lc_element *tmp;
unsigned long al_flags = 0;
spin_lock_irq(&mdev->al_lock);
- bm_ext = (struct bm_extent *)
- lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
- if (unlikely(bm_ext != NULL)) {
+ tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
+ if (unlikely(tmp != NULL)) {
+ struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
spin_unlock_irq(&mdev->al_lock);
return NULL;
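
The reworked lru_cache API hands back plain struct lc_element pointers;
callers recover their containing type with lc_entry(), a container_of()
wrapper, so the embedded lc_element no longer has to be the first member
of the struct (see the bm_extent reordering in drbd_int.h below). A
minimal standalone sketch of the pattern -- the demo type and names here
are illustrative, not the actual DRBD definitions:

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct lc_element { unsigned int lc_number; };

    struct bm_extent_demo {                /* hypothetical demo type */
            int rs_left;
            struct lc_element lce;         /* need not be the first member */
    };

    int main(void)
    {
            struct bm_extent_demo ext = { .rs_left = 42 };
            struct lc_element *e = &ext.lce;   /* what lc_find() would return */

            /* lc_entry(e, type, member) expands to container_of(e, type, member) */
            struct bm_extent_demo *b = container_of(e, struct bm_extent_demo, lce);
            printf("rs_left=%d\n", b->rs_left);  /* prints 42 */
            return 0;
    }
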
@@ -329,9 +329,9 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
mx = min_t(int, AL_EXTENTS_PT,
mdev->act_log->nr_elements - mdev->al_tr_cycle);
for (i = 0; i < mx; i++) {
- extent_nr = lc_entry(mdev->act_log,
- mdev->al_tr_cycle+i)->lc_number;
- buffer->updates[i+1].pos = cpu_to_be32(mdev->al_tr_cycle+i);
+ unsigned idx = mdev->al_tr_cycle + i;
+ extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number;
+ buffer->updates[i+1].pos = cpu_to_be32(idx);
buffer->updates[i+1].extent = cpu_to_be32(extent_nr);
xor_sum ^= extent_nr;
}
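
With lc_entry() now meaning container_of(), the old lookup-by-slot
lc_entry(cache, index) becomes lc_element_by_index(), as in the loop
above. A compilable stand-in sketch of the slot-iteration idiom used
throughout this patch (the definitions below are simplified stand-ins
for <linux/lru_cache.h>, and LC_FREE marks an unused slot):

    #define LC_FREE (~0U)                      /* stand-in: unused-slot marker */

    struct lc_element { unsigned int lc_number; };

    struct lru_cache_demo {                    /* simplified stand-in cache */
            unsigned int nr_elements;
            struct lc_element *slots;
    };

    static struct lc_element *
    lc_element_by_index(struct lru_cache_demo *lc, unsigned int i)
    {
            return &lc->slots[i];              /* direct array lookup by slot */
    }

    static unsigned int count_in_use(struct lru_cache_demo *lc)
    {
            unsigned int i, used = 0;

            for (i = 0; i < lc->nr_elements; i++) {
                    if (lc_element_by_index(lc, i)->lc_number == LC_FREE)
                            continue;          /* skip free slots, as above */
                    used++;
            }
            return used;
    }
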
@@ -346,10 +346,10 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
buffer->xor_sum = cpu_to_be32(xor_sum);
- sector = mdev->bc->md.md_offset
- + mdev->bc->md.al_offset + mdev->al_tr_pos;
+ sector = mdev->ldev->md.md_offset
+ + mdev->ldev->md.al_offset + mdev->al_tr_pos;
- if (!drbd_md_sync_page_io(mdev, mdev->bc, sector, WRITE)) {
+ if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
drbd_chk_io_error(mdev, 1, TRUE);
drbd_io_error(mdev, TRUE);
}
@@ -370,10 +370,13 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
}
/**
- * drbd_al_read_tr: Reads a single transaction record from the
- * on disk activity log.
- * Returns -1 on IO error, 0 on checksum error and 1 if it is a valid
- * record.
+ * drbd_al_read_tr() - Read a single transaction from the on disk activity log
+ * @mdev: DRBD device.
+ * @bdev: Block device to read from.
+ * @b: pointer to an al_transaction.
+ * @index: On disk slot of the transaction to read.
+ *
+ * Returns -1 on IO error, 0 on checksum error and 1 upon success.
*/
STATIC int drbd_al_read_tr(struct drbd_conf *mdev,
struct drbd_backing_dev *bdev,
@@ -401,9 +404,11 @@ STATIC int drbd_al_read_tr(struct drbd_conf *mdev,
}
/**
- * drbd_al_read_log: Restores the activity log from its on disk
- * representation. Returns 1 on success, returns 0 when
- * reading the log failed due to IO errors.
+ * drbd_al_read_log() - Restores the activity log from its on disk representation.
+ * @mdev: DRBD device.
+ * @bdev: Block device to read from.
+ *
+ * Returns 1 on success, returns 0 when reading the log failed due to IO errors.
*/
int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
{
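
All the comment rewrites in this patch follow the canonical kernel-doc
layout, which the documentation tooling can extract; the general shape is:

    /**
     * function_name() - Short one-line description
     * @arg1: What the first argument means.
     * @arg2: What the second argument means.
     *
     * Optional longer description. Return value conventions are
     * usually spelled out here as well.
     */
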
@@ -557,8 +562,8 @@ STATIC int atodb_prepare_unless_covered(struct drbd_conf *mdev,
{
struct bio *bio;
struct page *page;
- sector_t on_disk_sector = enr + mdev->bc->md.md_offset
- + mdev->bc->md.bm_offset;
+ sector_t on_disk_sector = enr + mdev->ldev->md.md_offset
+ + mdev->ldev->md.bm_offset;
unsigned int page_offset = PAGE_SIZE;
int offset;
int i = 0;
@@ -604,7 +609,7 @@ STATIC int atodb_prepare_unless_covered(struct drbd_conf *mdev,
bio->bi_private = wc;
bio->bi_end_io = atodb_endio;
- bio->bi_bdev = mdev->bc->md_bdev;
+ bio->bi_bdev = mdev->ldev->md_bdev;
bio->bi_sector = on_disk_sector;
if (bio_add_page(bio, page, MD_SECTOR_SIZE, page_offset) != MD_SECTOR_SIZE)
@@ -631,11 +636,11 @@ out_bio_put:
}
/**
- * drbd_al_to_on_disk_bm:
- * Writes the areas of the bitmap which are covered by the
- * currently active extents of the activity log.
- * called when we detach (unconfigure) local storage,
- * or when we go from R_PRIMARY to R_SECONDARY state.
+ * drbd_al_to_on_disk_bm() - Writes bitmap parts covered by active AL extents
+ * @mdev: DRBD device.
+ *
+ * Called when we detach (unconfigure) local storage,
+ * or when we go from R_PRIMARY to R_SECONDARY role.
*/
void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
{
@@ -661,7 +666,7 @@ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
wc.error = 0;
for (i = 0; i < nr_elements; i++) {
- enr = lc_entry(mdev->act_log, i)->lc_number;
+ enr = lc_element_by_index(mdev->act_log, i)->lc_number;
if (enr == LC_FREE)
continue;
/* next statement also does atomic_inc wc.count and local_cnt */
@@ -687,7 +692,7 @@ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
}
}
- drbd_blk_run_queue(bdev_get_queue(mdev->bc->md_bdev));
+ drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev));
/* always (try to) flush bitmap to stable storage */
drbd_md_flush(mdev);
@@ -719,7 +724,7 @@ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
dev_warn(DEV, "Using the slow drbd_al_to_on_disk_bm()\n");
for (i = 0; i < mdev->act_log->nr_elements; i++) {
- enr = lc_entry(mdev->act_log, i)->lc_number;
+ enr = lc_element_by_index(mdev->act_log, i)->lc_number;
if (enr == LC_FREE)
continue;
/* Really slow: if we have al-extents 16..19 active,
@@ -733,8 +738,8 @@ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
}
/**
- * drbd_al_apply_to_bm: Sets the bits in the in-memory bitmap
- * which are described by the active extents of the activity log.
+ * drbd_al_apply_to_bm() - Sets the bitmap to dirty(1) where covered by active AL extents
+ * @mdev: DRBD device.
*/
void drbd_al_apply_to_bm(struct drbd_conf *mdev)
{
@@ -746,7 +751,7 @@ void drbd_al_apply_to_bm(struct drbd_conf *mdev)
wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
for (i = 0; i < mdev->act_log->nr_elements; i++) {
- enr = lc_entry(mdev->act_log, i)->lc_number;
+ enr = lc_element_by_index(mdev->act_log, i)->lc_number;
if (enr == LC_FREE)
continue;
add += drbd_bm_ALe_set_all(mdev, enr);
@@ -773,8 +778,12 @@ static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext)
}
/**
- * drbd_al_shrink: Removes all active extents form the activity log.
- * (but does not write any transactions)
+ * drbd_al_shrink() - Removes all active extents from the activity log
+ * @mdev: DRBD device.
+ *
+ * Removes all active extents from the activity log, waiting until
+ * the reference count of each entry has dropped to 0 first, of course.
+ *
* You need to lock mdev->act_log with lc_try_lock() / lc_unlock()
*/
void drbd_al_shrink(struct drbd_conf *mdev)
@@ -785,7 +794,7 @@ void drbd_al_shrink(struct drbd_conf *mdev)
D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags));
for (i = 0; i < mdev->act_log->nr_elements; i++) {
- al_ext = lc_entry(mdev->act_log, i);
+ al_ext = lc_element_by_index(mdev->act_log, i);
if (al_ext->lc_number == LC_FREE)
continue;
wait_event(mdev->al_wait, _try_lc_del(mdev, al_ext));
@@ -834,7 +843,7 @@ STATIC int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused
STATIC void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
int count, int success)
{
- struct bm_extent *ext;
+ struct lc_element *e;
struct update_odbm_work *udw;
unsigned int enr;
@@ -845,8 +854,9 @@ STATIC void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
* a 16 MB extent border. (Currently this is true...) */
enr = BM_SECT_TO_EXT(sector);
- ext = (struct bm_extent *) lc_get(mdev->resync, enr);
- if (ext) {
+ e = lc_get(mdev->resync, enr);
+ if (e) {
+ struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
if (ext->lce.lc_number == enr) {
if (success)
ext->rs_left -= count;
@@ -1006,7 +1016,7 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
unsigned long sbnr, ebnr, lbnr, flags;
sector_t esector, nr_sectors;
unsigned int enr, count;
- struct bm_extent *ext;
+ struct lc_element *e;
if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
dev_err(DEV, "sector: %llus, size: %d\n",
@@ -1042,9 +1052,9 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
count = drbd_bm_set_bits(mdev, sbnr, ebnr);
enr = BM_SECT_TO_EXT(sector);
- ext = (struct bm_extent *) lc_find(mdev->resync, enr);
- if (ext)
- ext->rs_left += count;
+ e = lc_find(mdev->resync, enr);
+ if (e)
+ lc_entry(e, struct bm_extent, lce)->rs_left += count;
spin_unlock_irqrestore(&mdev->al_lock, flags);
out:
@@ -1054,21 +1064,23 @@ out:
static
struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
{
- struct bm_extent *bm_ext;
+ struct lc_element *e;
+ struct bm_extent *bm_ext;
int wakeup = 0;
- unsigned long rs_flags;
+ unsigned long rs_flags;
spin_lock_irq(&mdev->al_lock);
if (mdev->resync_locked > mdev->resync->nr_elements/2) {
spin_unlock_irq(&mdev->al_lock);
return NULL;
}
- bm_ext = (struct bm_extent *) lc_get(mdev->resync, enr);
+ e = lc_get(mdev->resync, enr);
+ bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
if (bm_ext) {
if (bm_ext->lce.lc_number != enr) {
bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
bm_ext->rs_failed = 0;
- lc_changed(mdev->resync, (struct lc_element *)bm_ext);
+ lc_changed(mdev->resync, &bm_ext->lce);
wakeup = 1;
}
if (bm_ext->lce.refcnt == 1)
@@ -1116,14 +1128,11 @@ static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
}
/**
- * drbd_rs_begin_io: Gets an extent in the resync LRU cache and sets it
- * to BME_LOCKED.
+ * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED
+ * @mdev: DRBD device.
+ * @sector: The sector number.
*
- * @sector: The sector number
- *
- * sleeps on al_wait.
- * returns 1 if successful.
- * returns 0 if interrupted.
+ * This function sleeps on al_wait. Returns 1 on success, 0 if interrupted.
*/
int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
{
@@ -1164,19 +1173,19 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
}
/**
- * drbd_try_rs_begin_io: Gets an extent in the resync LRU cache, sets it
- * to BME_NO_WRITES, then tries to set it to BME_LOCKED.
- *
- * @sector: The sector number
+ * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep
+ * @mdev: DRBD device.
+ * @sector: The sector number.
*
- * does not sleep.
- * returns zero if we could set BME_LOCKED and can proceed,
- * -EAGAIN if we need to try again.
+ * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then
+ * tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN
+ * if there is still application IO going on in this area.
*/
int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
{
unsigned int enr = BM_SECT_TO_EXT(sector);
const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
+ struct lc_element *e;
struct bm_extent *bm_ext;
int i;
@@ -1203,8 +1212,8 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
"dropping %u, aparently got 'synced' by application io\n",
mdev->resync_wenr);
- bm_ext = (struct bm_extent *)
- lc_find(mdev->resync, mdev->resync_wenr);
+ e = lc_find(mdev->resync, mdev->resync_wenr);
+ bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
if (bm_ext) {
D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
@@ -1217,7 +1226,9 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
dev_alert(DEV, "LOGIC BUG\n");
}
}
- bm_ext = (struct bm_extent *)lc_try_get(mdev->resync, enr);
+ /* TRY. */
+ e = lc_try_get(mdev->resync, enr);
+ bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
if (bm_ext) {
if (test_bit(BME_LOCKED, &bm_ext->flags))
goto proceed;
@@ -1236,13 +1247,16 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
}
goto check_al;
} else {
+ /* do we rather want to try later? */
if (mdev->resync_locked > mdev->resync->nr_elements-3) {
trace_drbd_resync(mdev, TRACE_LVL_ALL,
"resync_locked = %u!\n", mdev->resync_locked);
goto try_again;
}
- bm_ext = (struct bm_extent *)lc_get(mdev->resync, enr);
+ /* Do or do not. There is no try. -- Yoda */
+ e = lc_get(mdev->resync, enr);
+ bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
if (!bm_ext) {
const unsigned long rs_flags = mdev->resync->flags;
if (rs_flags & LC_STARVING)
@@ -1254,7 +1268,7 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
if (bm_ext->lce.lc_number != enr) {
bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
bm_ext->rs_failed = 0;
- lc_changed(mdev->resync, (struct lc_element *)bm_ext);
+ lc_changed(mdev->resync, &bm_ext->lce);
wake_up(&mdev->al_wait);
D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0);
}
@@ -1289,6 +1303,7 @@ try_again:
void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
{
unsigned int enr = BM_SECT_TO_EXT(sector);
+ struct lc_element *e;
struct bm_extent *bm_ext;
unsigned long flags;
@@ -1297,7 +1312,8 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
(long long)sector, enr);
spin_lock_irqsave(&mdev->al_lock, flags);
- bm_ext = (struct bm_extent *) lc_find(mdev->resync, enr);
+ e = lc_find(mdev->resync, enr);
+ bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
if (!bm_ext) {
spin_unlock_irqrestore(&mdev->al_lock, flags);
dev_err(DEV, "drbd_rs_complete_io() called, but extent not found\n");
@@ -1312,7 +1328,7 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
return;
}
- if (lc_put(mdev->resync, (struct lc_element *)bm_ext) == 0) {
+ if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
clear_bit(BME_LOCKED, &bm_ext->flags);
clear_bit(BME_NO_WRITES, &bm_ext->flags);
mdev->resync_locked--;
@@ -1323,8 +1339,8 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
}
/**
- * drbd_rs_cancel_all: Removes extents from the resync LRU. Even
- * if they are BME_LOCKED.
+ * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED)
+ * @mdev: DRBD device.
*/
void drbd_rs_cancel_all(struct drbd_conf *mdev)
{
@@ -1343,13 +1359,15 @@ void drbd_rs_cancel_all(struct drbd_conf *mdev)
}
/**
- * drbd_rs_del_all: Gracefully remove all extents from the resync LRU.
- * there may be still a reference hold by someone. In that case this function
- * returns -EAGAIN.
- * In case all elements got removed it returns zero.
+ * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU
+ * @mdev: DRBD device.
+ *
+ * Returns 0 upon success, -EAGAIN if at least one reference count was
+ * not zero.
*/
int drbd_rs_del_all(struct drbd_conf *mdev)
{
+ struct lc_element *e;
struct bm_extent *bm_ext;
int i;
@@ -1360,7 +1378,8 @@ int drbd_rs_del_all(struct drbd_conf *mdev)
if (get_ldev_if_state(mdev, D_FAILED)) {
/* ok, ->resync is there. */
for (i = 0; i < mdev->resync->nr_elements; i++) {
- bm_ext = (struct bm_extent *) lc_entry(mdev->resync, i);
+ e = lc_element_by_index(mdev->resync, i);
+ bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
if (bm_ext->lce.lc_number == LC_FREE)
continue;
if (bm_ext->lce.lc_number == mdev->resync_wenr) {
@@ -1392,10 +1411,11 @@ int drbd_rs_del_all(struct drbd_conf *mdev)
return 0;
}
-/* Record information on a failure to resync the specified blocks
- *
- * called on C_SYNC_TARGET when resync write fails or P_NEG_RS_DREPLY received
- *
+/**
+ * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
+ * @mdev: DRBD device.
+ * @sector: The sector number.
+ * @size: Size of failed IO operation, in bytes.
*/
void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size)
{
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 213fa12da121..d9b59b0611b0 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -80,6 +80,7 @@ struct drbd_bitmap {
/* definition of bits in bm_flags */
#define BM_LOCKED 0
#define BM_MD_IO_ERROR 1
+#define BM_P_VMALLOCED 2
static int bm_is_locked(struct drbd_bitmap *b)
{
@@ -214,15 +215,23 @@ STATIC void bm_free_pages(struct page **pages, unsigned long number)
}
}
+STATIC void bm_vk_free(void *ptr, int v)
+{
+ if (v)
+ vfree(ptr);
+ else
+ kfree(ptr);
+}
+
/*
* "have" and "want" are NUMBER OF PAGES.
*/
-STATIC struct page **bm_realloc_pages(struct page **old_pages,
- unsigned long have,
- unsigned long want)
+STATIC struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
{
+ struct page **old_pages = b->bm_pages;
struct page **new_pages, *page;
- unsigned int i, bytes;
+ unsigned int i, bytes, vmalloced = 0;
+ unsigned long have = b->bm_number_of_pages;
BUG_ON(have == 0 && old_pages != NULL);
BUG_ON(have != 0 && old_pages == NULL);
@@ -230,27 +239,15 @@ STATIC struct page **bm_realloc_pages(struct page **old_pages,
if (have == want)
return old_pages;
- /* To use kmalloc here is ok, as long as we support 4TB at max...
- * otherwise this might become bigger than 128KB, which is
- * the maximum for kmalloc.
- *
- * no, it is not: on 64bit boxes, sizeof(void*) == 8,
- * 128MB bitmap @ 4K pages -> 256K of page pointers.
- * ==> use vmalloc for now again.
- * then again, we could do something like
- * if (nr_pages > watermark) vmalloc else kmalloc :*> ...
- * or do cascading page arrays:
- * one page for the page array of the page array,
- * those pages for the real bitmap pages.
- * there we could even add some optimization members,
- * so we won't need to kmap_atomic in bm_find_next_bit just to see
- * that the page has no bits set ...
- * or we can try a "huge" page ;-)
- */
+ /* Trying kmalloc first, falling back to vmalloc... */
bytes = sizeof(struct page *)*want;
- new_pages = vmalloc(bytes);
- if (!new_pages)
- return NULL;
+ new_pages = kmalloc(bytes, GFP_KERNEL);
+ if (!new_pages) {
+ new_pages = vmalloc(bytes);
+ if (!new_pages)
+ return NULL;
+ vmalloced = 1;
+ }
memset(new_pages, 0, bytes);
if (want >= have) {
@@ -260,7 +257,7 @@ STATIC struct page **bm_realloc_pages(struct page **old_pages,
page = alloc_page(GFP_HIGHUSER);
if (!page) {
bm_free_pages(new_pages + have, i - have);
- vfree(new_pages);
+ bm_vk_free(new_pages, vmalloced);
return NULL;
}
new_pages[i] = page;
@@ -273,6 +270,11 @@ STATIC struct page **bm_realloc_pages(struct page **old_pages,
*/
}
+ if (vmalloced)
+ set_bit(BM_P_VMALLOCED, &b->bm_flags);
+ else
+ clear_bit(BM_P_VMALLOCED, &b->bm_flags);
+
return new_pages;
}
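
The allocation strategy above is the usual kmalloc-first, vmalloc-fallback
pattern: kmalloc() returns physically contiguous memory and is cheap, but
large page-pointer arrays (the removed comment does the arithmetic: a
128MB bitmap worth of 4K pages needs 256K of pointers on 64bit) may exceed
what kmalloc can serve, so the code falls back to vmalloc() and records
which allocator was used so the matching free can be issued later.
Condensed to its core (the helper name is made up for illustration):

    /* needs <linux/slab.h>, <linux/vmalloc.h>, <linux/string.h> */
    static void **alloc_anchor_array(unsigned long want, int *vmalloced)
    {
            size_t bytes = sizeof(void *) * want;
            void **p;

            *vmalloced = 0;
            p = kmalloc(bytes, GFP_KERNEL);    /* fast, physically contiguous */
            if (!p) {
                    p = vmalloc(bytes);        /* fallback for large arrays */
                    if (!p)
                            return NULL;
                    *vmalloced = 1;            /* caller must vfree(), not kfree() */
            }
            memset(p, 0, bytes);
            return p;
    }

Much later kernels fold exactly this pattern into kvmalloc()/kvfree().
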
@@ -308,7 +310,7 @@ void drbd_bm_cleanup(struct drbd_conf *mdev)
{
ERR_IF (!mdev->bitmap) return;
bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
- vfree(mdev->bitmap->bm_pages);
+ bm_vk_free(mdev->bitmap->bm_pages, test_bit(BM_P_VMALLOCED, &mdev->bitmap->bm_flags));
kfree(mdev->bitmap);
mdev->bitmap = NULL;
}
@@ -462,6 +464,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity)
unsigned long want, have, onpages; /* number of pages */
struct page **npages, **opages = NULL;
int err = 0, growing;
+ int opages_vmalloced;
ERR_IF(!b) return -ENOMEM;
@@ -473,6 +476,8 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity)
if (capacity == b->bm_dev_capacity)
goto out;
+ opages_vmalloced = test_bit(BM_P_VMALLOCED, &b->bm_flags);
+
if (capacity == 0) {
spin_lock_irq(&b->bm_lock);
opages = b->bm_pages;
@@ -486,7 +491,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity)
b->bm_dev_capacity = 0;
spin_unlock_irq(&b->bm_lock);
bm_free_pages(opages, onpages);
- vfree(opages);
+ bm_vk_free(opages, opages_vmalloced);
goto out;
}
bits = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));
@@ -499,7 +504,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity)
words = ALIGN(bits, 64) >> LN2_BPL;
if (get_ldev(mdev)) {
- D_ASSERT((u64)bits <= (((u64)mdev->bc->md.md_size_sect-MD_BM_OFFSET) << 12));
+ D_ASSERT((u64)bits <= (((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12));
put_ldev(mdev);
}
@@ -513,7 +518,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity)
if (FAULT_ACTIVE(mdev, DRBD_FAULT_BM_ALLOC))
npages = NULL;
else
- npages = bm_realloc_pages(b->bm_pages, have, want);
+ npages = bm_realloc_pages(b, want);
}
if (!npages) {
@@ -557,7 +562,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity)
spin_unlock_irq(&b->bm_lock);
if (opages != npages)
- vfree(opages);
+ bm_vk_free(opages, opages_vmalloced);
dev_info(DEV, "resync bitmap: bits=%lu words=%lu\n", bits, words);
out:
@@ -753,15 +758,15 @@ STATIC void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int
struct bio *bio = bio_alloc(GFP_KERNEL, 1);
unsigned int len;
sector_t on_disk_sector =
- mdev->bc->md.md_offset + mdev->bc->md.bm_offset;
+ mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset;
on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);
/* this might happen with very small
* flexible external meta data device */
len = min_t(unsigned int, PAGE_SIZE,
- (drbd_md_last_sector(mdev->bc) - on_disk_sector + 1)<<9);
+ (drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9);
- bio->bi_bdev = mdev->bc->md_bdev;
+ bio->bi_bdev = mdev->ldev->md_bdev;
bio->bi_sector = on_disk_sector;
bio_add_page(bio, b->bm_pages[page_nr], len, 0);
bio->bi_private = b;
@@ -839,7 +844,7 @@ STATIC int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local)
for (i = 0; i < num_pages; i++)
bm_page_io_async(mdev, b, i, rw);
- drbd_blk_run_queue(bdev_get_queue(mdev->bc->md_bdev));
+ drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev));
wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0);
if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) {
@@ -870,9 +875,8 @@ STATIC int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local)
}
/**
- * drbd_bm_read: Read the whole bitmap from its on disk location.
- *
- * currently only called from "drbd_nl_disk_conf"
+ * drbd_bm_read() - Read the whole bitmap from its on disk location.
+ * @mdev: DRBD device.
*/
int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)
{
@@ -880,9 +884,8 @@ int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)
}
/**
- * drbd_bm_write: Write the whole bitmap to its on disk location.
- *
- * called at various occasions.
+ * drbd_bm_write() - Write the whole bitmap to its on disk location.
+ * @mdev: DRBD device.
*/
int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
{
@@ -890,16 +893,18 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
}
/**
- * drbd_bm_write_sect: Writes a 512 byte piece of the bitmap to its
- * on disk location. On disk bitmap is little endian.
- *
- * @enr: The _sector_ offset from the start of the bitmap.
+ * drbd_bm_write_sect() - Writes a 512 (MD_SECTOR_SIZE) byte piece of the bitmap
+ * @mdev: DRBD device.
+ * @enr: Extent number in the resync lru (happens to be sector offset)
*
+ * The BM_EXT_SIZE is on purpose exactly the amount of the bitmap covered
+ * by a single sector write. Therefore enr == sector offset from the
+ * start of the bitmap.
*/
int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local)
{
- sector_t on_disk_sector = enr + mdev->bc->md.md_offset
- + mdev->bc->md.bm_offset;
+ sector_t on_disk_sector = enr + mdev->ldev->md.md_offset
+ + mdev->ldev->md.bm_offset;
int bm_words, num_words, offset;
int err = 0;
@@ -911,7 +916,7 @@ int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(lo
memset(page_address(mdev->md_io_page), 0, MD_SECTOR_SIZE);
drbd_bm_get_lel(mdev, offset, num_words,
page_address(mdev->md_io_page));
- if (!drbd_md_sync_page_io(mdev, mdev->bc, on_disk_sector, WRITE)) {
+ if (!drbd_md_sync_page_io(mdev, mdev->ldev, on_disk_sector, WRITE)) {
int i;
err = -EIO;
dev_err(DEV, "IO ERROR writing bitmap sector %lu "
diff --git a/drivers/block/drbd/drbd_buildtag.c b/drivers/block/drbd/drbd_buildtag.c
index 213234342e70..20fe72a104d3 100644
--- a/drivers/block/drbd/drbd_buildtag.c
+++ b/drivers/block/drbd/drbd_buildtag.c
@@ -2,6 +2,6 @@
#include <linux/drbd_config.h>
const char *drbd_buildtag(void)
{
- return "GIT-hash: c522e740ae3163f5a5ff83c0c58d9f2801299961 drbd/drbd_int.h"
- " build by phil@fat-tyre, 2009-05-05 17:15:39";
+ return "GIT-hash: b0abb3832a730d4fbd145013f6f51fc977bba3cc drbd/drbd_int.h"
+ " build by phil@fat-tyre, 2009-05-15 11:54:26";
}
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index dcc35bf67eea..83f9f33e65ea 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -40,7 +40,7 @@
#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <net/tcp.h>
-#include "lru_cache.h"
+#include <linux/lru_cache.h>
#ifdef __CHECKER__
# define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr")))
@@ -180,10 +180,7 @@ drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) {
extern struct drbd_conf **minor_table;
extern struct ratelimit_state drbd_ratelimit_state;
-/***
- * on the wire
- *********************************************************************/
-
+/* on the wire */
enum drbd_packets {
/* receiver (data socket) */
P_DATA = 0x00,
@@ -891,7 +888,7 @@ struct drbd_conf {
/* configured by drbdsetup */
struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */
struct syncer_conf sync_conf;
- struct drbd_backing_dev *bc __protected_by(local);
+ struct drbd_backing_dev *ldev __protected_by(local);
sector_t p_size; /* partner's disk size */
struct request_queue *rq_queue;
@@ -1148,7 +1145,7 @@ extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size)
extern int drbd_send_bitmap(struct drbd_conf *mdev);
extern int _drbd_send_bitmap(struct drbd_conf *mdev);
extern int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode);
-extern void drbd_free_bc(struct drbd_backing_dev *bc);
+extern void drbd_free_bc(struct drbd_backing_dev *ldev);
extern int drbd_io_error(struct drbd_conf *mdev, int forcedetach);
extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
@@ -1210,10 +1207,10 @@ extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf
/* resync bitmap */
/* 16MB sized 'bitmap extent' to track syncer usage */
struct bm_extent {
- struct lc_element lce;
int rs_left; /* number of bits set (out of sync) in this extent. */
int rs_failed; /* number of failed resync requests in this extent. */
unsigned long flags;
+ struct lc_element lce;
};
#define BME_NO_WRITES 0 /* bm_extent.flags: no more requests on this one! */
@@ -1536,7 +1533,9 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
const struct drbd_epoch_entry* e);
-/** DRBD State macros:
+/**
+ * DOC: DRBD State macros
+ *
* These macros are used to express state changes in easily readable form.
*
* The NS macros expand to a mask and a value, that can be bit ored onto the
@@ -1613,6 +1612,16 @@ static inline int _drbd_set_state(struct drbd_conf *mdev,
return rv;
}
+/**
+ * drbd_request_state() - Request a state change
+ * @mdev: DRBD device.
+ * @mask: mask of state bits to change.
+ * @val: value of new state bits.
+ *
+ * This is the most graceful way of requesting a state change. It is
+ * quite verbose in case the state change is not possible, and all those
+ * state changes are globally serialized.
+ */
static inline int drbd_request_state(struct drbd_conf *mdev,
union drbd_state mask,
union drbd_state val)
@@ -1620,13 +1629,9 @@ static inline int drbd_request_state(struct drbd_conf *mdev,
return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
}
-/**
- * drbd_chk_io_error: Handles the on_io_error setting, should be called from
- * all io completion handlers. See also drbd_io_error().
- */
static inline void __drbd_chk_io_error(struct drbd_conf *mdev, int forcedetach)
{
- switch (mdev->bc->dc.on_io_error) {
+ switch (mdev->ldev->dc.on_io_error) {
case EP_PASS_ON:
if (!forcedetach) {
if (printk_ratelimit())
@@ -1644,6 +1649,14 @@ static inline void __drbd_chk_io_error(struct drbd_conf *mdev, int forcedetach)
}
}
+/**
+ * drbd_chk_io_error() - Handle the on_io_error setting, should be called from all io completion handlers
+ * @mdev: DRBD device.
+ * @error: Error code passed to the IO completion callback
+ * @forcedetach: Force detach. I.e. the error happened while accessing the meta data
+ *
+ * See also drbd_io_error().
+ */
static inline void drbd_chk_io_error(struct drbd_conf *mdev,
int error, int forcedetach)
{
@@ -1655,9 +1668,13 @@ static inline void drbd_chk_io_error(struct drbd_conf *mdev,
}
}
-/* Returns the first sector number of our meta data,
- * which, for internal meta data, happens to be the maximum capacity
- * we could agree upon with our peer
+
+/**
+ * drbd_md_first_sector() - Returns the first sector number of the meta data area
+ * @bdev: Meta data block device.
+ *
+ * BTW, for internal meta data, this happens to be the maximum capacity
+ * we could agree upon with our peer node.
*/
static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
{
@@ -1671,8 +1688,10 @@ static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
}
}
-/* returns the last sector number of our meta data,
- * to be able to catch out of band md access */
+/**
+ * drbd_md_last_sector() - Return the last sector number of the meta data area
+ * @bdev: Meta data block device.
+ */
static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
{
switch (bdev->dc.meta_dev_idx) {
@@ -1685,16 +1704,19 @@ static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
}
}
-/* Returns the number of 512 byte sectors of the device */
static inline sector_t drbd_get_capacity(struct block_device *bdev)
{
- /* return bdev ? get_capacity(bdev->bd_disk) : 0; */
- return bdev ? bdev->bd_inode->i_size >> 9 : 0;
+ return bdev ? get_capacity(bdev->bd_disk) : 0;
}
-/* returns the capacity we announce to out peer.
- * we clip ourselves at the various MAX_SECTORS, because if we don't,
- * current implementation will oops sooner or later */
+/**
+ * drbd_get_max_capacity() - Returns the capacity we announce to our peer
+ * @bdev: Meta data block device.
+ *
+ * Returns the capacity we announce to our peer. We clip ourselves at the
+ * various MAX_SECTORS, because if we don't, the current implementation will
+ * oops sooner or later.
+ */
static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
{
sector_t s;
@@ -1721,7 +1743,11 @@ static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
return s;
}
-/* returns the sector number of our meta data 'super' block */
+/**
+ * drbd_md_ss__() - Return the sector number of our meta data super block
+ * @mdev: DRBD device.
+ * @bdev: Meta data block device.
+ */
static inline sector_t drbd_md_ss__(struct drbd_conf *mdev,
struct drbd_backing_dev *bdev)
{
@@ -1909,8 +1935,10 @@ static inline void put_net_conf(struct drbd_conf *mdev)
}
/**
- * get_net_conf: Returns TRUE when it is ok to access mdev->net_conf. You
- * should call put_net_conf() when finished looking at mdev->net_conf.
+ * get_net_conf() - Increase ref count on mdev->net_conf. Returns 0 if nothing there
+ * @mdev: DRBD device.
+ *
+ * You have to call put_net_conf() when finished working with mdev->net_conf.
*/
static inline int get_net_conf(struct drbd_conf *mdev)
{
@@ -1924,11 +1952,13 @@ static inline int get_net_conf(struct drbd_conf *mdev)
}
/**
- * get_ldev: Returns TRUE when local IO is possible. If it returns
- * TRUE you should call put_ldev() after IO is completed.
+ * get_ldev() - Increase the ref count on mdev->ldev. Returns 0 if there is no ldev
+ * @M: DRBD device.
+ *
+ * You have to call put_ldev() when finished working with mdev->ldev.
*/
-#define get_ldev_if_state(M,MINS) __cond_lock(local, _get_ldev_if_state(M,MINS))
#define get_ldev(M) __cond_lock(local, _get_ldev_if_state(M,D_INCONSISTENT))
+#define get_ldev_if_state(M,MINS) __cond_lock(local, _get_ldev_if_state(M,MINS))
static inline void put_ldev(struct drbd_conf *mdev)
{
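
Both helpers follow the same reference-count discipline: a non-zero
return hands the caller a reference that must be dropped again. The
usage idiom, as it appears throughout this patch:

    if (get_ldev(mdev)) {
            /* mdev->ldev is guaranteed to stay valid in this block */
            fp = mdev->ldev->dc.fencing;
            put_ldev(mdev);
    }
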
@@ -2197,7 +2227,7 @@ static inline void drbd_blk_run_queue(struct request_queue *q)
static inline void drbd_kick_lo(struct drbd_conf *mdev)
{
if (get_ldev(mdev)) {
- drbd_blk_run_queue(bdev_get_queue(mdev->bc->backing_bdev));
+ drbd_blk_run_queue(bdev_get_queue(mdev->ldev->backing_bdev));
put_ldev(mdev);
}
}
@@ -2209,7 +2239,7 @@ static inline void drbd_md_flush(struct drbd_conf *mdev)
if (test_bit(MD_NO_BARRIER, &mdev->flags))
return;
- r = blkdev_issue_flush(mdev->bc->md_bdev, NULL);
+ r = blkdev_issue_flush(mdev->ldev->md_bdev, NULL);
if (r) {
set_bit(MD_NO_BARRIER, &mdev->flags);
dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 3c377d326570..ad296842b960 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -188,7 +188,16 @@ int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
#endif
-/************************* The transfer log start */
+/**
+ * DOC: The transfer log
+ *
+ * The transfer log is a singly linked list of &struct drbd_tl_epoch objects.
+ * mdev->newest_tle points to the head, mdev->oldest_tle points to the tail
+ * of the list. There is always at least one &struct drbd_tl_epoch object.
+ *
+ * Each &struct drbd_tl_epoch has a circular doubly linked list of requests
+ * attached.
+ */
STATIC int tl_init(struct drbd_conf *mdev)
{
struct drbd_tl_epoch *b;
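
A simplified model of the structure this DOC comment describes; the
field names below are illustrative stand-ins, not the actual
struct drbd_tl_epoch layout:

    #include <linux/list.h>

    struct tl_epoch_demo {
            struct tl_epoch_demo *next;     /* the singly linked epoch chain */
            struct list_head requests;      /* circular doubly linked requests */
            unsigned int br_number;         /* barrier number of this epoch */
    };

New requests are queued on the newest epoch; a barrier ack retires the
oldest one (tl_release() below), and tl_clear() empties the whole chain
after a connection loss.
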
@@ -227,7 +236,11 @@ STATIC void tl_cleanup(struct drbd_conf *mdev)
}
/**
- * _tl_add_barrier: Adds a barrier to the TL.
+ * _tl_add_barrier() - Adds a barrier to the transfer log
+ * @mdev: DRBD device.
+ * @new: Barrier to be added before the current head of the TL.
+ *
+ * The caller must hold the req_lock.
*/
void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new)
{
@@ -249,7 +262,16 @@ void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new)
}
}
-/* when we receive a barrier ack */
+/**
+ * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
+ * @mdev: DRBD device.
+ * @barrier_nr: Expected identifier of the DRBD write barrier packet.
+ * @set_size: Expected number of requests before that barrier.
+ *
+ * In case the passed barrier_nr or set_size does not match the oldest
+ * &struct drbd_tl_epoch object, this function will cause a termination
+ * of the connection.
+ */
void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
unsigned int set_size)
{
@@ -322,8 +344,14 @@ bail:
}
-/* called by drbd_disconnect (exiting receiver thread)
- * or from some after_state_ch */
+/**
+ * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
+ * @mdev: DRBD device.
+ *
+ * This is called after the connection to the peer was lost. The storage covered
+ * by the requests on the transfer log gets marked as out of sync. Called from the
+ * receiver thread and the worker thread.
+ */
void tl_clear(struct drbd_conf *mdev)
{
struct drbd_tl_epoch *b, *tmp;
@@ -383,16 +411,14 @@ void tl_clear(struct drbd_conf *mdev)
}
/**
- * drbd_io_error: Handles the on_io_error setting, should be called in the
- * unlikely(!drbd_bio_uptodate(e->bio)) case from kernel thread context.
- * See also drbd_chk_io_error
+ * drbd_io_error() - Detach from the local disk if so configured with the on_io_error setting
+ * @mdev: DRBD device.
+ * @force_detach: Detach no matter how on_io_error is set (meta data IO error)
*
- * NOTE: we set ourselves FAILED here if on_io_error is EP_DETACH or Panic OR
- * if the forcedetach flag is set. This flag is set when failures
- * occur writing the meta data portion of the disk as they are
- * not recoverable.
+ * Should be called in the unlikely(!drbd_bio_uptodate(e->bio)) case from
+ * kernel thread context. See also drbd_chk_io_error().
*/
-int drbd_io_error(struct drbd_conf *mdev, int forcedetach)
+int drbd_io_error(struct drbd_conf *mdev, int force_detach)
{
enum drbd_io_error_p eh;
unsigned long flags;
@@ -401,11 +427,11 @@ int drbd_io_error(struct drbd_conf *mdev, int forcedetach)
eh = EP_PASS_ON;
if (get_ldev_if_state(mdev, D_FAILED)) {
- eh = mdev->bc->dc.on_io_error;
+ eh = mdev->ldev->dc.on_io_error;
put_ldev(mdev);
}
- if (!forcedetach && eh == EP_PASS_ON)
+ if (!force_detach && eh == EP_PASS_ON)
return 1;
spin_lock_irqsave(&mdev->req_lock, flags);
@@ -441,9 +467,10 @@ int drbd_io_error(struct drbd_conf *mdev, int forcedetach)
}
/**
- * cl_wide_st_chg:
- * Returns TRUE if this state change should be preformed as a cluster wide
- * transaction. Of course it returns 0 as soon as the connection is lost.
+ * cl_wide_st_chg() - TRUE if the state change is a cluster wide one
+ * @mdev: DRBD device.
+ * @os: old (current) state.
+ * @ns: new (wanted) state.
*/
STATIC int cl_wide_st_chg(struct drbd_conf *mdev,
union drbd_state os, union drbd_state ns)
@@ -474,6 +501,12 @@ int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
return rv;
}
+/**
+ * drbd_force_state() - Impose a change which happens outside our control on our state
+ * @mdev: DRBD device.
+ * @mask: mask of state bits to change.
+ * @val: value of new state bits.
+ */
void drbd_force_state(struct drbd_conf *mdev,
union drbd_state mask, union drbd_state val)
{
@@ -523,10 +556,14 @@ STATIC enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev,
}
/**
- * _drbd_request_state:
- * This function is the most gracefull way to change state. For some state
- * transition this function even does a cluster wide transaction.
- * It has a cousin named drbd_request_state(), which is always verbose.
+ * drbd_req_state() - Perform a possibly cluster wide state change
+ * @mdev: DRBD device.
+ * @mask: mask of state bits to change.
+ * @val: value of new state bits.
+ * @f: flags
+ *
+ * Should not be called directly, use drbd_request_state() or
+ * _drbd_request_state().
*/
STATIC int drbd_req_state(struct drbd_conf *mdev,
union drbd_state mask, union drbd_state val,
@@ -601,10 +638,14 @@ abort:
}
/**
- * _drbd_request_state:
- * This function is the most gracefull way to change state. For some state
- * transition this function even does a cluster wide transaction.
- * It has a cousin named drbd_request_state(), which is always verbose.
+ * _drbd_request_state() - Request a state change (with flags)
+ * @mdev: DRBD device.
+ * @mask: mask of state bits to change.
+ * @val: value of new state bits.
+ * @f: flags
+ *
+ * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
+ * flag, or when logging of failed state change requests is not desired.
*/
int _drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
union drbd_state val, enum chg_state_flags f)
@@ -659,6 +700,11 @@ void print_st_err(struct drbd_conf *mdev,
A##s_to_name(ns.A)); \
} })
+/**
+ * is_valid_state() - Returns an SS_ error code if ns is not valid
+ * @mdev: DRBD device.
+ * @ns: State to consider.
+ */
STATIC int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
{
/* See drbd_state_sw_errors in drbd_strings.c */
@@ -668,7 +714,7 @@ STATIC int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
fp = FP_DONT_CARE;
if (get_ldev(mdev)) {
- fp = mdev->bc->dc.fencing;
+ fp = mdev->ldev->dc.fencing;
put_ldev(mdev);
}
@@ -718,6 +764,12 @@ STATIC int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
return rv;
}
+/**
+ * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible
+ * @mdev: DRBD device.
+ * @ns: new state.
+ * @os: old state.
+ */
STATIC int is_valid_state_transition(struct drbd_conf *mdev,
union drbd_state ns, union drbd_state os)
{
@@ -759,6 +811,16 @@ STATIC int is_valid_state_transition(struct drbd_conf *mdev,
return rv;
}
+/**
+ * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
+ * @mdev: DRBD device.
+ * @os: old state.
+ * @ns: new state.
+ * @warn_sync_abort:
+ *
+ * When we lose connection, we have to set the state of the peer's disk (pdsk)
+ * to D_UNKNOWN. This rule and many more along those lines are in this function.
+ */
STATIC union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
union drbd_state ns, int *warn_sync_abort)
{
@@ -766,7 +828,7 @@ STATIC union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
fp = FP_DONT_CARE;
if (get_ldev(mdev)) {
- fp = mdev->bc->dc.fencing;
+ fp = mdev->ldev->dc.fencing;
put_ldev(mdev);
}
@@ -791,7 +853,7 @@ STATIC union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
ns.pdsk = D_UNKNOWN;
}
- /* Clear the aftr_isp when becomming Unconfigured */
+ /* Clear the aftr_isp when becoming unconfigured */
if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
ns.aftr_isp = 0;
@@ -854,7 +916,7 @@ STATIC union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
/* Connection breaks down before we finished "Negotiating" */
if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
get_ldev_if_state(mdev, D_NEGOTIATING)) {
- if (mdev->ed_uuid == mdev->bc->md.uuid[UI_CURRENT]) {
+ if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) {
ns.disk = mdev->new_state_tmp.disk;
ns.pdsk = mdev->new_state_tmp.pdsk;
} else {
@@ -886,6 +948,15 @@ STATIC union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
return ns;
}
+/**
+ * __drbd_set_state() - Set a new DRBD state
+ * @mdev: DRBD device.
+ * @ns: new state.
+ * @flags: Flags
+ * @done: Optional completion, that will get completed after after_state_ch() has finished
+ *
+ * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
+ */
int __drbd_set_state(struct drbd_conf *mdev,
union drbd_state ns, enum chg_state_flags flags,
struct completion *done)
@@ -966,7 +1037,7 @@ int __drbd_set_state(struct drbd_conf *mdev,
wake_up(&mdev->misc_wait);
wake_up(&mdev->state_wait);
- /** post-state-change actions **/
+ /* post-state-change actions */
if (os.conn >= C_SYNC_SOURCE && ns.conn <= C_CONNECTED) {
set_bit(STOP_SYNC_TIMER, &mdev->flags);
mod_timer(&mdev->resync_timer, jiffies);
@@ -1010,7 +1081,7 @@ int __drbd_set_state(struct drbd_conf *mdev,
}
if (get_ldev(mdev)) {
- u32 mdf = mdev->bc->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
+ u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);
@@ -1027,12 +1098,12 @@ int __drbd_set_state(struct drbd_conf *mdev,
mdf |= MDF_WAS_UP_TO_DATE;
if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT)
mdf |= MDF_PEER_OUT_DATED;
- if (mdf != mdev->bc->md.flags) {
- mdev->bc->md.flags = mdf;
+ if (mdf != mdev->ldev->md.flags) {
+ mdev->ldev->md.flags = mdf;
drbd_md_mark_dirty(mdev);
}
if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
- drbd_set_ed_uuid(mdev, mdev->bc->md.uuid[UI_CURRENT]);
+ drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]);
put_ldev(mdev);
}
@@ -1102,6 +1173,13 @@ static void abw_start_sync(struct drbd_conf *mdev, int rv)
}
}
+/**
+ * after_state_ch() - Perform after state change actions that may sleep
+ * @mdev: DRBD device.
+ * @os: old state.
+ * @ns: new state.
+ * @flags: Flags
+ */
STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
union drbd_state ns, enum chg_state_flags flags)
{
@@ -1115,7 +1193,7 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
fp = FP_DONT_CARE;
if (get_ldev(mdev)) {
- fp = mdev->bc->dc.fencing;
+ fp = mdev->ldev->dc.fencing;
put_ldev(mdev);
}
@@ -1159,7 +1237,7 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
mdev->p_uuid = NULL;
if (get_ldev(mdev)) {
if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
- mdev->bc->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
+ mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
drbd_uuid_new_current(mdev);
drbd_send_uuids(mdev);
}
@@ -1168,7 +1246,7 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
}
if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
- if (ns.peer == R_PRIMARY && mdev->bc->md.uuid[UI_BITMAP] == 0)
+ if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0)
drbd_uuid_new_current(mdev);
/* D_DISKLESS Peer becomes secondary */
@@ -1224,13 +1302,13 @@ STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
mdev->rs_failed = 0;
atomic_set(&mdev->rs_pending_cnt, 0);
- lc_free(mdev->resync);
+ lc_destroy(mdev->resync);
mdev->resync = NULL;
- lc_free(mdev->act_log);
+ lc_destroy(mdev->act_log);
mdev->act_log = NULL;
__no_warn(local,
- drbd_free_bc(mdev->bc);
- mdev->bc = NULL;);
+ drbd_free_bc(mdev->ldev);
+ mdev->ldev = NULL;);
if (mdev->md_io_tmpp)
__free_page(mdev->md_io_tmpp);
@@ -1273,12 +1351,13 @@ STATIC int drbd_thread_setup(void *arg)
{
struct drbd_thread *thi = (struct drbd_thread *) arg;
struct drbd_conf *mdev = thi->mdev;
+ unsigned long flags;
int retval;
restart:
retval = thi->function(thi);
- spin_lock(&thi->t_lock);
+ spin_lock_irqsave(&thi->t_lock, flags);
/* if the receiver has been "Exiting", the last thing it did
* was set the conn state to "StandAlone",
@@ -1293,7 +1372,7 @@ restart:
if (thi->t_state == Restarting) {
dev_info(DEV, "Restarting %s\n", current->comm);
thi->t_state = Running;
- spin_unlock(&thi->t_lock);
+ spin_unlock_irqrestore(&thi->t_lock, flags);
goto restart;
}
@@ -1301,7 +1380,7 @@ restart:
thi->t_state = None;
smp_mb();
complete(&thi->stop);
- spin_unlock(&thi->t_lock);
+ spin_unlock_irqrestore(&thi->t_lock, flags);
dev_info(DEV, "Terminating %s\n", current->comm);
@@ -1324,12 +1403,17 @@ int drbd_thread_start(struct drbd_thread *thi)
{
struct drbd_conf *mdev = thi->mdev;
struct task_struct *nt;
+ unsigned long flags;
+
const char *me =
thi == &mdev->receiver ? "receiver" :
thi == &mdev->asender ? "asender" :
thi == &mdev->worker ? "worker" : "NONSENSE";
- spin_lock(&thi->t_lock);
+ /* is used from state engine doing drbd_thread_stop_nowait,
+ * while holding the req lock irqsave */
+ spin_lock_irqsave(&thi->t_lock, flags);
+
switch (thi->t_state) {
case None:
dev_info(DEV, "Starting %s thread (from %s [%d])\n",
@@ -1338,14 +1422,14 @@ int drbd_thread_start(struct drbd_thread *thi)
/* Get ref on module for thread - this is released when thread exits */
if (!try_module_get(THIS_MODULE)) {
dev_err(DEV, "Failed to get module reference in drbd_thread_start\n");
- spin_unlock(&thi->t_lock);
+ spin_unlock_irqrestore(&thi->t_lock, flags);
return FALSE;
}
D_ASSERT(thi->task == NULL);
thi->reset_cpu_mask = 1;
thi->t_state = Running;
- spin_unlock(&thi->t_lock);
+ spin_unlock_irqrestore(&thi->t_lock, flags);
flush_signals(current); /* otherw. may get -ERESTARTNOINTR */
nt = kthread_create(drbd_thread_setup, (void *) thi,
@@ -1371,7 +1455,7 @@ int drbd_thread_start(struct drbd_thread *thi)
case Running:
case Restarting:
default:
- spin_unlock(&thi->t_lock);
+ spin_unlock_irqrestore(&thi->t_lock, flags);
break;
}
@@ -1381,12 +1465,14 @@ int drbd_thread_start(struct drbd_thread *thi)
void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
{
+ unsigned long flags;
enum drbd_thread_state ns = restart ? Restarting : Exiting;
- spin_lock(&thi->t_lock);
+ /* may be called from state engine, holding the req lock irqsave */
+ spin_lock_irqsave(&thi->t_lock, flags);
if (thi->t_state == None) {
- spin_unlock(&thi->t_lock);
+ spin_unlock_irqrestore(&thi->t_lock, flags);
if (restart)
drbd_thread_start(thi);
return;
@@ -1394,7 +1480,7 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
if (thi->t_state != ns) {
if (thi->task == NULL) {
- spin_unlock(&thi->t_lock);
+ spin_unlock_irqrestore(&thi->t_lock, flags);
return;
}
@@ -1406,7 +1492,7 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
}
- spin_unlock(&thi->t_lock);
+ spin_unlock_irqrestore(&thi->t_lock, flags);
if (wait)
wait_for_completion(&thi->stop);
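
Why irqsave rather than plain spin_lock() in this and the surrounding
hunks: t_lock is now also taken while the caller may already hold
mdev->req_lock with interrupts disabled (the state engine calling
drbd_thread_stop_nowait(), per the comments above). lockdep tracks the
interrupt state in which each lock is acquired, and an irq-safe lock
such as req_lock must never depend on an irq-unsafe one -- hence
"lockdep complained, and lockdep is right (theoretically)" in the
commit message. The pattern itself:

    unsigned long flags;

    spin_lock_irqsave(&thi->t_lock, flags);       /* disables irqs, saves prior state */
    /* ... examine/modify thi->t_state ... */
    spin_unlock_irqrestore(&thi->t_lock, flags);  /* restores the saved irq state */
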
@@ -1414,7 +1500,9 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
#ifdef CONFIG_SMP
/**
- * drbd_calc_cpu_mask: Generates CPU masks, sprad over all CPUs.
+ * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
+ * @mdev: DRBD device.
+ *
* Forces all threads of a device onto the same CPU. This is beneficial for
* DRBD's performance. May be overwritten by user's configuration.
*/
@@ -1439,9 +1527,12 @@ cpumask_t drbd_calc_cpu_mask(struct drbd_conf *mdev)
return (cpumask_t) CPU_MASK_ALL; /* Never reached. */
}
-/* modifies the cpu mask of the _current_ thread,
- * call in the "main loop" of _all_ threads.
- * no need for any mutex, current won't die prematurely.
+/**
+ * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread
+ * @mdev: DRBD device.
+ *
+ * Call in the "main loop" of _all_ threads, no need for any mutex, current won't die
+ * prematurely.
*/
void drbd_thread_current_set_cpu(struct drbd_conf *mdev)
{
@@ -1623,7 +1714,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
return 1;
for (i = UI_CURRENT; i < UI_SIZE; i++)
- p.uuid[i] = mdev->bc ? cpu_to_be64(mdev->bc->md.uuid[i]) : 0;
+ p.uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0;
mdev->comm_bm_set = drbd_bm_total_weight(mdev);
p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
@@ -1667,9 +1758,9 @@ int drbd_send_sizes(struct drbd_conf *mdev)
int ok;
if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
- D_ASSERT(mdev->bc->backing_bdev);
- d_size = drbd_get_max_capacity(mdev->bc);
- u_size = mdev->bc->dc.disk_size;
+ D_ASSERT(mdev->ldev->backing_bdev);
+ d_size = drbd_get_max_capacity(mdev->ldev);
+ u_size = mdev->ldev->dc.disk_size;
q_order_type = drbd_queue_order_type(mdev);
p.queue_order_type = cpu_to_be32(drbd_queue_order_type(mdev));
put_ldev(mdev);
@@ -1691,11 +1782,8 @@ int drbd_send_sizes(struct drbd_conf *mdev)
}
/**
- * drbd_send_state:
- * Informs the peer about our state. Only call it when
- * mdev->state.conn >= C_CONNECTED (I.e. you may not call it while in
- * WFReportParams. Though there is one valid and necessary exception,
- * drbd_connect() calls drbd_send_state() while in it WFReportParams.
+ * drbd_send_state() - Sends the drbd state to the peer
+ * @mdev: DRBD device.
*/
int drbd_send_state(struct drbd_conf *mdev)
{
@@ -1906,7 +1994,7 @@ int _drbd_send_bitmap(struct drbd_conf *mdev)
}
if (get_ldev(mdev)) {
- if (drbd_md_test_flag(mdev->bc, MDF_FULL_SYNC)) {
+ if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n");
drbd_bm_set_all(mdev);
if (drbd_bm_write(mdev)) {
@@ -1962,9 +2050,12 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
}
/**
- * _drbd_send_ack:
- * This helper function expects the sector and block_id parameter already
- * in big endian!
+ * _drbd_send_ack() - Sends an ack packet
+ * @mdev: DRBD device.
+ * @cmd: Packet command code.
+ * @sector: sector, needs to be in big endian byte order
+ * @blksize: size in bytes, needs to be in big endian byte order
+ * @block_id: Id, big endian byte order
*/
STATIC int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
u64 sector,
@@ -2003,6 +2094,12 @@ int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
return _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id);
}
+/**
+ * drbd_send_ack() - Sends an ack packet
+ * @mdev: DRBD device.
+ * @cmd: Packet command code.
+ * @e: Epoch entry.
+ */
int drbd_send_ack(struct drbd_conf *mdev,
enum drbd_packets cmd, struct drbd_epoch_entry *e)
{
@@ -2763,10 +2860,8 @@ static void drbd_delete_device(unsigned int minor)
mdev->ee_hash = NULL;
*/
- if (mdev->act_log)
- lc_free(mdev->act_log);
- if (mdev->resync)
- lc_free(mdev->resync);
+ lc_destroy(mdev->act_log);
+ lc_destroy(mdev->resync);
kfree(mdev->p_uuid);
/* mdev->p_uuid = NULL; */
@@ -2806,9 +2901,11 @@ STATIC void drbd_cleanup(void)
}
/**
- * drbd_congested: Returns 1<<BDI_async_congested and/or
- * 1<<BDI_sync_congested if we are congested. This interface is known
- * to be used by pdflush.
+ * drbd_congested() - Callback for pdflush
+ * @congested_data: User data
+ * @bdi_bits: Bits pdflush is currently interested in
+ *
+ * Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
*/
static int drbd_congested(void *congested_data, int bdi_bits)
{
@@ -2825,7 +2922,7 @@ static int drbd_congested(void *congested_data, int bdi_bits)
}
if (get_ldev(mdev)) {
- q = bdev_get_queue(mdev->bc->backing_bdev);
+ q = bdev_get_queue(mdev->ldev->backing_bdev);
r = bdi_congested(&q->backing_dev_info, bdi_bits);
put_ldev(mdev);
if (r)
@@ -3029,18 +3126,18 @@ Enomem:
return err;
}
-void drbd_free_bc(struct drbd_backing_dev *bc)
+void drbd_free_bc(struct drbd_backing_dev *ldev)
{
- if (bc == NULL)
+ if (ldev == NULL)
return;
- bd_release(bc->backing_bdev);
- bd_release(bc->md_bdev);
+ bd_release(ldev->backing_bdev);
+ bd_release(ldev->md_bdev);
- fput(bc->lo_file);
- fput(bc->md_file);
+ fput(ldev->lo_file);
+ fput(ldev->md_file);
- kfree(bc);
+ kfree(ldev);
}
void drbd_free_sock(struct drbd_conf *mdev)
@@ -3072,11 +3169,10 @@ void drbd_free_resources(struct drbd_conf *mdev)
drbd_free_sock(mdev);
__no_warn(local,
- drbd_free_bc(mdev->bc);
- mdev->bc = NULL;);
+ drbd_free_bc(mdev->ldev);
+ mdev->ldev = NULL;);
}
-/*********************************/
/* meta data management */
struct meta_data_on_disk {
@@ -3097,8 +3193,8 @@ struct meta_data_on_disk {
} __packed;
/**
- * drbd_md_sync:
- * Writes the meta data super block if the MD_DIRTY flag bit is set.
+ * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
+ * @mdev: DRBD device.
*/
void drbd_md_sync(struct drbd_conf *mdev)
{
@@ -3115,7 +3211,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
if (!get_ldev_if_state(mdev, D_FAILED))
return;
- trace_drbd_md_io(mdev, WRITE, mdev->bc);
+ trace_drbd_md_io(mdev, WRITE, mdev->ldev);
mutex_lock(&mdev->md_io_mutex);
buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
@@ -3123,22 +3219,22 @@ void drbd_md_sync(struct drbd_conf *mdev)
buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
for (i = UI_CURRENT; i < UI_SIZE; i++)
- buffer->uuid[i] = cpu_to_be64(mdev->bc->md.uuid[i]);
- buffer->flags = cpu_to_be32(mdev->bc->md.flags);
+ buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
+ buffer->flags = cpu_to_be32(mdev->ldev->md.flags);
buffer->magic = cpu_to_be32(DRBD_MD_MAGIC);
- buffer->md_size_sect = cpu_to_be32(mdev->bc->md.md_size_sect);
- buffer->al_offset = cpu_to_be32(mdev->bc->md.al_offset);
+ buffer->md_size_sect = cpu_to_be32(mdev->ldev->md.md_size_sect);
+ buffer->al_offset = cpu_to_be32(mdev->ldev->md.al_offset);
buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements);
buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE);
- buffer->device_uuid = cpu_to_be64(mdev->bc->md.device_uuid);
+ buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid);
- buffer->bm_offset = cpu_to_be32(mdev->bc->md.bm_offset);
+ buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
- D_ASSERT(drbd_md_ss__(mdev, mdev->bc) == mdev->bc->md.md_offset);
- sector = mdev->bc->md.md_offset;
+ D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
+ sector = mdev->ldev->md.md_offset;
- if (drbd_md_sync_page_io(mdev, mdev->bc, sector, WRITE)) {
+ if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
clear_bit(MD_DIRTY, &mdev->flags);
} else {
/* this was a try anyways ... */
@@ -3148,20 +3244,21 @@ void drbd_md_sync(struct drbd_conf *mdev)
drbd_io_error(mdev, TRUE);
}
- /* Update mdev->bc->md.la_size_sect,
+ /* Update mdev->ldev->md.la_size_sect,
* since we updated it on metadata. */
- mdev->bc->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
+ mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
mutex_unlock(&mdev->md_io_mutex);
put_ldev(mdev);
}
/**
- * drbd_md_read:
- * @bdev: describes the backing storage and the meta-data storage
- * Reads the meta data from bdev. Return 0 (NO_ERROR) on success, and an
- * enum drbd_ret_codes in case something goes wrong.
- * Currently only: ERR_IO_MD_DISK, MDInvalid.
+ * drbd_md_read() - Reads in the meta data super block
+ * @mdev: DRBD device.
+ * @bdev: Device from which the meta data should be read in.
+ *
+ * Return 0 (NO_ERROR) on success, and an enum drbd_ret_codes in case
+ * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
*/
int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
{
@@ -3233,7 +3330,9 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
}
/**
- * drbd_md_mark_dirty:
+ * drbd_md_mark_dirty() - Mark meta data super block as dirty
+ * @mdev: DRBD device.
+ *
 * Call this function if you change anything that should be written to
 * the meta-data super block. This function sets MD_DIRTY, and starts a
 * timer that ensures drbd_md_sync() gets called within five seconds.
@@ -3250,7 +3349,7 @@ STATIC void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
int i;
for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) {
- mdev->bc->md.uuid[i+1] = mdev->bc->md.uuid[i];
+ mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
trace_drbd_uuid(mdev, i+1);
}
@@ -3267,7 +3366,7 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
drbd_set_ed_uuid(mdev, val);
}
- mdev->bc->md.uuid[idx] = val;
+ mdev->ldev->md.uuid[idx] = val;
trace_drbd_uuid(mdev, idx);
drbd_md_mark_dirty(mdev);
}
@@ -3275,16 +3374,18 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
{
- if (mdev->bc->md.uuid[idx]) {
+ if (mdev->ldev->md.uuid[idx]) {
drbd_uuid_move_history(mdev);
- mdev->bc->md.uuid[UI_HISTORY_START] = mdev->bc->md.uuid[idx];
+ mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
trace_drbd_uuid(mdev, UI_HISTORY_START);
}
_drbd_uuid_set(mdev, idx, val);
}
/**
- * drbd_uuid_new_current:
+ * drbd_uuid_new_current() - Creates a new current UUID
+ * @mdev: DRBD device.
+ *
* Creates a new current UUID, and rotates the old current UUID into
* the bitmap slot. Causes an incremental resync upon next connect.
*/
@@ -3293,8 +3394,8 @@ void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
u64 val;
dev_info(DEV, "Creating new current UUID\n");
- D_ASSERT(mdev->bc->md.uuid[UI_BITMAP] == 0);
- mdev->bc->md.uuid[UI_BITMAP] = mdev->bc->md.uuid[UI_CURRENT];
+ D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0);
+ mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
trace_drbd_uuid(mdev, UI_BITMAP);
get_random_bytes(&val, sizeof(u64));
@@ -3303,21 +3404,21 @@ void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
{
- if (mdev->bc->md.uuid[UI_BITMAP] == 0 && val == 0)
+ if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0)
return;
if (val == 0) {
drbd_uuid_move_history(mdev);
- mdev->bc->md.uuid[UI_HISTORY_START] = mdev->bc->md.uuid[UI_BITMAP];
- mdev->bc->md.uuid[UI_BITMAP] = 0;
+ mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
+ mdev->ldev->md.uuid[UI_BITMAP] = 0;
trace_drbd_uuid(mdev, UI_HISTORY_START);
trace_drbd_uuid(mdev, UI_BITMAP);
} else {
- if (mdev->bc->md.uuid[UI_BITMAP])
+ if (mdev->ldev->md.uuid[UI_BITMAP])
dev_warn(DEV, "bm UUID already set");
- mdev->bc->md.uuid[UI_BITMAP] = val;
- mdev->bc->md.uuid[UI_BITMAP] &= ~((u64)1);
+ mdev->ldev->md.uuid[UI_BITMAP] = val;
+ mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1);
trace_drbd_uuid(mdev, UI_BITMAP);
}
@@ -3325,9 +3426,10 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
}
/**
- * drbd_bmio_set_n_write:
- * Is an io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io() that sets
- * all bits in the bitmap and writes the whole bitmap to stable storage.
+ * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
+ * @mdev: DRBD device.
+ *
+ * Sets all bits in the bitmap and writes the whole bitmap to stable storage.
*/
int drbd_bmio_set_n_write(struct drbd_conf *mdev)
{
@@ -3352,9 +3454,10 @@ int drbd_bmio_set_n_write(struct drbd_conf *mdev)
}
/**
- * drbd_bmio_clear_n_write:
- * Is an io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io() that clears
- * all bits in the bitmap and writes the whole bitmap to stable storage.
+ * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
+ * @mdev: DRBD device.
+ *
+ * Clears all bits in the bitmap and writes the whole bitmap to stable storage.
*/
int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
{
@@ -3393,13 +3496,16 @@ STATIC int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
}
/**
- * drbd_queue_bitmap_io:
- * Queues an IO operation on the whole bitmap.
+ * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
+ * @mdev: DRBD device.
+ * @io_fn: IO callback to be called when bitmap IO is possible
+ * @done: callback to be called after the bitmap IO was performed
+ * @why: Descriptive text of the reason for doing the IO
+ *
 * While IO on the bitmap happens we freeze application IO, thus we ensure
- * that drbd_set_out_of_sync() can not be called.
- * This function MUST ONLY be called from worker context.
- * BAD API ALERT!
- * It MUST NOT be used while a previous such work is still pending!
+ * that drbd_set_out_of_sync() cannot be called. This function MAY ONLY be
+ * called from worker context. It MUST NOT be used while a previous such
+ * work is still pending!
*/
void drbd_queue_bitmap_io(struct drbd_conf *mdev,
int (*io_fn)(struct drbd_conf *),
@@ -3430,9 +3536,13 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev,
}
/**
- * drbd_bitmap_io:
- * Does an IO operation on the bitmap, freezing application IO while that
- * IO operations runs. This functions MUST NOT be called from worker context.
+ * drbd_bitmap_io() - Does an IO operation on the whole bitmap
+ * @mdev: DRBD device.
+ * @io_fn: IO callback to be called when bitmap IO is possible
+ * @why: Descriptive text of the reason for doing the IO
+ *
+ * Freezes application IO while the actual IO operation runs. This
+ * function MUST NOT be called from worker context.
*/
int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why)
{
@@ -3453,17 +3563,17 @@ int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), cha
void drbd_md_set_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
{
- if ((mdev->bc->md.flags & flag) != flag) {
+ if ((mdev->ldev->md.flags & flag) != flag) {
drbd_md_mark_dirty(mdev);
- mdev->bc->md.flags |= flag;
+ mdev->ldev->md.flags |= flag;
}
}
void drbd_md_clear_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
{
- if ((mdev->bc->md.flags & flag) != 0) {
+ if ((mdev->ldev->md.flags & flag) != 0) {
drbd_md_mark_dirty(mdev);
- mdev->bc->md.flags &= ~flag;
+ mdev->ldev->md.flags &= ~flag;
}
}
int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 55dbf83d559f..c6217d6a2465 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -165,7 +165,7 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
if (get_ldev_if_state(mdev, D_CONSISTENT)) {
- fp = mdev->bc->dc.fencing;
+ fp = mdev->ldev->dc.fencing;
put_ldev(mdev);
} else {
dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n");
@@ -313,7 +313,7 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
if (new_role == R_SECONDARY) {
set_disk_ro(mdev->vdisk, TRUE);
if (get_ldev(mdev)) {
- mdev->bc->md.uuid[UI_CURRENT] &= ~(u64)1;
+ mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
put_ldev(mdev);
}
} else {
@@ -325,10 +325,10 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
if (get_ldev(mdev)) {
if (((mdev->state.conn < C_CONNECTED ||
mdev->state.pdsk <= D_FAILED)
- && mdev->bc->md.uuid[UI_BITMAP] == 0) || forced)
+ && mdev->ldev->md.uuid[UI_BITMAP] == 0) || forced)
drbd_uuid_new_current(mdev);
- mdev->bc->md.uuid[UI_CURRENT] |= (u64)1;
+ mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1;
put_ldev(mdev);
}
}
@@ -463,10 +463,10 @@ void drbd_resume_io(struct drbd_conf *mdev)
}
/**
- * drbd_determin_dev_size:
- * Evaluates all constraints and sets our correct device size.
- * Negative return values indicate errors. 0 and positive values
- * indicate success.
+ * drbd_determin_dev_size() - Sets the right device size obeying all constraints
+ * @mdev: DRBD device.
+ *
+ * Returns 0 and positive values on success, negative values indicate errors.
* You should call drbd_md_sync() after calling this function.
*/
enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_hold(local)
@@ -493,14 +493,14 @@ enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_ho
/* no wait necessary anymore, actually we could assert that */
wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
- prev_first_sect = drbd_md_first_sector(mdev->bc);
- prev_size = mdev->bc->md.md_size_sect;
- la_size = mdev->bc->md.la_size_sect;
+ prev_first_sect = drbd_md_first_sector(mdev->ldev);
+ prev_size = mdev->ldev->md.md_size_sect;
+ la_size = mdev->ldev->md.la_size_sect;
/* TODO: should only be some assert here, not (re)init... */
- drbd_md_set_sector_offsets(mdev, mdev->bc);
+ drbd_md_set_sector_offsets(mdev, mdev->ldev);
- size = drbd_new_dev_size(mdev, mdev->bc);
+ size = drbd_new_dev_size(mdev, mdev->ldev);
if (drbd_get_capacity(mdev->this_bdev) != size ||
drbd_bm_capacity(mdev) != size) {
@@ -521,17 +521,17 @@ enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_ho
}
/* racy, see comments above. */
drbd_set_my_capacity(mdev, size);
- mdev->bc->md.la_size_sect = size;
+ mdev->ldev->md.la_size_sect = size;
dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
(unsigned long long)size>>1);
}
if (rv == dev_size_error)
goto out;
- la_size_changed = (la_size != mdev->bc->md.la_size_sect);
+ la_size_changed = (la_size != mdev->ldev->md.la_size_sect);
- md_moved = prev_first_sect != drbd_md_first_sector(mdev->bc)
- || prev_size != mdev->bc->md.md_size_sect;
+ md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
+ || prev_size != mdev->ldev->md.md_size_sect;
if (md_moved) {
dev_warn(DEV, "Moving meta-data.\n");
@@ -600,11 +600,12 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
}
/**
- * drbd_check_al_size:
- * checks that the al lru is of requested size, and if neccessary tries to
- * allocate a new one. returns -EBUSY if current al lru is still used,
- * -ENOMEM when allocation failed, and 0 on success. You should call
- * drbd_md_sync() after you called this function.
+ * drbd_check_al_size() - Ensures that the AL is of the right size
+ * @mdev: DRBD device.
+ *
+ * Returns -EBUSY if the current AL LRU is still in use, -ENOMEM when the
+ * allocation failed, and 0 on success. You should call drbd_md_sync() after
+ * calling this function.
*/
STATIC int drbd_check_al_size(struct drbd_conf *mdev)
{
@@ -622,8 +623,8 @@ STATIC int drbd_check_al_size(struct drbd_conf *mdev)
in_use = 0;
t = mdev->act_log;
- n = lc_alloc("act_log", mdev->sync_conf.al_extents,
- sizeof(struct lc_element), mdev);
+ n = lc_create("act_log", mdev->sync_conf.al_extents,
+ sizeof(struct lc_element), 0);
if (n == NULL) {
dev_err(DEV, "Cannot allocate act_log lru!\n");
@@ -632,7 +633,7 @@ STATIC int drbd_check_al_size(struct drbd_conf *mdev)
spin_lock_irq(&mdev->al_lock);
if (t) {
for (i = 0; i < t->nr_elements; i++) {
- e = lc_entry(t, i);
+ e = lc_element_by_index(t, i);
if (e->refcnt)
dev_err(DEV, "refcnt(%d)==%d\n",
e->lc_number, e->refcnt);
@@ -644,11 +645,11 @@ STATIC int drbd_check_al_size(struct drbd_conf *mdev)
spin_unlock_irq(&mdev->al_lock);
if (in_use) {
dev_err(DEV, "Activity log still in use!\n");
- lc_free(n);
+ lc_destroy(n);
return -EBUSY;
} else {
if (t)
- lc_free(t);
+ lc_destroy(t);
}
 drbd_md_mark_dirty(mdev); /* we changed mdev->act_log->nr_elements */
return 0;
@@ -657,11 +658,11 @@ STATIC int drbd_check_al_size(struct drbd_conf *mdev)
void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __must_hold(local)
{
struct request_queue * const q = mdev->rq_queue;
- struct request_queue * const b = mdev->bc->backing_bdev->bd_disk->queue;
+ struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
/* unsigned int old_max_seg_s = q->max_segment_size; */
- int max_segments = mdev->bc->dc.max_bio_bvecs;
+ int max_segments = mdev->ldev->dc.max_bio_bvecs;
- if (b->merge_bvec_fn && !mdev->bc->dc.use_bmbv)
+ if (b->merge_bvec_fn && !mdev->ldev->dc.use_bmbv)
max_seg_s = PAGE_SIZE;
max_seg_s = min(b->max_sectors * b->hardsect_size, max_seg_s);
@@ -816,7 +817,8 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
goto fail;
}
- resync_lru = lc_alloc("resync", 61, sizeof(struct bm_extent), mdev);
+ resync_lru = lc_create("resync", 61, sizeof(struct bm_extent),
+ offsetof(struct bm_extent, lce));
if (!resync_lru) {
retcode = ERR_NOMEM;
goto release_bdev_fail;
@@ -964,8 +966,8 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
* Devices and memory are no longer released by error cleanup below.
* now mdev takes over responsibility, and the state engine should
* clean it up somewhere. */
- D_ASSERT(mdev->bc == NULL);
- mdev->bc = nbc;
+ D_ASSERT(mdev->ldev == NULL);
+ mdev->ldev = nbc;
mdev->resync = resync_lru;
nbc = NULL;
resync_lru = NULL;
@@ -973,12 +975,12 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
mdev->write_ordering = WO_bio_barrier;
drbd_bump_write_ordering(mdev, WO_bio_barrier);
- if (drbd_md_test_flag(mdev->bc, MDF_CRASHED_PRIMARY))
+ if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY))
set_bit(CRASHED_PRIMARY, &mdev->flags);
else
clear_bit(CRASHED_PRIMARY, &mdev->flags);
- if (drbd_md_test_flag(mdev->bc, MDF_PRIMARY_IND)) {
+ if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND)) {
set_bit(CRASHED_PRIMARY, &mdev->flags);
cp_discovered = 1;
}
@@ -1006,8 +1008,8 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
*/
clear_bit(USE_DEGR_WFC_T, &mdev->flags);
if (mdev->state.role != R_PRIMARY &&
- drbd_md_test_flag(mdev->bc, MDF_PRIMARY_IND) &&
- !drbd_md_test_flag(mdev->bc, MDF_CONNECTED_IND))
+ drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
+ !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
set_bit(USE_DEGR_WFC_T, &mdev->flags);
dd = drbd_determin_dev_size(mdev);
@@ -1017,7 +1019,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
} else if (dd == grew)
set_bit(RESYNC_AFTER_NEG, &mdev->flags);
- if (drbd_md_test_flag(mdev->bc, MDF_FULL_SYNC)) {
+ if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
dev_info(DEV, "Assuming that all blocks are out of sync "
"(aka FullSync)\n");
if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from attaching")) {
@@ -1044,8 +1046,8 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
otherwise into D_CONSISTENT state.
*/
- if (drbd_md_test_flag(mdev->bc, MDF_CONSISTENT)) {
- if (drbd_md_test_flag(mdev->bc, MDF_WAS_UP_TO_DATE))
+ if (drbd_md_test_flag(mdev->ldev, MDF_CONSISTENT)) {
+ if (drbd_md_test_flag(mdev->ldev, MDF_WAS_UP_TO_DATE))
ns.disk = D_CONSISTENT;
else
ns.disk = D_OUTDATED;
@@ -1053,11 +1055,11 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
ns.disk = D_INCONSISTENT;
}
- if (drbd_md_test_flag(mdev->bc, MDF_PEER_OUT_DATED))
+ if (drbd_md_test_flag(mdev->ldev, MDF_PEER_OUT_DATED))
ns.pdsk = D_OUTDATED;
if ( ns.disk == D_CONSISTENT &&
- (ns.pdsk == D_OUTDATED || mdev->bc->dc.fencing == FP_DONT_CARE))
+ (ns.pdsk == D_OUTDATED || mdev->ldev->dc.fencing == FP_DONT_CARE))
ns.disk = D_UP_TO_DATE;
/* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
@@ -1081,9 +1083,9 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
goto force_diskless_dec;
if (mdev->state.role == R_PRIMARY)
- mdev->bc->md.uuid[UI_CURRENT] |= (u64)1;
+ mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1;
else
- mdev->bc->md.uuid[UI_CURRENT] &= ~(u64)1;
+ mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
drbd_md_mark_dirty(mdev);
drbd_md_sync(mdev);
@@ -1113,8 +1115,7 @@ STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
fput(nbc->md_file);
kfree(nbc);
}
- if (resync_lru)
- lc_free(resync_lru);
+ lc_destroy(resync_lru);
reply->ret_code = retcode;
drbd_reconfig_done(mdev);
@@ -1456,12 +1457,12 @@ STATIC int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
goto fail;
}
- if (mdev->bc->known_size != drbd_get_capacity(mdev->bc->backing_bdev)) {
- mdev->bc->known_size = drbd_get_capacity(mdev->bc->backing_bdev);
+ if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
+ mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
ldsc = 1;
}
- mdev->bc->dc.disk_size = (sector_t)rs.resize_size;
+ mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
dd = drbd_determin_dev_size(mdev);
drbd_md_sync(mdev);
put_ldev(mdev);
@@ -1736,7 +1737,7 @@ STATIC int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
tl = reply->tag_list;
if (get_ldev(mdev)) {
- tl = disk_conf_to_tags(mdev, &mdev->bc->dc, tl);
+ tl = disk_conf_to_tags(mdev, &mdev->ldev->dc, tl);
put_ldev(mdev);
}
@@ -1788,11 +1789,11 @@ STATIC int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
/* This is a hand crafted add tag ;) */
*tl++ = T_uuids;
*tl++ = UI_SIZE*sizeof(u64);
- memcpy(tl, mdev->bc->md.uuid, UI_SIZE*sizeof(u64));
+ memcpy(tl, mdev->ldev->md.uuid, UI_SIZE*sizeof(u64));
tl = (unsigned short *)((char *)tl + UI_SIZE*sizeof(u64));
*tl++ = T_uuids_flags;
*tl++ = sizeof(int);
- memcpy(tl, &mdev->bc->md.flags, sizeof(int));
+ memcpy(tl, &mdev->ldev->md.flags, sizeof(int));
tl = (unsigned short *)((char *)tl + sizeof(int));
put_ldev(mdev);
}
@@ -1802,8 +1803,10 @@ STATIC int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
}
/**
- * drbd_nl_get_timeout_flag:
- * Is used by drbdsetup to find out which timeout value to use.
+ * drbd_nl_get_timeout_flag() - Used by drbdsetup to find out which timeout value to use
+ * @mdev: DRBD device.
+ * @nlp: Netlink/connector packet from drbdsetup
+ * @reply: Reply packet for drbdsetup
*/
STATIC int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
struct drbd_nl_cfg_reply *reply)
@@ -1859,7 +1862,7 @@ STATIC int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
/* this is "skip initial sync", assume to be clean */
if (mdev->state.conn == C_CONNECTED && mdev->agreed_pro_version >= 90 &&
- mdev->bc->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
+ mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
dev_info(DEV, "Preparing to skip initial sync\n");
skip_initial_sync = 1;
} else if (mdev->state.conn >= C_CONNECTED) {
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 9f0a3c0e952c..b59b9d9f078c 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -35,7 +35,6 @@
#include <linux/drbd_config.h>
#include <linux/drbd.h>
#include "drbd_int.h"
-#include "lru_cache.h" /* for lc_sprintf_stats */
STATIC int drbd_proc_open(struct inode *inode, struct file *file);
@@ -137,7 +136,7 @@ STATIC void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
STATIC void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
{
- struct bm_extent *bme = (struct bm_extent *)e;
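+	/* the lc_element is embedded in struct bm_extent as member "lce",
+	 * so lc_entry() (a container_of) replaces the old type cast */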
+ struct bm_extent *bme = lc_entry(e, struct bm_extent, lce);
seq_printf(seq, "%5d %s %s\n", bme->rs_left,
bme->flags & BME_NO_WRITES ? "NO_WRITES" : "---------",
@@ -244,14 +243,14 @@ STATIC int drbd_seq_show(struct seq_file *seq, void *v)
mdev->rs_total);
if (proc_details >= 1 && get_ldev_if_state(mdev, D_FAILED)) {
- lc_printf_stats(seq, mdev->resync);
- lc_printf_stats(seq, mdev->act_log);
+ lc_seq_printf_stats(seq, mdev->resync);
+ lc_seq_printf_stats(seq, mdev->act_log);
put_ldev(mdev);
}
if (proc_details >= 2) {
if (mdev->resync) {
- lc_dump(mdev->resync, seq, "rs_left",
+ lc_seq_dump_details(seq, mdev->resync, "rs_left",
resync_dump_detail);
}
}
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 25da228de2fd..24dc84698de7 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -85,7 +85,13 @@ static struct drbd_epoch *previous_epoch(struct drbd_conf *mdev, struct drbd_epo
#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
/**
- * drbd_bp_alloc: Returns a page. Fails only if a signal comes in.
+ * drbd_pp_alloc() - Returns a page, fails only if a signal comes in
+ * @mdev: DRBD device.
+ * @gfp_mask: Get free page allocation mask
+ *
+ * Allocates a page from the kernel or from our own page pool. If the
+ * allocation would exceed the max_buffers setting, this function sleeps
+ * until DRBD frees a page somewhere else.
*/
STATIC struct page *drbd_pp_alloc(struct drbd_conf *mdev, gfp_t gfp_mask)
{
@@ -223,7 +229,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
goto fail1;
}
- bio->bi_bdev = mdev->bc->backing_bdev;
+ bio->bi_bdev = mdev->ldev->backing_bdev;
bio->bi_sector = sector;
ds = data_size;
@@ -703,9 +709,9 @@ STATIC enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *soc
}
/**
- * drbd_socket_okay:
- * Tests if the connection behind the socket still exists. If not it frees
- * the socket.
+ * drbd_socket_okay() - Free the socket if its connection is not okay
+ * @mdev: DRBD device.
+ * @sock: pointer to the pointer to the socket.
*/
static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock)
{
@@ -936,7 +942,7 @@ STATIC enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d
int rv;
if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
- rv = blkdev_issue_flush(mdev->bc->backing_bdev, NULL);
+ rv = blkdev_issue_flush(mdev->ldev->backing_bdev, NULL);
if (rv) {
dev_err(DEV, "local disk flush failed with status %d\n", rv);
/* would rather check on EOPNOTSUPP, but that is not reliable.
@@ -950,10 +956,6 @@ STATIC enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d
return drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE);
}
-/**
- * w_flush: Checks if an epoch can be closed and therefore might
- * close and/or free the epoch object.
- */
STATIC int w_flush(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
struct flush_work *fw = (struct flush_work *)w;
@@ -971,8 +973,10 @@ STATIC int w_flush(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
}
/**
- * drbd_may_finish_epoch: Checks if an epoch can be closed and therefore might
- * close and/or free the epoch object.
+ * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, possibly finishes it.
+ * @mdev: DRBD device.
+ * @epoch: Epoch object.
+ * @ev: Epoch event.
*/
STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
struct drbd_epoch *epoch,
@@ -1088,8 +1092,9 @@ STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
}
/**
- * drbd_bump_write_ordering: It turned out that the current mdev->write_ordering
- * method does not work on the backing block device. Try the next allowed method.
+ * drbd_bump_write_ordering() - Fall back to another write ordering method
+ * @mdev: DRBD device.
+ * @wo: Write ordering method to try.
*/
void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
{
@@ -1103,11 +1108,11 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo)
pwo = mdev->write_ordering;
wo = min(pwo, wo);
- if (wo == WO_bio_barrier && mdev->bc->dc.no_disk_barrier)
+ if (wo == WO_bio_barrier && mdev->ldev->dc.no_disk_barrier)
wo = WO_bdev_flush;
- if (wo == WO_bdev_flush && mdev->bc->dc.no_disk_flush)
+ if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
wo = WO_drain_io;
- if (wo == WO_drain_io && mdev->bc->dc.no_disk_drain)
+ if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
wo = WO_none;
mdev->write_ordering = wo;
if (pwo != mdev->write_ordering || wo == WO_bio_barrier)
@@ -1115,8 +1120,10 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo)
}
/**
- * w_e_reissue: In case the IO subsystem delivered an error for an BIO with the
- * BIO_RW_BARRIER flag set, retry that bio without the barrier flag set.
+ * w_e_reissue() - Worker callback; Resubmit a bio, without BIO_RW_BARRIER set
+ * @mdev: DRBD device.
+ * @w: work object.
+ * @cancel: The connection will be closed anyway (unused in this callback)
*/
int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __releases(local)
{
@@ -1140,7 +1147,7 @@ int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __relea
* re-init volatile members */
/* we still have a local reference,
* get_ldev was done in receive_Data. */
- bio->bi_bdev = mdev->bc->backing_bdev;
+ bio->bi_bdev = mdev->ldev->backing_bdev;
bio->bi_sector = e->sector;
bio->bi_size = e->size;
bio->bi_idx = 0;
@@ -2066,7 +2073,7 @@ STATIC int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
int self, peer, rv = -100;
unsigned long ch_self, ch_peer;
- self = mdev->bc->md.uuid[UI_BITMAP] & 1;
+ self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
peer = mdev->p_uuid[UI_BITMAP] & 1;
ch_peer = mdev->p_uuid[UI_SIZE];
@@ -2137,7 +2144,7 @@ STATIC int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
{
int self, peer, hg, rv = -100;
- self = mdev->bc->md.uuid[UI_BITMAP] & 1;
+ self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
peer = mdev->p_uuid[UI_BITMAP] & 1;
switch (mdev->net_conf->after_sb_1p) {
@@ -2183,7 +2190,7 @@ STATIC int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
{
int self, peer, hg, rv = -100;
- self = mdev->bc->md.uuid[UI_BITMAP] & 1;
+ self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
peer = mdev->p_uuid[UI_BITMAP] & 1;
switch (mdev->net_conf->after_sb_2p) {
@@ -2250,7 +2257,7 @@ STATIC int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l
u64 self, peer;
int i, j;
- self = mdev->bc->md.uuid[UI_CURRENT] & ~((u64)1);
+ self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
*rule_nr = 1;
@@ -2299,20 +2306,20 @@ STATIC int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l
}
*rule_nr = 7;
- self = mdev->bc->md.uuid[UI_BITMAP] & ~((u64)1);
+ self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
if (self == peer)
return 1;
*rule_nr = 8;
for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
- self = mdev->bc->md.uuid[i] & ~((u64)1);
+ self = mdev->ldev->md.uuid[i] & ~((u64)1);
if (self == peer)
return 2;
}
*rule_nr = 9;
- self = mdev->bc->md.uuid[UI_BITMAP] & ~((u64)1);
+ self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
if (self == peer && self != ((u64)0))
return 100;
@@ -2347,7 +2354,7 @@ STATIC enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
hg = drbd_uuid_compare(mdev, &rule_nr);
dev_info(DEV, "drbd_sync_handshake:\n");
- drbd_uuid_dump(mdev, "self", mdev->bc->md.uuid,
+ drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
@@ -2740,33 +2747,33 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct p_header *h)
#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
if (get_ldev(mdev)) {
warn_if_differ_considerably(mdev, "lower level device sizes",
- p_size, drbd_get_max_capacity(mdev->bc));
+ p_size, drbd_get_max_capacity(mdev->ldev));
warn_if_differ_considerably(mdev, "user requested size",
- p_usize, mdev->bc->dc.disk_size);
+ p_usize, mdev->ldev->dc.disk_size);
/* if this is the first connect, or an otherwise expected
* param exchange, choose the minimum */
if (mdev->state.conn == C_WF_REPORT_PARAMS)
- p_usize = min_not_zero((sector_t)mdev->bc->dc.disk_size,
+ p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
p_usize);
- my_usize = mdev->bc->dc.disk_size;
+ my_usize = mdev->ldev->dc.disk_size;
- if (mdev->bc->dc.disk_size != p_usize) {
- mdev->bc->dc.disk_size = p_usize;
+ if (mdev->ldev->dc.disk_size != p_usize) {
+ mdev->ldev->dc.disk_size = p_usize;
dev_info(DEV, "Peer sets u_size to %lu sectors\n",
- (unsigned long)mdev->bc->dc.disk_size);
+ (unsigned long)mdev->ldev->dc.disk_size);
}
/* Never shrink a device with usable data during connect.
But allow online shrinking if we are connected. */
- if (drbd_new_dev_size(mdev, mdev->bc) <
+ if (drbd_new_dev_size(mdev, mdev->ldev) <
drbd_get_capacity(mdev->this_bdev) &&
mdev->state.disk >= D_OUTDATED &&
mdev->state.conn < C_CONNECTED) {
dev_err(DEV, "The peer's disk size is too small!\n");
drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
- mdev->bc->dc.disk_size = my_usize;
+ mdev->ldev->dc.disk_size = my_usize;
put_ldev(mdev);
return FALSE;
}
@@ -2802,8 +2809,8 @@ STATIC int receive_sizes(struct drbd_conf *mdev, struct p_header *h)
}
if (get_ldev(mdev)) {
- if (mdev->bc->known_size != drbd_get_capacity(mdev->bc->backing_bdev)) {
- mdev->bc->known_size = drbd_get_capacity(mdev->bc->backing_bdev);
+ if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
+ mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
ldsc = 1;
}
@@ -2866,7 +2873,7 @@ STATIC int receive_uuids(struct drbd_conf *mdev, struct p_header *h)
int skip_initial_sync =
mdev->state.conn == C_CONNECTED &&
mdev->agreed_pro_version >= 90 &&
- mdev->bc->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
+ mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
(p_uuid[UI_FLAGS] & 8);
if (skip_initial_sync) {
dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
@@ -2893,8 +2900,8 @@ STATIC int receive_uuids(struct drbd_conf *mdev, struct p_header *h)
}
/**
- * convert_state:
- * Switches the view of the state.
+ * convert_state() - Converts the peer's view of the cluster state to our point of view
+ * @ps: The state as seen by the peer.
*/
STATIC union drbd_state convert_state(union drbd_state ps)
{
@@ -3519,7 +3526,7 @@ STATIC void drbd_disconnect(struct drbd_conf *mdev)
fp = FP_DONT_CARE;
if (get_ldev(mdev)) {
- fp = mdev->bc->dc.fencing;
+ fp = mdev->ldev->dc.fencing;
put_ldev(mdev);
}
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 2e70345a06d4..5c4039ad052e 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -925,7 +925,7 @@ allocate_barrier:
kfree(b); /* if someone else has beaten us to it... */
if (local) {
- req->private_bio->bi_bdev = mdev->bc->backing_bdev;
+ req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
trace_drbd_bio(mdev, "Pri", req->private_bio, 0, NULL);
@@ -1119,8 +1119,8 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
limit = bvec->bv_len;
} else if (limit && get_ldev(mdev)) {
struct request_queue * const b =
- mdev->bc->backing_bdev->bd_disk->queue;
- if (b->merge_bvec_fn && mdev->bc->dc.use_bmbv) {
+ mdev->ldev->backing_bdev->bd_disk->queue;
+ if (b->merge_bvec_fn && mdev->ldev->dc.use_bmbv) {
backing_limit = b->merge_bvec_fn(b, bvm, bvec);
limit = min(limit, backing_limit);
}
diff --git a/drivers/block/drbd/drbd_tracing.c b/drivers/block/drbd/drbd_tracing.c
index ab5aba9c4972..b467e92dda76 100644
--- a/drivers/block/drbd/drbd_tracing.c
+++ b/drivers/block/drbd/drbd_tracing.c
@@ -124,7 +124,7 @@ static void probe_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index)
dev_info(DEV, " uuid[%s] now %016llX\n",
uuid_str[index],
- (unsigned long long)mdev->bc->md.uuid[index]);
+ (unsigned long long)mdev->ldev->md.uuid[index]);
}
static void probe_drbd_md_io(struct drbd_conf *mdev, int rw,
@@ -223,30 +223,20 @@ static void probe_drbd_actlog(struct drbd_conf *mdev, sector_t sector, char* msg
(int)BM_SECT_TO_EXT(sector));
}
-/*
- *
- * drbd_print_buffer
- *
- * This routine dumps binary data to the debugging output. Can be
- * called at interrupt level.
- *
- * Arguments:
- *
- * prefix - String is output at the beginning of each line output
- * flags - Control operation of the routine. Currently defined
- * Flags are:
- * DBGPRINT_BUFFADDR; if set, each line starts with the
- * virtual address of the line being outupt. If clear,
- * each line starts with the offset from the beginning
- * of the buffer.
- * size - Indicates the size of each entry in the buffer. Supported
- * values are sizeof(char), sizeof(short) and sizeof(int)
- * buffer - Start address of buffer
- * buffer_va - Virtual address of start of buffer (normally the same
- * as Buffer, but having it separate allows it to hold
- * file address for example)
- * length - length of buffer
- *
+/**
+ * drbd_print_buffer() - Hexdump arbitrary binary data to the debugging output
+ * @prefix: String output at the beginning of each output line.
+ * @flags: Currently the only defined flag is DBGPRINT_BUFFADDR; if set, each
+ * line starts with the virtual address of the line being
+ * output. If clear, each line starts with the offset from the
+ * beginning of the buffer.
+ * @size: Indicates the size of each entry in the buffer. Supported
+ * values are sizeof(char), sizeof(short) and sizeof(int)
+ * @buffer: Start address of buffer
+ * @buffer_va: Virtual address of the start of the buffer (normally the same
+ * as @buffer, but having it separate allows it to hold
+ * a file address, for example)
+ * @length: length of buffer
*/
static void drbd_print_buffer(const char *prefix, unsigned int flags, int size,
const void *buffer, const void *buffer_va,
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index dd984502d62e..96065835fb69 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -250,8 +250,8 @@ int w_io_error(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
struct drbd_request *req = (struct drbd_request *)w;
int ok;
- /* NOTE: mdev->bc can be NULL by the time we get here! */
- /* D_ASSERT(mdev->bc->dc.on_io_error != EP_PASS_ON); */
+ /* NOTE: mdev->ldev can be NULL by the time we get here! */
+ /* D_ASSERT(mdev->ldev->dc.on_io_error != EP_PASS_ON); */
/* the only way this callback is scheduled is from _req_may_be_done,
* when it is done and had a local write error, see comments there */
@@ -740,7 +740,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
int i;
for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
_drbd_uuid_set(mdev, i, mdev->p_uuid[i]);
- drbd_uuid_set(mdev, UI_BITMAP, mdev->bc->md.uuid[UI_CURRENT]);
+ drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_CURRENT]);
_drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]);
} else {
dev_err(DEV, "mdev->p_uuid is NULL! BUG\n");
@@ -754,7 +754,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
* know of the peer. */
int i;
for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
- mdev->p_uuid[i] = mdev->bc->md.uuid[i];
+ mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
}
}
@@ -781,7 +781,10 @@ out:
}
/**
- * w_e_end_data_req: Send the answer (P_DATA_REPLY) in response to a DataRequest.
+ * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
+ * @mdev: DRBD device.
+ * @w: work object.
+ * @cancel: The connection will be closed anyway
*/
int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
@@ -823,7 +826,10 @@ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
}
/**
- * w_e_end_rsdata_req: Send the answer (P_RS_DATA_REPLY) to a RSDataRequest.
+ * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
+ * @mdev: DRBD device.
+ * @w: work object.
+ * @cancel: The connection will be closed anyway
*/
int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
@@ -1100,7 +1106,10 @@ int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
}
/**
- * w_send_dblock: Send a mirrored write request.
+ * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
+ * @mdev: DRBD device.
+ * @w: work object.
+ * @cancel: The connection will be closed anyway
*/
int w_send_dblock(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
@@ -1119,7 +1128,10 @@ int w_send_dblock(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
}
/**
- * w_send_read_req: Send a read requests.
+ * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
+ * @mdev: DRBD device.
+ * @w: work object.
+ * @cancel: The connection will be closed anyway
*/
int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
@@ -1163,9 +1175,9 @@ STATIC int _drbd_may_sync_now(struct drbd_conf *mdev)
}
/**
- * _drbd_pause_after:
- * Finds all devices that may not resync now, and causes them to
- * pause their resynchronisation.
+ * _drbd_pause_after() - Pause resync on all devices that may not resync now
+ * @mdev: DRBD device.
+ *
* Called from process context only (admin command and after_state_ch).
*/
STATIC int _drbd_pause_after(struct drbd_conf *mdev)
@@ -1188,9 +1200,9 @@ STATIC int _drbd_pause_after(struct drbd_conf *mdev)
}
/**
- * _drbd_resume_next:
- * Finds all devices that can resume resynchronisation
- * process, and causes them to resume.
+ * _drbd_resume_next() - Resume resync on all devices that may resync now
+ * @mdev: DRBD device.
+ *
* Called from process context only (admin command and worker).
*/
STATIC int _drbd_resume_next(struct drbd_conf *mdev)
@@ -1244,12 +1256,12 @@ void drbd_alter_sa(struct drbd_conf *mdev, int na)
}
/**
- * drbd_start_resync:
- * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
- * Start the resync process. Called from process context only,
- * either admin command or drbd_receiver.
- * Note, this function might bring you directly into one of the
- * PausedSync* states.
+ * drbd_start_resync() - Start the resync process
+ * @mdev: DRBD device.
+ * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
+ *
+ * This function might bring you directly into one of the
+ * C_PAUSED_SYNC_* states.
*/
void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
{
diff --git a/drivers/block/drbd/lru_cache.h b/drivers/block/drbd/lru_cache.h
deleted file mode 100644
index eabf897948d0..000000000000
--- a/drivers/block/drbd/lru_cache.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- lru_cache.h
-
- This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
-
- Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
- Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
- Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
-
- drbd is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
-
- drbd is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with drbd; see the file COPYING. If not, write to
- the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
-
- */
-
-#ifndef LRU_CACHE_H
-#define LRU_CACHE_H
-
-#include <linux/list.h>
-
-struct lc_element {
- struct hlist_node colision;
- struct list_head list; /* LRU list or free list */
- unsigned int refcnt;
- unsigned int lc_number;
-};
-
-struct lru_cache {
- struct list_head lru;
- struct list_head free;
- struct list_head in_use;
- size_t element_size;
- unsigned int nr_elements;
- unsigned int new_number;
-
- unsigned int used;
- unsigned long flags;
- unsigned long hits, misses, starving, dirty, changed;
- struct lc_element *changing_element; /* just for paranoia */
-
- void *lc_private;
- const char *name;
-
- struct hlist_head slot[0];
- /* hash colision chains here, then element storage. */
-};
-
-
-/* flag-bits for lru_cache */
-enum {
- __LC_PARANOIA,
- __LC_DIRTY,
- __LC_STARVING,
-};
-#define LC_PARANOIA (1<<__LC_PARANOIA)
-#define LC_DIRTY (1<<__LC_DIRTY)
-#define LC_STARVING (1<<__LC_STARVING)
-
-extern struct lru_cache *lc_alloc(const char *name, unsigned int e_count,
- size_t e_size, void *private_p);
-extern void lc_reset(struct lru_cache *lc);
-extern void lc_free(struct lru_cache *lc);
-extern void lc_set(struct lru_cache *lc, unsigned int enr, int index);
-extern void lc_del(struct lru_cache *lc, struct lc_element *element);
-
-extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr);
-extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr);
-extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr);
-extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e);
-extern void lc_changed(struct lru_cache *lc, struct lc_element *e);
-
-struct seq_file;
-extern size_t lc_printf_stats(struct seq_file *seq, struct lru_cache *lc);
-
-void lc_dump(struct lru_cache *lc, struct seq_file *seq, char *utext,
- void (*detail) (struct seq_file *, struct lc_element *));
-
-/* This can be used to stop lc_get from changing the set of active elements.
- * Note that the reference counts and order on the lru list may still change.
- * returns true if we aquired the lock.
- */
-static inline int lc_try_lock(struct lru_cache *lc)
-{
- return !test_and_set_bit(__LC_DIRTY, &lc->flags);
-}
-
-static inline void lc_unlock(struct lru_cache *lc)
-{
- clear_bit(__LC_DIRTY, &lc->flags);
- smp_mb__after_clear_bit();
-}
-
-static inline int lc_is_used(struct lru_cache *lc, unsigned int enr)
-{
- struct lc_element *e = lc_find(lc, enr);
- return e && e->refcnt;
-}
-
-#define LC_FREE (-1U)
-
-#define lc_e_base(lc) ((char *)((lc)->slot + (lc)->nr_elements))
-#define lc_entry(lc, i) ((struct lc_element *) \
- (lc_e_base(lc) + (i)*(lc)->element_size))
-#define lc_index_of(lc, e) (((char *)(e) - lc_e_base(lc))/(lc)->element_size)
-
-#endif
diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h
new file mode 100644
index 000000000000..69e2455b00be
--- /dev/null
+++ b/include/linux/lru_cache.h
@@ -0,0 +1,285 @@
+/*
+ lru_cache.h
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#ifndef LRU_CACHE_H
+#define LRU_CACHE_H
+
+#include <linux/list.h>
+
+/*
+This header file (and its .c file; see there for the kernel-doc of the
+ functions) defines a helper framework to easily keep track of index:label
+ associations, and changes to an "active set" of objects, as well as pending
+ transactions, to persistently record those changes.
+
+ We use an LRU policy if it is necessary to "cool down" a region currently in
+ the active set before we can "heat" a previously unused region.
+
+ Because of this property, it is called "lru_cache".
+ As it actually Tracks Objects in an Active SeT, we could also call it
+ toast (incidentally that is what may happen to the data on the
+ backend storage upon next resync, if we don't get it right).
+
+What for?
+
+We replicate IO (more or less synchronously) to local and remote disk.
+
+For crash recovery after replication node failure,
+ we need to resync all regions that have been the target of in-flight WRITE
+ IO (in use, or "hot", regions), as we don't know whether or not those
+ WRITEs have made it to stable storage.
+
+ To avoid a "full resync", we need to persistently track these regions.
+
+ This is known as "write intent log", and can be implemented as on-disk
+ (coarse or fine grained) bitmap, or other meta data.
+
+ To avoid the overhead of frequent extra writes to this meta data area,
+ usually the condition is softened to regions that _may_ have been the target
+ in-flight WRITE IO, e.g. by only lazily clearing the on-disk write-intent
+ bitmap, trading frequency of meta data transactions against amount of
+ (possibly unnecessary) resync traffic.
+
+ If we set a hard limit on the area that may be "hot" at any given time, we
+ limit the amount of resync traffic needed for crash recovery.
+
+For recovery after replication link failure,
+ we need to resync all blocks that have been changed on the other replica
+ in the meantime, or, if both replicas have been changed independently [*],
+ all blocks that have been changed on either replica in the meantime.
+ [*] usually as a result of a cluster split-brain and insufficient protection,
+ but there are valid use cases to do this on purpose.
+
+ Tracking those blocks can be implemented as a "dirty bitmap".
+ Having it fine-grained reduces the amount of resync traffic.
+ It should also be persistent, to allow for reboots (or crashes)
+ while the replication link is down.
+
+There are various possible implementations for persistently storing
+write intent log information, three of which are mentioned here.
+
+"Chunk dirtying"
+ The on-disk "dirty bitmap" may be re-used as "write-intent" bitmap as well.
+ To reduce the frequency of bitmap updates for write-intent log purposes,
+ one could dirty "chunks" (of some size) at a time of the (fine grained)
+ on-disk bitmap, while keeping the in-memory "dirty" bitmap as clean as
+ possible, flushing it to disk again when a previously "hot" (and on-disk
+ dirtied as full chunk) area "cools down" again (no IO in flight anymore,
+ and none expected in the near future either).
+
+"Explicit (coarse) write intent bitmap"
+ Another implementation could choose a (probably coarse) explicit bitmap
+ for write-intent log purposes, in addition to the fine grained dirty bitmap.
+
+"Activity log"
+ Yet another implementation may keep track of the hot regions, by starting
+ with an empty set, and writing down a journal of region numbers that have
+ become "hot", or have "cooled down" again.
+
+ To be able to use a ring buffer for this journal of changes to the active
+ set, we not only record the actual changes to that set, but also record the
+ not changing members of the set in a round robin fashion. To do so, we use a
+ fixed (but configurable) number of slots which we can identify by index, and
+ associate region numbers (labels) with these indices.
+ For each transaction recording a change to the active set, we record the
+ change itself (index: -old_label, +new_label), and which index is associated
+ with which label (index: current_label) within a certain sliding window that
+ is moved further over the available indices with each such transaction.
+
+ Thus, for crash recovery, if the ring buffer is sufficiently large, we can
+ accurately reconstruct the active set.
+
+ "Sufficiently large" depends only on the maximum number of active objects
+ and the size of the sliding window recording "index: current_label"
+ associations within each transaction.
+
+ This is what we call the "activity log".
+
+ Currently we need one activity log transaction per single label change, which
+ does not give much benefit over the "dirty chunks of bitmap" approach, other
+ than potentially fewer seeks.
+
+ We plan to change the transaction format to support multiple changes per
+ transaction, which then would reduce several (disjoint, "random") updates to
+ the bitmap into one transaction to the activity log ring buffer.
+*/
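+
+/* A toy illustration of such a transaction stream (hypothetical numbers,
+ * four slots, one "index: current_label" window record per transaction):
+ *
+ *   transaction 1: slot 2: -enr 17, +enr 23;  window: slot 0: enr 5
+ *   transaction 2: slot 0: -enr 5,  +enr 42;  window: slot 1: enr 9
+ *   ...
+ *
+ * Replaying a long enough tail of these records after a crash restores
+ * every index:label association, and with it the active set.
+ */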
+
+/* this defines an element in a tracked set
+ * .colision is for hash table lookup.
+ * When we process a new IO request, we know its sector, thus can deduce the
+ * region number (label) easily. To do the label -> object lookup without a
+ * full list walk, we use a simple hash table.
+ *
+ * .list is on one of three lists:
+ * in_use: currently in use (refcnt > 0, lc_number != LC_FREE)
+ * lru: unused but ready to be reused or recycled
+ * (refcnt == 0, lc_number != LC_FREE),
+ * free: unused but ready to be recycled
+ * (refcnt == 0, lc_number == LC_FREE),
+ *
+ * an element is said to be "in the active set",
+ * if either on "in_use" or "lru", i.e. lc_number != LC_FREE.
+ *
+ * DRBD currently only uses 61 elements on the resync lru_cache (total memory
+ * usage 2 pages), and up to 3833 elements on the act_log lru_cache, totalling
+ * ~215 kB on a 64bit architecture, ~53 pages.
+ *
+ * We usually do not actually free these objects again, but only "recycle"
+ * them, as the change "index: -old_label, +LC_FREE" would need a transaction
+ * as well. Which also means that using a kmem_cache or even mempool to
+ * allocate the objects from wastes some resources. But it would avoid high
+ * order page allocations in kmalloc, so we may change to a kmem_cache backed
+ * allocation of the elements in the near future.
+ */
+struct lc_element {
+ struct hlist_node colision;
+ struct list_head list; /* LRU list or free list */
+ unsigned int refcnt;
+ unsigned int lc_number;
+};
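+
+/* Minimal usage sketch (all names below are made up for illustration;
+ * DRBD's struct bm_extent embeds its lc_element as member "lce" this way):
+ *
+ *	struct my_extent {
+ *		struct lc_element lce;
+ *		unsigned long my_payload;
+ *	};
+ *
+ *	struct lru_cache *set = lc_create("my_set", 61,
+ *		sizeof(struct my_extent), offsetof(struct my_extent, lce));
+ *	struct lc_element *e = lc_get(set, enr);
+ *	if (e) {	(lc_get may return NULL when starving or dirty)
+ *		struct my_extent *ext = lc_entry(e, struct my_extent, lce);
+ *		... use ext; call lc_changed() if e got a new label ...
+ *		lc_put(set, e);
+ *	}
+ *	lc_destroy(set);
+ */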
+
+struct lru_cache {
+ /* the least recently used item is kept at lru->prev */
+ struct list_head lru;
+ struct list_head free;
+ struct list_head in_use;
+
+ /* size of tracked objects */
+ size_t element_size;
+ /* offset of struct lc_element member in the tracked object */
+ size_t element_off;
+
+ /* number of elements (indices) */
+ unsigned int nr_elements;
+
+ /* statistics */
+ unsigned int used;
+ unsigned long hits, misses, starving, dirty, changed;
+
+ /* see below: flag-bits for lru_cache */
+ unsigned long flags;
+
+ /* when changing the label of an index element */
+ unsigned int new_number;
+
+ /* for paranoia when changing the label of an index element */
+ struct lc_element *changing_element;
+
+ void *lc_private;
+ const char *name;
+
+ struct hlist_head slot[0];
+ /* hash colision chains here, then element storage. */
+};
+
+
+/* flag-bits for lru_cache */
+enum {
+ /* debugging aid, to catch concurrent access early.
+ * user needs to guarantee exclusive access by proper locking! */
+ __LC_PARANOIA,
+ /* if we need to change the set, but currently there is a changing
+ * transaction pending, we are "dirty", and must deferr further
+ * changing requests */
+ __LC_DIRTY,
+ /* if we need to change the set, but currently there is neither a free
+ * nor an unused element available, we are "starving", and must not give out
+ * further references, to guarantee that eventually some refcnt will
+ * drop to zero and we will be able to make progress again, changing
+ * the set, writing the transaction.
+ * if the statistics say we are frequently starving,
+ * nr_elements is too small. */
+ __LC_STARVING,
+};
+#define LC_PARANOIA (1<<__LC_PARANOIA)
+#define LC_DIRTY (1<<__LC_DIRTY)
+#define LC_STARVING (1<<__LC_STARVING)
+
+extern struct lru_cache *lc_create(const char *name, unsigned int e_count,
+ size_t e_size, size_t e_off);
+extern void lc_reset(struct lru_cache *lc);
+extern void lc_destroy(struct lru_cache *lc);
+extern void lc_set(struct lru_cache *lc, unsigned int enr, int index);
+extern void lc_del(struct lru_cache *lc, struct lc_element *element);
+
+extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr);
+extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr);
+extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr);
+extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e);
+extern void lc_changed(struct lru_cache *lc, struct lc_element *e);
+
+struct seq_file;
+extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
+
+extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
+ void (*detail) (struct seq_file *, struct lc_element *));
+
+/* This can be used to stop lc_get from changing the set of active elements.
+ * Note that the reference counts and order on the lru list may still change.
+ * Returns true if we acquired the lock.
+ */
+static inline int lc_try_lock(struct lru_cache *lc)
+{
+ return !test_and_set_bit(__LC_DIRTY, &lc->flags);
+}
+
+static inline void lc_unlock(struct lru_cache *lc)
+{
+ clear_bit(__LC_DIRTY, &lc->flags);
+ smp_mb__after_clear_bit();
+}
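+
+/* Typical pattern (a sketch; DRBD does this with its al_wait waitqueue
+ * before flushing the activity log):
+ *
+ *	wait_event(some_wq, lc_try_lock(lc));
+ *	... write out the active set / complete the pending change ...
+ *	lc_unlock(lc);
+ */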
+
+static inline int lc_is_used(struct lru_cache *lc, unsigned int enr)
+{
+ struct lc_element *e = lc_find(lc, enr);
+ return e && e->refcnt;
+}
+
+#define LC_FREE (-1U)
+
+#define lc_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
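+/* The single allocation done by lc_create() is laid out as
+ *	struct lru_cache | nr_elements hash slots | nr_elements objects
+ * so lc->slot + lc->nr_elements is the start of the object storage, and
+ * element_off locates the embedded struct lc_element inside each object. */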
+static inline struct lc_element *
+lc_element_by_index(struct lru_cache *lc, unsigned int i)
+{
+ BUG_ON(i >= lc->nr_elements);
+ return (struct lc_element *)(
+ ((char *)(lc->slot + lc->nr_elements)) +
+ i * lc->element_size
+ + lc->element_off);
+}
+
+static inline size_t lc_index_of(struct lru_cache *lc, struct lc_element *e)
+{
+ size_t i = ((char *)(e) - lc->element_off
+ - ((char *)(lc->slot + lc->nr_elements)))
+ / lc->element_size;
+ BUG_ON(i >= lc->nr_elements);
+ BUG_ON(e != lc_element_by_index(lc, i));
+ return i;
+}
+
+#endif
diff --git a/lib/Kconfig b/lib/Kconfig
index 8ade0a7a91e0..8018c5b70562 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -191,4 +191,7 @@ config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
config NLATTR
bool
+config LRU_CACHE
+ bool
+
endmenu
diff --git a/lib/Makefile b/lib/Makefile
index 33a40e40e3ee..ade9b86d04d1 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -90,6 +90,8 @@ obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o
obj-$(CONFIG_NLATTR) += nlattr.o
+obj-$(CONFIG_LRU_CACHE) += lru_cache.o
+
obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o
hostprogs-y := gen_crc32table
diff --git a/drivers/block/drbd/lru_cache.c b/lib/lru_cache.c
index 80b0839a529d..f8632f1f7f7c 100644
--- a/drivers/block/drbd/lru_cache.c
+++ b/lib/lru_cache.c
@@ -23,11 +23,12 @@
*/
+#include <linux/module.h>
#include <linux/bitops.h>
-#include <linux/vmalloc.h>
+#include <linux/slab.h>
#include <linux/string.h> /* for memset */
#include <linux/seq_file.h> /* for seq_printf */
-#include "lru_cache.h"
+#include <linux/lru_cache.h>
/* this is a developer's aid only! */
#define PARANOIA_ENTRY() BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags))
@@ -43,7 +44,7 @@ static size_t size_of_lc(unsigned int e_count, size_t e_size)
static void lc_init(struct lru_cache *lc,
const size_t bytes, const char *name,
const unsigned int e_count, const size_t e_size,
- void *private_p)
+ const size_t e_off)
{
struct lc_element *e;
unsigned int i;
@@ -55,12 +56,12 @@ static void lc_init(struct lru_cache *lc,
INIT_LIST_HEAD(&lc->lru);
INIT_LIST_HEAD(&lc->free);
lc->element_size = e_size;
+ lc->element_off = e_off;
lc->nr_elements = e_count;
lc->new_number = -1;
- lc->lc_private = private_p;
lc->name = name;
for (i = 0; i < e_count; i++) {
- e = lc_entry(lc, i);
+ e = lc_element_by_index(lc, i);
e->lc_number = LC_FREE;
list_add(&e->list, &lc->free);
/* memset(,0,) did the rest of init for us */
@@ -68,46 +69,61 @@ static void lc_init(struct lru_cache *lc,
}
/**
- * lc_alloc: allocates memory for @e_count objects of @e_size bytes plus the
- * struct lru_cache, and the hash table slots.
- * returns pointer to a newly initialized lru_cache object with said parameters.
+ * lc_create - prepares to track objects in an active set
+ * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
+ * @e_count: number of elements allowed to be active simultaneously
+ * @e_size: size of the tracked objects
+ * @e_off: offset to the &struct lc_element member in a tracked object
+ *
+ * Returns a pointer to a newly initialized struct lru_cache on success,
+ * or NULL on (allocation) failure.
*/
-struct lru_cache *lc_alloc(const char *name, unsigned int e_count,
- size_t e_size, void *private_p)
+struct lru_cache *lc_create(const char *name, unsigned int e_count,
+ size_t e_size, size_t e_off)
{
struct lru_cache *lc;
size_t bytes;
BUG_ON(!e_count);
+ BUG_ON(e_size < sizeof(struct lc_element));
+ BUG_ON(e_size - sizeof(struct lc_element) < e_off);
+ e_size = ALIGN(e_size, sizeof(void *));
e_size = max(sizeof(struct lc_element), e_size);
bytes = size_of_lc(e_count, e_size);
- lc = vmalloc(bytes);
+ lc = kmalloc(bytes, GFP_KERNEL);
if (lc)
- lc_init(lc, bytes, name, e_count, e_size, private_p);
+ lc_init(lc, bytes, name, e_count, e_size, e_off);
return lc;
}
/**
- * lc_free: Frees memory allocated by lc_alloc.
- * @lc: The lru_cache object
+ * lc_destroy - frees memory allocated by lc_create()
+ * @lc: the lru cache to operate on
*/
-void lc_free(struct lru_cache *lc)
+void lc_destroy(struct lru_cache *lc)
{
- vfree(lc);
+ kfree(lc);
}
/**
- * lc_reset: does a full reset for @lc and the hash table slots.
+ * lc_reset - does a full reset for @lc and the hash table slots.
+ * @lc: the lru cache to operate on
+ *
* It is roughly the equivalent of re-allocating a fresh lru_cache object,
- * basically a short cut to lc_free(lc); lc = lc_alloc(...);
+ * basically a shortcut to lc_destroy(lc); lc = lc_create(...);
*/
void lc_reset(struct lru_cache *lc)
{
lc_init(lc, size_of_lc(lc->nr_elements, lc->element_size), lc->name,
- lc->nr_elements, lc->element_size, lc->lc_private);
+ lc->nr_elements, lc->element_size, lc->element_off);
}
-size_t lc_printf_stats(struct seq_file *seq, struct lru_cache *lc)
+/**
+ * lc_seq_printf_stats - print stats about @lc into @seq
+ * @seq: the seq_file to print into
+ * @lc: the tracked set to print statistics of
+ */
+size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
{
/* NOTE:
* total calls to lc_get are
@@ -129,10 +145,13 @@ static unsigned int lc_hash_fn(struct lru_cache *lc, unsigned int enr)
/**
- * lc_find: Returns the pointer to an element, if the element is present
- * in the hash table. In case it is not this function returns NULL.
+ * lc_find - find element by label, if present in the hash table
* @lc: The lru_cache object
* @enr: element number
+ *
+ * Returns the pointer to an element, if the element with the requested
+ * "label" or element number is present in the hash table,
+ * or NULL if not found. Does not change the refcnt.
*/
struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
{
@@ -147,6 +166,7 @@ struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
return NULL;
}
+/* returned element will be "recycled" immediately */
static struct lc_element *lc_evict(struct lru_cache *lc)
{
struct list_head *n;
@@ -164,15 +184,18 @@ static struct lc_element *lc_evict(struct lru_cache *lc)
}
/**
- * lc_del: Removes an element from the cache (and therefore adds the
- * element's storage to the free list)
- *
+ * lc_del - removes an element from the cache
* @lc: The lru_cache object
* @e: The element to remove
+ *
+ * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list,
+ * sets @e->lc_number to %LC_FREE.
*/
void lc_del(struct lru_cache *lc, struct lc_element *e)
{
PARANOIA_ENTRY();
+ BUG_ON(e < lc_element_by_index(lc, 0));
+ BUG_ON(e > lc_element_by_index(lc, lc->nr_elements-1));
BUG_ON(e->refcnt);
list_del(&e->list);
hlist_del_init(&e->colision);
@@ -206,31 +229,41 @@ static int lc_unused_element_available(struct lru_cache *lc)
/**
- * lc_get: Finds an element in the cache, increases its usage count,
+ * lc_get - get element by label, maybe change the active set
+ * @lc: the lru cache to operate on
+ * @enr: the label to look up
+ *
+ * Finds an element in the cache, increases its usage count,
* "touches" and returns it.
+ *
* In case the requested number is not present, it needs to be added to the
 * cache. Therefore it is possible that another element gets evicted from
* the cache. In either case, the user is notified so he is able to e.g. keep
* a persistent log of the cache changes, and therefore the objects in use.
*
* Return values:
- * NULL if the requested element number was not in the cache, and no unused
- * element could be recycled
- * pointer to the element with the REQUESTED element number
- * In this case, it can be used right away
+ * NULL
+ *	The cache was marked %LC_STARVING,
+ *	or the requested label was not in the active set
+ *	and a changing transaction is still pending (@lc was marked %LC_DIRTY).
+ *	Or no unused or free element could be recycled (@lc will be marked as
+ *	%LC_STARVING, blocking further lc_get() operations).
+ *
+ * pointer to the element with the REQUESTED element number.
+ * In this case, it can be used right away
+ *
+ * pointer to an UNUSED element with some different element number,
+ * where that different number may also be %LC_FREE.
*
- * pointer to an UNUSED element with some different element number.
- * In this case, the cache is marked dirty, and the returned element
- * pointer is removed from the lru list and hash collision chains.
- * The user now should do whatever houskeeping is necessary. Then he
- * needs to call lc_element_changed(lc,element_pointer), to finish the
- * change.
+ * In this case, the cache is marked %LC_DIRTY (blocking further changes),
+ * and the returned element pointer is removed from the lru list and
+ *  hash collision chains.  The user should now do whatever housekeeping
+ *  is necessary.
+ *  Then he must call lc_changed(lc, element_pointer) to finish
+ *  the change.
*
* NOTE: The user needs to check the lc_number on EACH use, so he recognizes
* any cache set change.
- *
- * @lc: The lru_cache object
- * @enr: element number
*/
struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
{
@@ -264,10 +297,10 @@ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
RETURN(NULL);
}
- /* it was not present in the cache, find an unused element,
- * which then is replaced.
- * we need to update the cache; serialize on lc->flags & LC_DIRTY
- */
+ /* it was not present in the active set.
+ * we are going to recycle an unused (or even "free") element.
+ * user may need to commit a transaction to record that change.
+	 * we serialize on flags & LC_DIRTY */
if (test_and_set_bit(__LC_DIRTY, &lc->flags)) {
++lc->dirty;
RETURN(NULL);
@@ -289,6 +322,7 @@ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
/* similar to lc_get,
* but only gets a new reference on an existing element.
* you either get the requested element, or NULL.
+ * May be consolidated with lc_get() into one function eventually.
*/
struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
{
@@ -313,6 +347,11 @@ struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
RETURN(e);
}
+/**
+ * lc_changed - tell @lc that the change has been recorded
+ * @lc: the lru cache to operate on
+ * @e: the element pending label change
+ */
void lc_changed(struct lru_cache *lc, struct lc_element *e)
{
PARANOIA_ENTRY();
@@ -330,6 +369,15 @@ void lc_changed(struct lru_cache *lc, struct lc_element *e)
}
+/**
+ * lc_put - give up refcnt of @e
+ * @lc: the lru cache to operate on
+ * @e: the element to put
+ *
+ * If refcnt reaches zero, the element is moved to the lru list,
+ * and a pending %LC_STARVING (if set) is cleared.
+ * Returns the new (post-decrement) refcnt.
+ */
unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
{
BUG_ON(!lc);
@@ -351,11 +399,12 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
/**
- * lc_set: Sets an element in the cache. You might use this function to
- * setup the cache. It is expected that the elements are properly initialized.
- * @lc: The lru_cache object
- * @enr: element number
- * @index: The elements' position in the cache
+ * lc_set - associate index with label
+ * @lc: the lru cache to operate on
+ * @enr: the label to set
+ * @index: the element index to associate label with.
+ *
+ * Used to initialize the active set to some previously recorded state.
*/
void lc_set(struct lru_cache *lc, unsigned int enr, int index)
{
@@ -364,7 +413,7 @@ void lc_set(struct lru_cache *lc, unsigned int enr, int index)
if (index < 0 || index >= lc->nr_elements)
return;
- e = lc_entry(lc, index);
+ e = lc_element_by_index(lc, index);
e->lc_number = enr;
hlist_del_init(&e->colision);
@@ -373,9 +422,14 @@ void lc_set(struct lru_cache *lc, unsigned int enr, int index)
}
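+
+/* Restore sketch (read_label() is a hypothetical reader of the
+ * previously recorded state):
+ *
+ *	for (i = 0; i < nr_elements; i++)
+ *		lc_set(lc, read_label(i), i);
+ */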
/**
- * lc_dump: Dump a complete LRU cache to seq in textual form.
+ * lc_seq_dump_details - Dump a complete LRU cache to seq in textual form.
+ * @lc: the lru cache to operate on
+ * @seq: the &struct seq_file pointer to seq_printf into
+ * @utext: user-supplied "heading" or other info
+ * @detail: function pointer the user may provide to dump further details
+ * of the object the lc_element is embedded in.
*/
-void lc_dump(struct lru_cache *lc, struct seq_file *seq, char *utext,
+void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
void (*detail) (struct seq_file *, struct lc_element *))
{
unsigned int nr_elements = lc->nr_elements;
@@ -384,7 +438,7 @@ void lc_dump(struct lru_cache *lc, struct seq_file *seq, char *utext,
seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext);
for (i = 0; i < nr_elements; i++) {
- e = lc_entry(lc, i);
+ e = lc_element_by_index(lc, i);
if (e->lc_number == LC_FREE) {
seq_printf(seq, "\t%2d: FREE\n", i);
} else {
@@ -396,3 +450,15 @@ void lc_dump(struct lru_cache *lc, struct seq_file *seq, char *utext,
}
}
+EXPORT_SYMBOL(lc_create);
+EXPORT_SYMBOL(lc_reset);
+EXPORT_SYMBOL(lc_destroy);
+EXPORT_SYMBOL(lc_set);
+EXPORT_SYMBOL(lc_del);
+EXPORT_SYMBOL(lc_try_get);
+EXPORT_SYMBOL(lc_find);
+EXPORT_SYMBOL(lc_get);
+EXPORT_SYMBOL(lc_put);
+EXPORT_SYMBOL(lc_changed);
+EXPORT_SYMBOL(lc_seq_printf_stats);
+EXPORT_SYMBOL(lc_seq_dump_details);
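Taken together, the exported calls are meant to be used in the cycle the
lc_get() kernel-doc above describes; a hedged sketch, where enr and
record_change() are made-up names:

	struct lc_element *e = lc_get(lc, enr);

	if (!e)
		return;	/* starving, or a label change is still pending */

	if (e->lc_number != enr) {
		/* an unused element was recycled for this label: record
		 * the change (e.g. in a persistent log), then commit it */
		record_change(e->lc_number, enr);
		lc_changed(lc, e);
	}

	/* ... use the object the element is embedded in ... */

	lc_put(lc, e);	/* drop the reference when done */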