diff options
Diffstat (limited to 'fs/bio.c')
-rw-r--r-- | fs/bio.c | 601 |
1 files changed, 433 insertions, 168 deletions
@@ -19,6 +19,7 @@ #include <linux/swap.h> #include <linux/bio.h> #include <linux/blkdev.h> +#include <linux/uio.h> #include <linux/iocontext.h> #include <linux/slab.h> #include <linux/init.h> @@ -55,6 +56,7 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { * IO code that does not need private memory pools. */ struct bio_set *fs_bio_set; +EXPORT_SYMBOL(fs_bio_set); /* * Our slab pool management @@ -73,7 +75,8 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) { unsigned int sz = sizeof(struct bio) + extra_size; struct kmem_cache *slab = NULL; - struct bio_slab *bslab; + struct bio_slab *bslab, *new_bio_slabs; + unsigned int new_bio_slab_max; unsigned int i, entry = -1; mutex_lock(&bio_slab_lock); @@ -96,12 +99,14 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) goto out_unlock; if (bio_slab_nr == bio_slab_max && entry == -1) { - bio_slab_max <<= 1; - bio_slabs = krealloc(bio_slabs, - bio_slab_max * sizeof(struct bio_slab), - GFP_KERNEL); - if (!bio_slabs) + new_bio_slab_max = bio_slab_max << 1; + new_bio_slabs = krealloc(bio_slabs, + new_bio_slab_max * sizeof(struct bio_slab), + GFP_KERNEL); + if (!new_bio_slabs) goto out_unlock; + bio_slab_max = new_bio_slab_max; + bio_slabs = new_bio_slabs; } if (entry == -1) entry = bio_slab_nr++; @@ -156,12 +161,12 @@ unsigned int bvec_nr_vecs(unsigned short idx) return bvec_slabs[idx].nr_vecs; } -void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx) +void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx) { BIO_BUG_ON(idx >= BIOVEC_NR_POOLS); if (idx == BIOVEC_MAX_IDX) - mempool_free(bv, bs->bvec_pool); + mempool_free(bv, pool); else { struct biovec_slab *bvs = bvec_slabs + idx; @@ -169,8 +174,8 @@ void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx) } } -struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, - struct bio_set *bs) +struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx, + mempool_t *pool) { struct bio_vec *bvl; @@ -206,7 +211,7 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, */ if (*idx == BIOVEC_MAX_IDX) { fallback: - bvl = mempool_alloc(bs->bvec_pool, gfp_mask); + bvl = mempool_alloc(pool, gfp_mask); } else { struct biovec_slab *bvs = bvec_slabs + *idx; gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO); @@ -232,26 +237,37 @@ fallback: return bvl; } -void bio_free(struct bio *bio, struct bio_set *bs) +static void __bio_free(struct bio *bio) { + bio_disassociate_task(bio); + + if (bio_integrity(bio)) + bio_integrity_free(bio); +} + +static void bio_free(struct bio *bio) +{ + struct bio_set *bs = bio->bi_pool; void *p; - if (bio_has_allocated_vec(bio)) - bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); + __bio_free(bio); - if (bio_integrity(bio)) - bio_integrity_free(bio, bs); + if (bs) { + if (bio_flagged(bio, BIO_OWNS_VEC)) + bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio)); - /* - * If we have front padding, adjust the bio pointer before freeing - */ - p = bio; - if (bs->front_pad) + /* + * If we have front padding, adjust the bio pointer before freeing + */ + p = bio; p -= bs->front_pad; - mempool_free(p, bs->bio_pool); + mempool_free(p, bs->bio_pool); + } else { + /* Bio was allocated by bio_kmalloc() */ + kfree(bio); + } } -EXPORT_SYMBOL(bio_free); void bio_init(struct bio *bio) { @@ -262,132 +278,197 @@ void bio_init(struct bio *bio) EXPORT_SYMBOL(bio_init); /** - * bio_alloc_bioset - allocate a bio for I/O - * @gfp_mask: the GFP_ mask given to the slab allocator - * @nr_iovecs: number of iovecs to pre-allocate - * @bs: the bio_set to allocate from. + * bio_reset - reinitialize a bio + * @bio: bio to reset * * Description: - * bio_alloc_bioset will try its own mempool to satisfy the allocation. - * If %__GFP_WAIT is set then we will block on the internal pool waiting - * for a &struct bio to become free. - * - * Note that the caller must set ->bi_destructor on successful return - * of a bio, to do the appropriate freeing of the bio once the reference - * count drops to zero. - **/ -struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) + * After calling bio_reset(), @bio will be in the same state as a freshly + * allocated bio returned bio bio_alloc_bioset() - the only fields that are + * preserved are the ones that are initialized by bio_alloc_bioset(). See + * comment in struct bio. + */ +void bio_reset(struct bio *bio) { - unsigned long idx = BIO_POOL_NONE; - struct bio_vec *bvl = NULL; - struct bio *bio; - void *p; + unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS); - p = mempool_alloc(bs->bio_pool, gfp_mask); - if (unlikely(!p)) - return NULL; - bio = p + bs->front_pad; + __bio_free(bio); - bio_init(bio); + memset(bio, 0, BIO_RESET_BYTES); + bio->bi_flags = flags|(1 << BIO_UPTODATE); +} +EXPORT_SYMBOL(bio_reset); - if (unlikely(!nr_iovecs)) - goto out_set; +static void bio_alloc_rescue(struct work_struct *work) +{ + struct bio_set *bs = container_of(work, struct bio_set, rescue_work); + struct bio *bio; - if (nr_iovecs <= BIO_INLINE_VECS) { - bvl = bio->bi_inline_vecs; - nr_iovecs = BIO_INLINE_VECS; - } else { - bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); - if (unlikely(!bvl)) - goto err_free; + while (1) { + spin_lock(&bs->rescue_lock); + bio = bio_list_pop(&bs->rescue_list); + spin_unlock(&bs->rescue_lock); - nr_iovecs = bvec_nr_vecs(idx); - } -out_set: - bio->bi_flags |= idx << BIO_POOL_OFFSET; - bio->bi_max_vecs = nr_iovecs; - bio->bi_io_vec = bvl; - return bio; + if (!bio) + break; -err_free: - mempool_free(p, bs->bio_pool); - return NULL; + generic_make_request(bio); + } } -EXPORT_SYMBOL(bio_alloc_bioset); -static void bio_fs_destructor(struct bio *bio) +static void punt_bios_to_rescuer(struct bio_set *bs) { - bio_free(bio, fs_bio_set); -} + struct bio_list punt, nopunt; + struct bio *bio; -/** - * bio_alloc - allocate a new bio, memory pool backed - * @gfp_mask: allocation mask to use - * @nr_iovecs: number of iovecs - * - * bio_alloc will allocate a bio and associated bio_vec array that can hold - * at least @nr_iovecs entries. Allocations will be done from the - * fs_bio_set. Also see @bio_alloc_bioset and @bio_kmalloc. - * - * If %__GFP_WAIT is set, then bio_alloc will always be able to allocate - * a bio. This is due to the mempool guarantees. To make this work, callers - * must never allocate more than 1 bio at a time from this pool. Callers - * that need to allocate more than 1 bio must always submit the previously - * allocated bio for IO before attempting to allocate a new one. Failure to - * do so can cause livelocks under memory pressure. - * - * RETURNS: - * Pointer to new bio on success, NULL on failure. - */ -struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) -{ - struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); + /* + * In order to guarantee forward progress we must punt only bios that + * were allocated from this bio_set; otherwise, if there was a bio on + * there for a stacking driver higher up in the stack, processing it + * could require allocating bios from this bio_set, and doing that from + * our own rescuer would be bad. + * + * Since bio lists are singly linked, pop them all instead of trying to + * remove from the middle of the list: + */ - if (bio) - bio->bi_destructor = bio_fs_destructor; + bio_list_init(&punt); + bio_list_init(&nopunt); - return bio; -} -EXPORT_SYMBOL(bio_alloc); + while ((bio = bio_list_pop(current->bio_list))) + bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); -static void bio_kmalloc_destructor(struct bio *bio) -{ - if (bio_integrity(bio)) - bio_integrity_free(bio, fs_bio_set); - kfree(bio); + *current->bio_list = nopunt; + + spin_lock(&bs->rescue_lock); + bio_list_merge(&bs->rescue_list, &punt); + spin_unlock(&bs->rescue_lock); + + queue_work(bs->rescue_workqueue, &bs->rescue_work); } /** - * bio_kmalloc - allocate a bio for I/O using kmalloc() + * bio_alloc_bioset - allocate a bio for I/O * @gfp_mask: the GFP_ mask given to the slab allocator * @nr_iovecs: number of iovecs to pre-allocate + * @bs: the bio_set to allocate from. * * Description: - * Allocate a new bio with @nr_iovecs bvecs. If @gfp_mask contains - * %__GFP_WAIT, the allocation is guaranteed to succeed. + * If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is + * backed by the @bs's mempool. * - **/ -struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) + * When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be + * able to allocate a bio. This is due to the mempool guarantees. To make this + * work, callers must never allocate more than 1 bio at a time from this pool. + * Callers that need to allocate more than 1 bio must always submit the + * previously allocated bio for IO before attempting to allocate a new one. + * Failure to do so can cause deadlocks under memory pressure. + * + * Note that when running under generic_make_request() (i.e. any block + * driver), bios are not submitted until after you return - see the code in + * generic_make_request() that converts recursion into iteration, to prevent + * stack overflows. + * + * This would normally mean allocating multiple bios under + * generic_make_request() would be susceptible to deadlocks, but we have + * deadlock avoidance code that resubmits any blocked bios from a rescuer + * thread. + * + * However, we do not guarantee forward progress for allocations from other + * mempools. Doing multiple allocations from the same mempool under + * generic_make_request() should be avoided - instead, use bio_set's front_pad + * for per bio allocations. + * + * RETURNS: + * Pointer to new bio on success, NULL on failure. + */ +struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { + gfp_t saved_gfp = gfp_mask; + unsigned front_pad; + unsigned inline_vecs; + unsigned long idx = BIO_POOL_NONE; + struct bio_vec *bvl = NULL; struct bio *bio; + void *p; - if (nr_iovecs > UIO_MAXIOV) - return NULL; + if (!bs) { + if (nr_iovecs > UIO_MAXIOV) + return NULL; + + p = kmalloc(sizeof(struct bio) + + nr_iovecs * sizeof(struct bio_vec), + gfp_mask); + front_pad = 0; + inline_vecs = nr_iovecs; + } else { + /* + * generic_make_request() converts recursion to iteration; this + * means if we're running beneath it, any bios we allocate and + * submit will not be submitted (and thus freed) until after we + * return. + * + * This exposes us to a potential deadlock if we allocate + * multiple bios from the same bio_set() while running + * underneath generic_make_request(). If we were to allocate + * multiple bios (say a stacking block driver that was splitting + * bios), we would deadlock if we exhausted the mempool's + * reserve. + * + * We solve this, and guarantee forward progress, with a rescuer + * workqueue per bio_set. If we go to allocate and there are + * bios on current->bio_list, we first try the allocation + * without __GFP_WAIT; if that fails, we punt those bios we + * would be blocking to the rescuer workqueue before we retry + * with the original gfp_flags. + */ + + if (current->bio_list && !bio_list_empty(current->bio_list)) + gfp_mask &= ~__GFP_WAIT; + + p = mempool_alloc(bs->bio_pool, gfp_mask); + if (!p && gfp_mask != saved_gfp) { + punt_bios_to_rescuer(bs); + gfp_mask = saved_gfp; + p = mempool_alloc(bs->bio_pool, gfp_mask); + } + + front_pad = bs->front_pad; + inline_vecs = BIO_INLINE_VECS; + } - bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec), - gfp_mask); - if (unlikely(!bio)) + if (unlikely(!p)) return NULL; + bio = p + front_pad; bio_init(bio); - bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET; - bio->bi_max_vecs = nr_iovecs; - bio->bi_io_vec = bio->bi_inline_vecs; - bio->bi_destructor = bio_kmalloc_destructor; + if (nr_iovecs > inline_vecs) { + bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool); + if (!bvl && gfp_mask != saved_gfp) { + punt_bios_to_rescuer(bs); + gfp_mask = saved_gfp; + bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool); + } + + if (unlikely(!bvl)) + goto err_free; + + bio->bi_flags |= 1 << BIO_OWNS_VEC; + } else if (nr_iovecs) { + bvl = bio->bi_inline_vecs; + } + + bio->bi_pool = bs; + bio->bi_flags |= idx << BIO_POOL_OFFSET; + bio->bi_max_vecs = nr_iovecs; + bio->bi_io_vec = bvl; return bio; + +err_free: + mempool_free(p, bs->bio_pool); + return NULL; } -EXPORT_SYMBOL(bio_kmalloc); +EXPORT_SYMBOL(bio_alloc_bioset); void zero_fill_bio(struct bio *bio) { @@ -419,11 +500,8 @@ void bio_put(struct bio *bio) /* * last put frees it */ - if (atomic_dec_and_test(&bio->bi_cnt)) { - bio_disassociate_task(bio); - bio->bi_next = NULL; - bio->bi_destructor(bio); - } + if (atomic_dec_and_test(&bio->bi_cnt)) + bio_free(bio); } EXPORT_SYMBOL(bio_put); @@ -465,26 +543,28 @@ void __bio_clone(struct bio *bio, struct bio *bio_src) EXPORT_SYMBOL(__bio_clone); /** - * bio_clone - clone a bio + * bio_clone_bioset - clone a bio * @bio: bio to clone * @gfp_mask: allocation priority + * @bs: bio_set to allocate from * * Like __bio_clone, only also allocates the returned bio */ -struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) +struct bio *bio_clone_bioset(struct bio *bio, gfp_t gfp_mask, + struct bio_set *bs) { - struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); + struct bio *b; + b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, bs); if (!b) return NULL; - b->bi_destructor = bio_fs_destructor; __bio_clone(b, bio); if (bio_integrity(bio)) { int ret; - ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set); + ret = bio_integrity_clone(b, bio, gfp_mask); if (ret < 0) { bio_put(b); @@ -494,7 +574,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) return b; } -EXPORT_SYMBOL(bio_clone); +EXPORT_SYMBOL(bio_clone_bioset); /** * bio_get_nr_vecs - return approx number of vecs @@ -675,6 +755,181 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, } EXPORT_SYMBOL(bio_add_page); +struct submit_bio_ret { + struct completion event; + int error; +}; + +static void submit_bio_wait_endio(struct bio *bio, int error) +{ + struct submit_bio_ret *ret = bio->bi_private; + + ret->error = error; + complete(&ret->event); +} + +/** + * submit_bio_wait - submit a bio, and wait until it completes + * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) + * @bio: The &struct bio which describes the I/O + * + * Simple wrapper around submit_bio(). Returns 0 on success, or the error from + * bio_endio() on failure. + */ +int submit_bio_wait(int rw, struct bio *bio) +{ + struct submit_bio_ret ret; + + rw |= REQ_SYNC; + init_completion(&ret.event); + bio->bi_private = &ret; + bio->bi_end_io = submit_bio_wait_endio; + submit_bio(rw, bio); + wait_for_completion(&ret.event); + + return ret.error; +} +EXPORT_SYMBOL(submit_bio_wait); + +/** + * bio_advance - increment/complete a bio by some number of bytes + * @bio: bio to advance + * @bytes: number of bytes to complete + * + * This updates bi_sector, bi_size and bi_idx; if the number of bytes to + * complete doesn't align with a bvec boundary, then bv_len and bv_offset will + * be updated on the last bvec as well. + * + * @bio will then represent the remaining, uncompleted portion of the io. + */ +void bio_advance(struct bio *bio, unsigned bytes) +{ + if (bio_integrity(bio)) + bio_integrity_advance(bio, bytes); + + bio->bi_sector += bytes >> 9; + bio->bi_size -= bytes; + + if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK) + return; + + while (bytes) { + if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { + WARN_ONCE(1, "bio idx %d >= vcnt %d\n", + bio->bi_idx, bio->bi_vcnt); + break; + } + + if (bytes >= bio_iovec(bio)->bv_len) { + bytes -= bio_iovec(bio)->bv_len; + bio->bi_idx++; + } else { + bio_iovec(bio)->bv_len -= bytes; + bio_iovec(bio)->bv_offset += bytes; + bytes = 0; + } + } +} +EXPORT_SYMBOL(bio_advance); + +/** + * bio_alloc_pages - allocates a single page for each bvec in a bio + * @bio: bio to allocate pages for + * @gfp_mask: flags for allocation + * + * Allocates pages up to @bio->bi_vcnt. + * + * Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are + * freed. + */ +int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask) +{ + int i; + struct bio_vec *bv; + + bio_for_each_segment_all(bv, bio, i) { + bv->bv_page = alloc_page(gfp_mask); + if (!bv->bv_page) { + while (--bv >= bio->bi_io_vec) + __free_page(bv->bv_page); + return -ENOMEM; + } + } + + return 0; +} +EXPORT_SYMBOL(bio_alloc_pages); + +/** + * bio_copy_data - copy contents of data buffers from one chain of bios to + * another + * @src: source bio list + * @dst: destination bio list + * + * If @src and @dst are single bios, bi_next must be NULL - otherwise, treats + * @src and @dst as linked lists of bios. + * + * Stops when it reaches the end of either @src or @dst - that is, copies + * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios). + */ +void bio_copy_data(struct bio *dst, struct bio *src) +{ + struct bio_vec *src_bv, *dst_bv; + unsigned src_offset, dst_offset, bytes; + void *src_p, *dst_p; + + src_bv = bio_iovec(src); + dst_bv = bio_iovec(dst); + + src_offset = src_bv->bv_offset; + dst_offset = dst_bv->bv_offset; + + while (1) { + if (src_offset == src_bv->bv_offset + src_bv->bv_len) { + src_bv++; + if (src_bv == bio_iovec_idx(src, src->bi_vcnt)) { + src = src->bi_next; + if (!src) + break; + + src_bv = bio_iovec(src); + } + + src_offset = src_bv->bv_offset; + } + + if (dst_offset == dst_bv->bv_offset + dst_bv->bv_len) { + dst_bv++; + if (dst_bv == bio_iovec_idx(dst, dst->bi_vcnt)) { + dst = dst->bi_next; + if (!dst) + break; + + dst_bv = bio_iovec(dst); + } + + dst_offset = dst_bv->bv_offset; + } + + bytes = min(dst_bv->bv_offset + dst_bv->bv_len - dst_offset, + src_bv->bv_offset + src_bv->bv_len - src_offset); + + src_p = kmap_atomic(src_bv->bv_page); + dst_p = kmap_atomic(dst_bv->bv_page); + + memcpy(dst_p + dst_bv->bv_offset, + src_p + src_bv->bv_offset, + bytes); + + kunmap_atomic(dst_p); + kunmap_atomic(src_p); + + src_offset += bytes; + dst_offset += bytes; + } +} +EXPORT_SYMBOL(bio_copy_data); + struct bio_map_data { struct bio_vec *iovecs; struct sg_iovec *sgvecs; @@ -737,7 +992,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, int iov_idx = 0; unsigned int iov_off = 0; - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment_all(bvec, bio, i) { char *bv_addr = page_address(bvec->bv_page); unsigned int bv_len = iovecs[i].bv_len; @@ -919,7 +1174,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, return bio; cleanup: if (!map_data) - bio_for_each_segment(bvec, bio, i) + bio_for_each_segment_all(bvec, bio, i) __free_page(bvec->bv_page); bio_put(bio); @@ -1133,7 +1388,7 @@ static void __bio_unmap_user(struct bio *bio) /* * make sure we dirty pages we wrote to */ - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment_all(bvec, bio, i) { if (bio_data_dir(bio) == READ) set_page_dirty_lock(bvec->bv_page); @@ -1239,7 +1494,7 @@ static void bio_copy_kern_endio(struct bio *bio, int err) int i; char *p = bmd->sgvecs[0].iov_base; - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment_all(bvec, bio, i) { char *addr = page_address(bvec->bv_page); int len = bmd->iovecs[i].bv_len; @@ -1279,7 +1534,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, if (!reading) { void *p = data; - bio_for_each_segment(bvec, bio, i) { + bio_for_each_segment_all(bvec, bio, i) { char *addr = page_address(bvec->bv_page); memcpy(addr, p, bvec->bv_len); @@ -1312,7 +1567,7 @@ EXPORT_SYMBOL(bio_copy_kern); * Note that this code is very hard to test under normal circumstances because * direct-io pins the pages with get_user_pages(). This makes * is_page_cache_freeable return false, and the VM will not clean the pages. - * But other code (eg, pdflush) could clean the pages if they are mapped + * But other code (eg, flusher threads) could clean the pages if they are mapped * pagecache. * * Simply disabling the call to bio_set_pages_dirty() is a good way to test the @@ -1324,11 +1579,11 @@ EXPORT_SYMBOL(bio_copy_kern); */ void bio_set_pages_dirty(struct bio *bio) { - struct bio_vec *bvec = bio->bi_io_vec; + struct bio_vec *bvec; int i; - for (i = 0; i < bio->bi_vcnt; i++) { - struct page *page = bvec[i].bv_page; + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; if (page && !PageCompound(page)) set_page_dirty_lock(page); @@ -1337,11 +1592,11 @@ void bio_set_pages_dirty(struct bio *bio) static void bio_release_pages(struct bio *bio) { - struct bio_vec *bvec = bio->bi_io_vec; + struct bio_vec *bvec; int i; - for (i = 0; i < bio->bi_vcnt; i++) { - struct page *page = bvec[i].bv_page; + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; if (page) put_page(page); @@ -1390,16 +1645,16 @@ static void bio_dirty_fn(struct work_struct *work) void bio_check_pages_dirty(struct bio *bio) { - struct bio_vec *bvec = bio->bi_io_vec; + struct bio_vec *bvec; int nr_clean_pages = 0; int i; - for (i = 0; i < bio->bi_vcnt; i++) { - struct page *page = bvec[i].bv_page; + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; if (PageDirty(page) || PageCompound(page)) { page_cache_release(page); - bvec[i].bv_page = NULL; + bvec->bv_page = NULL; } else { nr_clean_pages++; } @@ -1500,8 +1755,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) trace_block_split(bdev_get_queue(bi->bi_bdev), bi, bi->bi_sector + first_sectors); - BUG_ON(bi->bi_vcnt != 1); - BUG_ON(bi->bi_idx != 0); + BUG_ON(bio_segments(bi) > 1); atomic_set(&bp->cnt, 3); bp->error = 0; bp->bio1 = *bi; @@ -1510,17 +1764,22 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) bp->bio2.bi_size -= first_sectors << 9; bp->bio1.bi_size = first_sectors << 9; - bp->bv1 = bi->bi_io_vec[0]; - bp->bv2 = bi->bi_io_vec[0]; - bp->bv2.bv_offset += first_sectors << 9; - bp->bv2.bv_len -= first_sectors << 9; - bp->bv1.bv_len = first_sectors << 9; + if (bi->bi_vcnt != 0) { + bp->bv1 = *bio_iovec(bi); + bp->bv2 = *bio_iovec(bi); + + if (bio_is_rw(bi)) { + bp->bv2.bv_offset += first_sectors << 9; + bp->bv2.bv_len -= first_sectors << 9; + bp->bv1.bv_len = first_sectors << 9; + } - bp->bio1.bi_io_vec = &bp->bv1; - bp->bio2.bi_io_vec = &bp->bv2; + bp->bio1.bi_io_vec = &bp->bv1; + bp->bio2.bi_io_vec = &bp->bv2; - bp->bio1.bi_max_vecs = 1; - bp->bio2.bi_max_vecs = 1; + bp->bio1.bi_max_vecs = 1; + bp->bio2.bi_max_vecs = 1; + } bp->bio1.bi_end_io = bio_pair_end_1; bp->bio2.bi_end_io = bio_pair_end_2; @@ -1559,7 +1818,7 @@ sector_t bio_sector_offset(struct bio *bio, unsigned short index, if (index >= bio->bi_idx) index = bio->bi_vcnt - 1; - __bio_for_each_segment(bv, bio, i, 0) { + bio_for_each_segment_all(bv, bio, i) { if (i == index) { if (offset > bv->bv_offset) sectors += (offset - bv->bv_offset) / sector_sz; @@ -1577,29 +1836,25 @@ EXPORT_SYMBOL(bio_sector_offset); * create memory pools for biovec's in a bio_set. * use the global biovec slabs created for general use. */ -static int biovec_create_pools(struct bio_set *bs, int pool_entries) +mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries) { struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX; - bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab); - if (!bs->bvec_pool) - return -ENOMEM; - - return 0; -} - -static void biovec_free_pools(struct bio_set *bs) -{ - mempool_destroy(bs->bvec_pool); + return mempool_create_slab_pool(pool_entries, bp->slab); } void bioset_free(struct bio_set *bs) { + if (bs->rescue_workqueue) + destroy_workqueue(bs->rescue_workqueue); + if (bs->bio_pool) mempool_destroy(bs->bio_pool); + if (bs->bvec_pool) + mempool_destroy(bs->bvec_pool); + bioset_integrity_free(bs); - biovec_free_pools(bs); bio_put_slab(bs); kfree(bs); @@ -1630,6 +1885,10 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) bs->front_pad = front_pad; + spin_lock_init(&bs->rescue_lock); + bio_list_init(&bs->rescue_list); + INIT_WORK(&bs->rescue_work, bio_alloc_rescue); + bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad); if (!bs->bio_slab) { kfree(bs); @@ -1640,9 +1899,15 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) if (!bs->bio_pool) goto bad; - if (!biovec_create_pools(bs, pool_size)) - return bs; + bs->bvec_pool = biovec_create_pool(bs, pool_size); + if (!bs->bvec_pool) + goto bad; + + bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); + if (!bs->rescue_workqueue) + goto bad; + return bs; bad: bioset_free(bs); return NULL; |