From 5263e31e452fb84138b9bee061d5c06c0f359fea Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Jan 2015 15:05:55 -0500 Subject: locks: move flock locks to file_lock_context Signed-off-by: Jeff Layton Acked-by: Christoph Hellwig --- fs/nfs/write.c | 43 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index af3af685a9e3..e072aeb34195 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1113,6 +1113,11 @@ int nfs_flush_incompatible(struct file *file, struct page *page) do_flush |= l_ctx->lockowner.l_owner != current->files || l_ctx->lockowner.l_pid != current->tgid; } + if (l_ctx && ctx->dentry->d_inode->i_flctx && + !list_empty_careful(&ctx->dentry->d_inode->i_flctx->flc_flock)) { + do_flush |= l_ctx->lockowner.l_owner != current->files + || l_ctx->lockowner.l_pid != current->tgid; + } nfs_release_request(req); if (!do_flush) return 0; @@ -1170,6 +1175,13 @@ out: return PageUptodate(page) != 0; } +static bool +is_whole_file_wrlock(struct file_lock *fl) +{ + return fl->fl_start == 0 && fl->fl_end == OFFSET_MAX && + fl->fl_type == F_WRLCK; +} + /* If we know the page is up to date, and we're not using byte range locks (or * if we have the whole file locked for writing), it may be more efficient to * extend the write to cover the entire page in order to avoid fragmentation @@ -1180,17 +1192,38 @@ out: */ static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode) { + int ret; + struct file_lock_context *flctx = inode->i_flctx; + struct file_lock *fl; + if (file->f_flags & O_DSYNC) return 0; if (!nfs_write_pageuptodate(page, inode)) return 0; if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) return 1; - if (inode->i_flock == NULL || (inode->i_flock->fl_start == 0 && - inode->i_flock->fl_end == OFFSET_MAX && - inode->i_flock->fl_type != F_RDLCK)) - return 1; - return 0; + if (!inode->i_flock && !flctx) + return 0; + + /* Check to see if there are whole file write locks */ + spin_lock(&inode->i_lock); + ret = 0; + + fl = inode->i_flock; + if (fl && is_whole_file_wrlock(fl)) { + ret = 1; + goto out; + } + + if (!list_empty(&flctx->flc_flock)) { + fl = list_first_entry(&flctx->flc_flock, struct file_lock, + fl_list); + if (fl->fl_type == F_WRLCK) + ret = 1; + } +out: + spin_unlock(&inode->i_lock); + return ret; } /* -- cgit v1.2.3 From bd61e0a9c852de2d705b6f1bb2cc54c5774db570 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Jan 2015 15:05:55 -0500 Subject: locks: convert posix locks to file_lock_context Signed-off-by: Jeff Layton Acked-by: Christoph Hellwig --- fs/ceph/locks.c | 58 +++++++++++++--------------- fs/cifs/file.c | 26 +++++-------- fs/lockd/svcsubs.c | 20 ++++++---- fs/locks.c | 108 +++++++++++++++++++++++++++------------------------- fs/nfs/delegation.c | 28 +++++--------- fs/nfs/nfs4state.c | 52 +++++-------------------- fs/nfs/pagelist.c | 8 ++-- fs/nfs/write.c | 30 +++++++-------- fs/nfsd/nfs4state.c | 18 +++++---- fs/read_write.c | 2 +- include/linux/fs.h | 3 +- 11 files changed, 155 insertions(+), 198 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 917656ea8dcf..19beeed83233 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -253,18 +253,15 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) *fcntl_count = 0; *flock_count = 0; - spin_lock(&inode->i_lock); - for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { - if (lock->fl_flags & FL_POSIX) - ++(*fcntl_count); - } - ctx = inode->i_flctx; if (ctx) { + spin_lock(&inode->i_lock); + list_for_each_entry(lock, &ctx->flc_posix, fl_list) + ++(*fcntl_count); list_for_each_entry(lock, &ctx->flc_flock, fl_list) ++(*flock_count); + spin_unlock(&inode->i_lock); } - spin_unlock(&inode->i_lock); dout("counted %d flock locks and %d fcntl locks", *flock_count, *fcntl_count); } @@ -279,7 +276,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode, int num_fcntl_locks, int num_flock_locks) { struct file_lock *lock; - struct file_lock_context *ctx; + struct file_lock_context *ctx = inode->i_flctx; int err = 0; int seen_fcntl = 0; int seen_flock = 0; @@ -288,34 +285,31 @@ int ceph_encode_locks_to_buffer(struct inode *inode, dout("encoding %d flock and %d fcntl locks", num_flock_locks, num_fcntl_locks); + if (!ctx) + return 0; + spin_lock(&inode->i_lock); - for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { - if (lock->fl_flags & FL_POSIX) { - ++seen_fcntl; - if (seen_fcntl > num_fcntl_locks) { - err = -ENOSPC; - goto fail; - } - err = lock_to_ceph_filelock(lock, &flocks[l]); - if (err) - goto fail; - ++l; + list_for_each_entry(lock, &ctx->flc_flock, fl_list) { + ++seen_fcntl; + if (seen_fcntl > num_fcntl_locks) { + err = -ENOSPC; + goto fail; } + err = lock_to_ceph_filelock(lock, &flocks[l]); + if (err) + goto fail; + ++l; } - - ctx = inode->i_flctx; - if (ctx) { - list_for_each_entry(lock, &ctx->flc_flock, fl_list) { - ++seen_flock; - if (seen_flock > num_flock_locks) { - err = -ENOSPC; - goto fail; - } - err = lock_to_ceph_filelock(lock, &flocks[l]); - if (err) - goto fail; - ++l; + list_for_each_entry(lock, &ctx->flc_flock, fl_list) { + ++seen_flock; + if (seen_flock > num_flock_locks) { + err = -ENOSPC; + goto fail; } + err = lock_to_ceph_filelock(lock, &flocks[l]); + if (err) + goto fail; + ++l; } fail: spin_unlock(&inode->i_lock); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 96b7e9b7706d..ea78f6f81ce2 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1109,11 +1109,6 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) return rc; } -/* copied from fs/locks.c with a name change */ -#define cifs_for_each_lock(inode, lockp) \ - for (lockp = &inode->i_flock; *lockp != NULL; \ - lockp = &(*lockp)->fl_next) - struct lock_to_push { struct list_head llist; __u64 offset; @@ -1128,8 +1123,9 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) { struct inode *inode = cfile->dentry->d_inode; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); - struct file_lock *flock, **before; - unsigned int count = 0, i = 0; + struct file_lock *flock; + struct file_lock_context *flctx = inode->i_flctx; + unsigned int count = 0, i; int rc = 0, xid, type; struct list_head locks_to_send, *el; struct lock_to_push *lck, *tmp; @@ -1137,10 +1133,12 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) xid = get_xid(); + if (!flctx) + goto out; + spin_lock(&inode->i_lock); - cifs_for_each_lock(inode, before) { - if ((*before)->fl_flags & FL_POSIX) - count++; + list_for_each(el, &flctx->flc_posix) { + count++; } spin_unlock(&inode->i_lock); @@ -1151,7 +1149,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) * added to the list while we are holding cinode->lock_sem that * protects locking operations of this inode. */ - for (; i < count; i++) { + for (i = 0; i < count; i++) { lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); if (!lck) { rc = -ENOMEM; @@ -1162,10 +1160,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) el = locks_to_send.next; spin_lock(&inode->i_lock); - cifs_for_each_lock(inode, before) { - flock = *before; - if ((flock->fl_flags & FL_POSIX) == 0) - continue; + list_for_each_entry(flock, &flctx->flc_posix, fl_list) { if (el == &locks_to_send) { /* * The list ended. We don't have enough allocated @@ -1185,7 +1180,6 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) lck->length = length; lck->type = type; lck->offset = flock->fl_start; - el = el->next; } spin_unlock(&inode->i_lock); diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index d12ff4e2dbe7..5300bb53835f 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -164,12 +164,15 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; + struct file_lock_context *flctx = inode->i_flctx; struct nlm_host *lockhost; + if (!flctx || list_empty_careful(&flctx->flc_posix)) + return 0; again: file->f_locks = 0; spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl; fl = fl->fl_next) { + list_for_each_entry(fl, &flctx->flc_posix, fl_list) { if (fl->fl_lmops != &nlmsvc_lock_operations) continue; @@ -223,18 +226,21 @@ nlm_file_inuse(struct nlm_file *file) { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; + struct file_lock_context *flctx = inode->i_flctx; if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) return 1; - spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl; fl = fl->fl_next) { - if (fl->fl_lmops == &nlmsvc_lock_operations) { - spin_unlock(&inode->i_lock); - return 1; + if (flctx && !list_empty_careful(&flctx->flc_posix)) { + spin_lock(&inode->i_lock); + list_for_each_entry(fl, &flctx->flc_posix, fl_list) { + if (fl->fl_lmops == &nlmsvc_lock_operations) { + spin_unlock(&inode->i_lock); + return 1; + } } + spin_unlock(&inode->i_lock); } - spin_unlock(&inode->i_lock); file->f_locks = 0; return 0; } diff --git a/fs/locks.c b/fs/locks.c index 055df53f19de..e50bb4d9e757 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -157,9 +157,6 @@ static int target_leasetype(struct file_lock *fl) int leases_enable = 1; int lease_break_time = 45; -#define for_each_lock(inode, lockp) \ - for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next) - /* * The global file_lock_list is only used for displaying /proc/locks, so we * keep a list on each CPU, with each list protected by its own spinlock via @@ -218,6 +215,7 @@ locks_get_lock_context(struct inode *inode) goto out; INIT_LIST_HEAD(&new->flc_flock); + INIT_LIST_HEAD(&new->flc_posix); /* * Assign the pointer if it's not already assigned. If it is, then @@ -241,6 +239,7 @@ locks_free_lock_context(struct file_lock_context *ctx) { if (ctx) { WARN_ON_ONCE(!list_empty(&ctx->flc_flock)); + WARN_ON_ONCE(!list_empty(&ctx->flc_posix)); kmem_cache_free(flctx_cache, ctx); } } @@ -809,21 +808,26 @@ void posix_test_lock(struct file *filp, struct file_lock *fl) { struct file_lock *cfl; + struct file_lock_context *ctx; struct inode *inode = file_inode(filp); + ctx = inode->i_flctx; + if (!ctx || list_empty_careful(&ctx->flc_posix)) { + fl->fl_type = F_UNLCK; + return; + } + spin_lock(&inode->i_lock); - for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) { - if (!IS_POSIX(cfl)) - continue; - if (posix_locks_conflict(fl, cfl)) - break; + list_for_each_entry(cfl, &ctx->flc_posix, fl_list) { + if (posix_locks_conflict(fl, cfl)) { + locks_copy_conflock(fl, cfl); + if (cfl->fl_nspid) + fl->fl_pid = pid_vnr(cfl->fl_nspid); + goto out; + } } - if (cfl) { - locks_copy_conflock(fl, cfl); - if (cfl->fl_nspid) - fl->fl_pid = pid_vnr(cfl->fl_nspid); - } else - fl->fl_type = F_UNLCK; + fl->fl_type = F_UNLCK; +out: spin_unlock(&inode->i_lock); return; } @@ -983,16 +987,20 @@ out: static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock) { - struct file_lock *fl; + struct file_lock *fl, *tmp; struct file_lock *new_fl = NULL; struct file_lock *new_fl2 = NULL; struct file_lock *left = NULL; struct file_lock *right = NULL; - struct file_lock **before; + struct file_lock_context *ctx; int error; bool added = false; LIST_HEAD(dispose); + ctx = locks_get_lock_context(inode); + if (!ctx) + return -ENOMEM; + /* * We may need two file_lock structures for this operation, * so we get them in advance to avoid races. @@ -1013,8 +1021,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str * blocker's list of waiters and the global blocked_hash. */ if (request->fl_type != F_UNLCK) { - for_each_lock(inode, before) { - fl = *before; + list_for_each_entry(fl, &ctx->flc_posix, fl_list) { if (!IS_POSIX(fl)) continue; if (!posix_locks_conflict(request, fl)) @@ -1044,29 +1051,25 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str if (request->fl_flags & FL_ACCESS) goto out; - /* - * Find the first old lock with the same owner as the new lock. - */ - - before = &inode->i_flock; - - /* First skip locks owned by other processes. */ - while ((fl = *before) && (!IS_POSIX(fl) || - !posix_same_owner(request, fl))) { - before = &fl->fl_next; + /* Find the first old lock with the same owner as the new lock */ + list_for_each_entry(fl, &ctx->flc_posix, fl_list) { + if (posix_same_owner(request, fl)) + break; } /* Process locks with this owner. */ - while ((fl = *before) && posix_same_owner(request, fl)) { - /* Detect adjacent or overlapping regions (if same lock type) - */ + list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, fl_list) { + if (!posix_same_owner(request, fl)) + break; + + /* Detect adjacent or overlapping regions (if same lock type) */ if (request->fl_type == fl->fl_type) { /* In all comparisons of start vs end, use * "start - 1" rather than "end + 1". If end * is OFFSET_MAX, end + 1 will become negative. */ if (fl->fl_end < request->fl_start - 1) - goto next_lock; + continue; /* If the next lock in the list has entirely bigger * addresses than the new one, insert the lock here. */ @@ -1087,18 +1090,17 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str else request->fl_end = fl->fl_end; if (added) { - locks_delete_lock(before, &dispose); + locks_delete_lock_ctx(fl, &dispose); continue; } request = fl; added = true; - } - else { + } else { /* Processing for different lock types is a bit * more complex. */ if (fl->fl_end < request->fl_start) - goto next_lock; + continue; if (fl->fl_start > request->fl_end) break; if (request->fl_type == F_UNLCK) @@ -1117,7 +1119,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str * one (This may happen several times). */ if (added) { - locks_delete_lock(before, &dispose); + locks_delete_lock_ctx(fl, &dispose); continue; } /* @@ -1133,15 +1135,11 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str locks_copy_lock(new_fl, request); request = new_fl; new_fl = NULL; - locks_delete_lock(before, &dispose); - locks_insert_lock(before, request); + locks_insert_lock_ctx(request, &fl->fl_list); + locks_delete_lock_ctx(fl, &dispose); added = true; } } - /* Go on to next lock. - */ - next_lock: - before = &fl->fl_next; } /* @@ -1166,7 +1164,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str goto out; } locks_copy_lock(new_fl, request); - locks_insert_lock(before, new_fl); + locks_insert_lock_ctx(new_fl, &fl->fl_list); new_fl = NULL; } if (right) { @@ -1177,7 +1175,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str left = new_fl2; new_fl2 = NULL; locks_copy_lock(left, right); - locks_insert_lock(before, left); + locks_insert_lock_ctx(left, &fl->fl_list); } right->fl_start = request->fl_end + 1; locks_wake_up_blocks(right); @@ -1257,22 +1255,29 @@ EXPORT_SYMBOL(posix_lock_file_wait); */ int locks_mandatory_locked(struct file *file) { + int ret; struct inode *inode = file_inode(file); + struct file_lock_context *ctx; struct file_lock *fl; + ctx = inode->i_flctx; + if (!ctx || list_empty_careful(&ctx->flc_posix)) + return 0; + /* * Search the lock list for this inode for any POSIX locks. */ spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!IS_POSIX(fl)) - continue; + ret = 0; + list_for_each_entry(fl, &ctx->flc_posix, fl_list) { if (fl->fl_owner != current->files && - fl->fl_owner != file) + fl->fl_owner != file) { + ret = -EAGAIN; break; + } } spin_unlock(&inode->i_lock); - return fl ? -EAGAIN : 0; + return ret; } /** @@ -2389,13 +2394,14 @@ out: void locks_remove_posix(struct file *filp, fl_owner_t owner) { struct file_lock lock; + struct file_lock_context *ctx = file_inode(filp)->i_flctx; /* * If there are no locks held on this file, we don't need to call * posix_lock_file(). Another process could be setting a lock on this * file at the same time, but we wouldn't remove that lock anyway. */ - if (!file_inode(filp)->i_flock) + if (!ctx || list_empty(&ctx->flc_posix)) return; lock.fl_type = F_UNLCK; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 9f9f67b17e2b..3fb1caa3874d 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -85,17 +85,17 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ { struct inode *inode = state->inode; struct file_lock *fl; - struct file_lock_context *flctx; + struct file_lock_context *flctx = inode->i_flctx; + struct list_head *list; int status = 0; - if (inode->i_flock == NULL && inode->i_flctx == NULL) + if (flctx == NULL) goto out; - /* Protect inode->i_flock using the i_lock */ + list = &flctx->flc_posix; spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!(fl->fl_flags & (FL_POSIX))) - continue; +restart: + list_for_each_entry(fl, list, fl_list) { if (nfs_file_open_context(fl->fl_file) != ctx) continue; spin_unlock(&inode->i_lock); @@ -104,19 +104,9 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ goto out; spin_lock(&inode->i_lock); } - - flctx = inode->i_flctx; - if (flctx) { - list_for_each_entry(fl, &flctx->flc_flock, fl_list) { - if (nfs_file_open_context(fl->fl_file) != ctx) - continue; - spin_unlock(&inode->i_lock); - status = nfs4_lock_delegation_recall(fl, state, - stateid); - if (status < 0) - goto out; - spin_lock(&inode->i_lock); - } + if (list == &flctx->flc_posix) { + list = &flctx->flc_flock; + goto restart; } spin_unlock(&inode->i_lock); out: diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 65c404bf61ae..6084c267f3a0 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1367,53 +1367,18 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ struct file_lock *fl; int status = 0; struct file_lock_context *flctx = inode->i_flctx; + struct list_head *list; - if (inode->i_flock == NULL && flctx == NULL) + if (flctx == NULL) return 0; + list = &flctx->flc_posix; + /* Guard against delegation returns and new lock/unlock calls */ down_write(&nfsi->rwsem); - /* Protect inode->i_flock using the BKL */ spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!(fl->fl_flags & FL_POSIX)) - continue; - if (nfs_file_open_context(fl->fl_file)->state != state) - continue; - spin_unlock(&inode->i_lock); - status = ops->recover_lock(state, fl); - switch (status) { - case 0: - break; - case -ESTALE: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_EXPIRED: - case -NFS4ERR_NO_GRACE: - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - goto out; - default: - printk(KERN_ERR "NFS: %s: unhandled error %d\n", - __func__, status); - case -ENOMEM: - case -NFS4ERR_DENIED: - case -NFS4ERR_RECLAIM_BAD: - case -NFS4ERR_RECLAIM_CONFLICT: - /* kill_proc(fl->fl_pid, SIGLOST, 1); */ - status = 0; - } - spin_lock(&inode->i_lock); - } - - if (!flctx) - goto out_unlock; - - list_for_each_entry(fl, &flctx->flc_flock, fl_list) { +restart: + list_for_each_entry(fl, list, fl_list) { if (nfs_file_open_context(fl->fl_file)->state != state) continue; spin_unlock(&inode->i_lock); @@ -1445,7 +1410,10 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ } spin_lock(&inode->i_lock); } -out_unlock: + if (list == &flctx->flc_posix) { + list = &flctx->flc_flock; + goto restart; + } spin_unlock(&inode->i_lock); out: up_write(&nfsi->rwsem); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index a3b62e15b444..29c7f33c9cf1 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -831,12 +831,10 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, if (prev) { if (!nfs_match_open_context(req->wb_context, prev->wb_context)) return false; - if (req->wb_context->dentry->d_inode->i_flock != NULL && - !nfs_match_lock_context(req->wb_lock_context, - prev->wb_lock_context)) - return false; flctx = req->wb_context->dentry->d_inode->i_flctx; - if (flctx != NULL && !list_empty_careful(&flctx->flc_flock) && + if (flctx != NULL && + !(list_empty_careful(&flctx->flc_posix) && + list_empty_careful(&flctx->flc_flock)) && !nfs_match_lock_context(req->wb_lock_context, prev->wb_lock_context)) return false; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e072aeb34195..784c13485b3f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1091,6 +1091,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) { struct nfs_open_context *ctx = nfs_file_open_context(file); struct nfs_lock_context *l_ctx; + struct file_lock_context *flctx = file_inode(file)->i_flctx; struct nfs_page *req; int do_flush, status; /* @@ -1109,12 +1110,9 @@ int nfs_flush_incompatible(struct file *file, struct page *page) do_flush = req->wb_page != page || req->wb_context != ctx; /* for now, flush if more than 1 request in page_group */ do_flush |= req->wb_this_page != req; - if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) { - do_flush |= l_ctx->lockowner.l_owner != current->files - || l_ctx->lockowner.l_pid != current->tgid; - } - if (l_ctx && ctx->dentry->d_inode->i_flctx && - !list_empty_careful(&ctx->dentry->d_inode->i_flctx->flc_flock)) { + if (l_ctx && flctx && + !(list_empty_careful(&flctx->flc_posix) && + list_empty_careful(&flctx->flc_flock))) { do_flush |= l_ctx->lockowner.l_owner != current->files || l_ctx->lockowner.l_pid != current->tgid; } @@ -1202,26 +1200,24 @@ static int nfs_can_extend_write(struct file *file, struct page *page, struct ino return 0; if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) return 1; - if (!inode->i_flock && !flctx) + if (!flctx || (list_empty_careful(&flctx->flc_flock) && + list_empty_careful(&flctx->flc_posix))) return 0; /* Check to see if there are whole file write locks */ - spin_lock(&inode->i_lock); ret = 0; - - fl = inode->i_flock; - if (fl && is_whole_file_wrlock(fl)) { - ret = 1; - goto out; - } - - if (!list_empty(&flctx->flc_flock)) { + spin_lock(&inode->i_lock); + if (!list_empty(&flctx->flc_posix)) { + fl = list_first_entry(&flctx->flc_posix, struct file_lock, + fl_list); + if (is_whole_file_wrlock(fl)) + ret = 1; + } else if (!list_empty(&flctx->flc_flock)) { fl = list_first_entry(&flctx->flc_flock, struct file_lock, fl_list); if (fl->fl_type == F_WRLCK) ret = 1; } -out: spin_unlock(&inode->i_lock); return ret; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c06a1ba80d73..fad821991369 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5556,10 +5556,11 @@ out_nfserr: static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) { - struct file_lock **flpp; + struct file_lock *fl; int status = false; struct file *filp = find_any_file(fp); struct inode *inode; + struct file_lock_context *flctx; if (!filp) { /* Any valid lock stateid should have some sort of access */ @@ -5568,15 +5569,18 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) } inode = file_inode(filp); + flctx = inode->i_flctx; - spin_lock(&inode->i_lock); - for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { - if ((*flpp)->fl_owner == (fl_owner_t)lowner) { - status = true; - break; + if (flctx && !list_empty_careful(&flctx->flc_posix)) { + spin_lock(&inode->i_lock); + list_for_each_entry(fl, &flctx->flc_posix, fl_list) { + if (fl->fl_owner == (fl_owner_t)lowner) { + status = true; + break; + } } + spin_unlock(&inode->i_lock); } - spin_unlock(&inode->i_lock); fput(filp); return status; } diff --git a/fs/read_write.c b/fs/read_write.c index c0805c93b6fa..4060691e78f7 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -358,7 +358,7 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t return retval; } - if (unlikely(inode->i_flock && mandatory_lock(inode))) { + if (unlikely(inode->i_flctx && mandatory_lock(inode))) { retval = locks_mandatory_area( read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, inode, file, pos, count); diff --git a/include/linux/fs.h b/include/linux/fs.h index dec0d38b05de..571f113588e9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -968,6 +968,7 @@ struct file_lock { struct file_lock_context { struct list_head flc_flock; + struct list_head flc_posix; }; /* The following constant reflects the upper bound of the file/locking space */ @@ -1971,7 +1972,7 @@ static inline int locks_verify_truncate(struct inode *inode, struct file *filp, loff_t size) { - if (inode->i_flock && mandatory_lock(inode)) + if (inode->i_flctx && mandatory_lock(inode)) return locks_mandatory_area( FLOCK_VERIFY_WRITE, inode, filp, size < inode->i_size ? size : inode->i_size, -- cgit v1.2.3 From 6109c85037e53443f29fd39c0de69f578a1cf285 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Jan 2015 15:05:57 -0500 Subject: locks: add a dedicated spinlock to protect i_flctx lists We can now add a dedicated spinlock without expanding struct inode. Change to using that to protect the various i_flctx lists. Signed-off-by: Jeff Layton Acked-by: Christoph Hellwig --- fs/ceph/locks.c | 8 ++--- fs/cifs/file.c | 8 ++--- fs/lockd/svcsubs.c | 12 ++++---- fs/locks.c | 87 +++++++++++++++++++++++++++-------------------------- fs/nfs/delegation.c | 8 ++--- fs/nfs/nfs4state.c | 8 ++--- fs/nfs/write.c | 4 +-- fs/nfsd/nfs4state.c | 4 +-- include/linux/fs.h | 1 + 9 files changed, 71 insertions(+), 69 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 19beeed83233..0303da8e3233 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -255,12 +255,12 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) ctx = inode->i_flctx; if (ctx) { - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); list_for_each_entry(lock, &ctx->flc_posix, fl_list) ++(*fcntl_count); list_for_each_entry(lock, &ctx->flc_flock, fl_list) ++(*flock_count); - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); } dout("counted %d flock locks and %d fcntl locks", *flock_count, *fcntl_count); @@ -288,7 +288,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode, if (!ctx) return 0; - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); list_for_each_entry(lock, &ctx->flc_flock, fl_list) { ++seen_fcntl; if (seen_fcntl > num_fcntl_locks) { @@ -312,7 +312,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode, ++l; } fail: - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); return err; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ea78f6f81ce2..b65166eb111e 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1136,11 +1136,11 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) if (!flctx) goto out; - spin_lock(&inode->i_lock); + spin_lock(&flctx->flc_lock); list_for_each(el, &flctx->flc_posix) { count++; } - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); INIT_LIST_HEAD(&locks_to_send); @@ -1159,7 +1159,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) } el = locks_to_send.next; - spin_lock(&inode->i_lock); + spin_lock(&flctx->flc_lock); list_for_each_entry(flock, &flctx->flc_posix, fl_list) { if (el == &locks_to_send) { /* @@ -1181,7 +1181,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) lck->type = type; lck->offset = flock->fl_start; } - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { int stored_rc; diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 5300bb53835f..665ef5a05183 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -171,7 +171,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, return 0; again: file->f_locks = 0; - spin_lock(&inode->i_lock); + spin_lock(&flctx->flc_lock); list_for_each_entry(fl, &flctx->flc_posix, fl_list) { if (fl->fl_lmops != &nlmsvc_lock_operations) continue; @@ -183,7 +183,7 @@ again: if (match(lockhost, host)) { struct file_lock lock = *fl; - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); lock.fl_type = F_UNLCK; lock.fl_start = 0; lock.fl_end = OFFSET_MAX; @@ -195,7 +195,7 @@ again: goto again; } } - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); return 0; } @@ -232,14 +232,14 @@ nlm_file_inuse(struct nlm_file *file) return 1; if (flctx && !list_empty_careful(&flctx->flc_posix)) { - spin_lock(&inode->i_lock); + spin_lock(&flctx->flc_lock); list_for_each_entry(fl, &flctx->flc_posix, fl_list) { if (fl->fl_lmops == &nlmsvc_lock_operations) { - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); return 1; } } - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); } file->f_locks = 0; return 0; diff --git a/fs/locks.c b/fs/locks.c index d46e70567b99..a268d959ccd6 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -161,7 +161,7 @@ int lease_break_time = 45; * The global file_lock_list is only used for displaying /proc/locks, so we * keep a list on each CPU, with each list protected by its own spinlock via * the file_lock_lglock. Note that alterations to the list also require that - * the relevant i_lock is held. + * the relevant flc_lock is held. */ DEFINE_STATIC_LGLOCK(file_lock_lglock); static DEFINE_PER_CPU(struct hlist_head, file_lock_list); @@ -189,13 +189,13 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); * contrast to those that are acting as records of acquired locks). * * Note that when we acquire this lock in order to change the above fields, - * we often hold the i_lock as well. In certain cases, when reading the fields + * we often hold the flc_lock as well. In certain cases, when reading the fields * protected by this lock, we can skip acquiring it iff we already hold the - * i_lock. + * flc_lock. * * In particular, adding an entry to the fl_block list requires that you hold - * both the i_lock and the blocked_lock_lock (acquired in that order). Deleting - * an entry from the list however only requires the file_lock_lock. + * both the flc_lock and the blocked_lock_lock (acquired in that order). + * Deleting an entry from the list however only requires the file_lock_lock. */ static DEFINE_SPINLOCK(blocked_lock_lock); @@ -214,6 +214,7 @@ locks_get_lock_context(struct inode *inode) if (!new) goto out; + spin_lock_init(&new->flc_lock); INIT_LIST_HEAD(&new->flc_flock); INIT_LIST_HEAD(&new->flc_posix); INIT_LIST_HEAD(&new->flc_lease); @@ -557,7 +558,7 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) return fl1->fl_owner == fl2->fl_owner; } -/* Must be called with the i_lock held! */ +/* Must be called with the flc_lock held! */ static void locks_insert_global_locks(struct file_lock *fl) { lg_local_lock(&file_lock_lglock); @@ -566,12 +567,12 @@ static void locks_insert_global_locks(struct file_lock *fl) lg_local_unlock(&file_lock_lglock); } -/* Must be called with the i_lock held! */ +/* Must be called with the flc_lock held! */ static void locks_delete_global_locks(struct file_lock *fl) { /* * Avoid taking lock if already unhashed. This is safe since this check - * is done while holding the i_lock, and new insertions into the list + * is done while holding the flc_lock, and new insertions into the list * also require that it be held. */ if (hlist_unhashed(&fl->fl_link)) @@ -623,10 +624,10 @@ static void locks_delete_block(struct file_lock *waiter) * the order they blocked. The documentation doesn't require this but * it seems like the reasonable thing to do. * - * Must be called with both the i_lock and blocked_lock_lock held. The fl_block - * list itself is protected by the blocked_lock_lock, but by ensuring that the - * i_lock is also held on insertions we can avoid taking the blocked_lock_lock - * in some cases when we see that the fl_block list is empty. + * Must be called with both the flc_lock and blocked_lock_lock held. The + * fl_block list itself is protected by the blocked_lock_lock, but by ensuring + * that the flc_lock is also held on insertions we can avoid taking the + * blocked_lock_lock in some cases when we see that the fl_block list is empty. */ static void __locks_insert_block(struct file_lock *blocker, struct file_lock *waiter) @@ -638,7 +639,7 @@ static void __locks_insert_block(struct file_lock *blocker, locks_insert_global_blocked(waiter); } -/* Must be called with i_lock held. */ +/* Must be called with flc_lock held. */ static void locks_insert_block(struct file_lock *blocker, struct file_lock *waiter) { @@ -650,15 +651,15 @@ static void locks_insert_block(struct file_lock *blocker, /* * Wake up processes blocked waiting for blocker. * - * Must be called with the inode->i_lock held! + * Must be called with the inode->flc_lock held! */ static void locks_wake_up_blocks(struct file_lock *blocker) { /* * Avoid taking global lock if list is empty. This is safe since new - * blocked requests are only added to the list under the i_lock, and - * the i_lock is always held here. Note that removal from the fl_block - * list does not require the i_lock, so we must recheck list_empty() + * blocked requests are only added to the list under the flc_lock, and + * the flc_lock is always held here. Note that removal from the fl_block + * list does not require the flc_lock, so we must recheck list_empty() * after acquiring the blocked_lock_lock. */ if (list_empty(&blocker->fl_block)) @@ -768,7 +769,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) return; } - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); list_for_each_entry(cfl, &ctx->flc_posix, fl_list) { if (posix_locks_conflict(fl, cfl)) { locks_copy_conflock(fl, cfl); @@ -779,7 +780,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) } fl->fl_type = F_UNLCK; out: - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); return; } EXPORT_SYMBOL(posix_test_lock); @@ -880,7 +881,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) return -ENOMEM; } - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); if (request->fl_flags & FL_ACCESS) goto find_conflict; @@ -905,9 +906,9 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) * give it the opportunity to lock the file. */ if (found) { - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); cond_resched(); - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); } find_conflict: @@ -929,7 +930,7 @@ find_conflict: error = 0; out: - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); if (new_fl) locks_free_lock(new_fl); locks_dispose_list(&dispose); @@ -965,7 +966,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str new_fl2 = locks_alloc_lock(); } - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); /* * New lock request. Walk all POSIX locks and look for conflicts. If * there are any, either return error or put the request on the @@ -1136,7 +1137,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str locks_wake_up_blocks(left); } out: - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); /* * Free any unused locks. */ @@ -1218,7 +1219,7 @@ int locks_mandatory_locked(struct file *file) /* * Search the lock list for this inode for any POSIX locks. */ - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); ret = 0; list_for_each_entry(fl, &ctx->flc_posix, fl_list) { if (fl->fl_owner != current->files && @@ -1227,7 +1228,7 @@ int locks_mandatory_locked(struct file *file) break; } } - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); return ret; } @@ -1346,7 +1347,7 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose) struct file_lock_context *ctx = inode->i_flctx; struct file_lock *fl, *tmp; - lockdep_assert_held(&inode->i_lock); + lockdep_assert_held(&ctx->flc_lock); list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) { trace_time_out_leases(inode, fl); @@ -1370,7 +1371,7 @@ any_leases_conflict(struct inode *inode, struct file_lock *breaker) struct file_lock_context *ctx = inode->i_flctx; struct file_lock *fl; - lockdep_assert_held(&inode->i_lock); + lockdep_assert_held(&ctx->flc_lock); list_for_each_entry(fl, &ctx->flc_lease, fl_list) { if (leases_conflict(fl, breaker)) @@ -1413,7 +1414,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) return error; } - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); @@ -1463,11 +1464,11 @@ restart: break_time++; locks_insert_block(fl, new_fl); trace_break_lease_block(inode, new_fl); - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); locks_dispose_list(&dispose); error = wait_event_interruptible_timeout(new_fl->fl_wait, !new_fl->fl_next, break_time); - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); trace_break_lease_unblock(inode, new_fl); locks_delete_block(new_fl); if (error >= 0) { @@ -1482,7 +1483,7 @@ restart: error = 0; } out: - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); locks_dispose_list(&dispose); locks_free_lock(new_fl); return error; @@ -1506,14 +1507,14 @@ void lease_get_mtime(struct inode *inode, struct timespec *time) struct file_lock *fl; if (ctx && !list_empty_careful(&ctx->flc_lease)) { - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); if (!list_empty(&ctx->flc_lease)) { fl = list_first_entry(&ctx->flc_lease, struct file_lock, fl_list); if (fl->fl_type == F_WRLCK) has_lease = true; } - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); } if (has_lease) @@ -1556,7 +1557,7 @@ int fcntl_getlease(struct file *filp) LIST_HEAD(dispose); if (ctx && !list_empty_careful(&ctx->flc_lease)) { - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); time_out_leases(file_inode(filp), &dispose); list_for_each_entry(fl, &ctx->flc_lease, fl_list) { if (fl->fl_file != filp) @@ -1564,7 +1565,7 @@ int fcntl_getlease(struct file *filp) type = target_leasetype(fl); break; } - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); locks_dispose_list(&dispose); } return type; @@ -1632,7 +1633,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr return -EINVAL; } - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); error = check_conflicting_open(dentry, arg); if (error) @@ -1699,7 +1700,7 @@ out_setup: if (lease->fl_lmops->lm_setup) lease->fl_lmops->lm_setup(lease, priv); out: - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); locks_dispose_list(&dispose); if (is_deleg) mutex_unlock(&inode->i_mutex); @@ -1722,7 +1723,7 @@ static int generic_delete_lease(struct file *filp) return error; } - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); list_for_each_entry(fl, &ctx->flc_lease, fl_list) { if (fl->fl_file == filp) { victim = fl; @@ -1732,7 +1733,7 @@ static int generic_delete_lease(struct file *filp) trace_generic_delete_lease(inode, fl); if (victim) error = fl->fl_lmops->lm_change(&victim, F_UNLCK, &dispose); - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); locks_dispose_list(&dispose); return error; } @@ -2423,10 +2424,10 @@ locks_remove_lease(struct file *filp) if (!ctx || list_empty(&ctx->flc_lease)) return; - spin_lock(&inode->i_lock); + spin_lock(&ctx->flc_lock); list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) lease_modify(&fl, F_UNLCK, &dispose); - spin_unlock(&inode->i_lock); + spin_unlock(&ctx->flc_lock); locks_dispose_list(&dispose); } diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 3fb1caa3874d..8cdb2b28a104 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -93,22 +93,22 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ goto out; list = &flctx->flc_posix; - spin_lock(&inode->i_lock); + spin_lock(&flctx->flc_lock); restart: list_for_each_entry(fl, list, fl_list) { if (nfs_file_open_context(fl->fl_file) != ctx) continue; - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); status = nfs4_lock_delegation_recall(fl, state, stateid); if (status < 0) goto out; - spin_lock(&inode->i_lock); + spin_lock(&flctx->flc_lock); } if (list == &flctx->flc_posix) { list = &flctx->flc_flock; goto restart; } - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); out: return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 6084c267f3a0..a3bb22ab68c5 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1376,12 +1376,12 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ /* Guard against delegation returns and new lock/unlock calls */ down_write(&nfsi->rwsem); - spin_lock(&inode->i_lock); + spin_lock(&flctx->flc_lock); restart: list_for_each_entry(fl, list, fl_list) { if (nfs_file_open_context(fl->fl_file)->state != state) continue; - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); status = ops->recover_lock(state, fl); switch (status) { case 0: @@ -1408,13 +1408,13 @@ restart: /* kill_proc(fl->fl_pid, SIGLOST, 1); */ status = 0; } - spin_lock(&inode->i_lock); + spin_lock(&flctx->flc_lock); } if (list == &flctx->flc_posix) { list = &flctx->flc_flock; goto restart; } - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); out: up_write(&nfsi->rwsem); return status; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 784c13485b3f..4ae66f416eb9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1206,7 +1206,7 @@ static int nfs_can_extend_write(struct file *file, struct page *page, struct ino /* Check to see if there are whole file write locks */ ret = 0; - spin_lock(&inode->i_lock); + spin_lock(&flctx->flc_lock); if (!list_empty(&flctx->flc_posix)) { fl = list_first_entry(&flctx->flc_posix, struct file_lock, fl_list); @@ -1218,7 +1218,7 @@ static int nfs_can_extend_write(struct file *file, struct page *page, struct ino if (fl->fl_type == F_WRLCK) ret = 1; } - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); return ret; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fad821991369..80242f5bd621 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5572,14 +5572,14 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) flctx = inode->i_flctx; if (flctx && !list_empty_careful(&flctx->flc_posix)) { - spin_lock(&inode->i_lock); + spin_lock(&flctx->flc_lock); list_for_each_entry(fl, &flctx->flc_posix, fl_list) { if (fl->fl_owner == (fl_owner_t)lowner) { status = true; break; } } - spin_unlock(&inode->i_lock); + spin_unlock(&flctx->flc_lock); } fput(filp); return status; diff --git a/include/linux/fs.h b/include/linux/fs.h index ce0873af0b97..32eafa9b5c9f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -968,6 +968,7 @@ struct file_lock { }; struct file_lock_context { + spinlock_t flc_lock; struct list_head flc_flock; struct list_head flc_posix; struct list_head flc_lease; -- cgit v1.2.3 From de1414a654e66b81b5348dbc5259ecf2fb61655e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Jan 2015 10:42:36 +0100 Subject: fs: export inode_to_bdi and use it in favor of mapping->backing_dev_info Now that we got rid of the bdi abuse on character devices we can always use sb->s_bdi to get at the backing_dev_info for a file, except for the block device special case. Export inode_to_bdi and replace uses of mapping->backing_dev_info with it to prepare for the removal of mapping->backing_dev_info. Signed-off-by: Christoph Hellwig Reviewed-by: Tejun Heo Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- fs/btrfs/file.c | 2 +- fs/ceph/file.c | 2 +- fs/ext2/ialloc.c | 2 +- fs/ext4/super.c | 2 +- fs/fs-writeback.c | 3 ++- fs/fuse/file.c | 10 +++++----- fs/gfs2/aops.c | 2 +- fs/gfs2/super.c | 2 +- fs/nfs/filelayout/filelayout.c | 2 +- fs/nfs/write.c | 6 +++--- fs/ntfs/file.c | 3 ++- fs/ocfs2/file.c | 2 +- fs/xfs/xfs_file.c | 2 +- include/linux/backing-dev.h | 6 ++++-- include/trace/events/writeback.h | 6 +++--- mm/fadvise.c | 4 ++-- mm/filemap.c | 4 ++-- mm/filemap_xip.c | 3 ++- mm/page-writeback.c | 29 +++++++++++++---------------- mm/readahead.c | 4 ++-- mm/truncate.c | 2 +- mm/vmscan.c | 4 ++-- 22 files changed, 52 insertions(+), 50 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e4090259569b..835c04a874fd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1746,7 +1746,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, mutex_lock(&inode->i_mutex); - current->backing_dev_info = inode->i_mapping->backing_dev_info; + current->backing_dev_info = inode_to_bdi(inode); err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (err) { mutex_unlock(&inode->i_mutex); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index ce74b394b49d..905986dd4c3c 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -945,7 +945,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) mutex_lock(&inode->i_mutex); /* We can write back this queue in page reclaim */ - current->backing_dev_info = file->f_mapping->backing_dev_info; + current->backing_dev_info = inode_to_bdi(inode); err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (err) diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 7d66fb0e4cca..6c14bb8322fa 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -170,7 +170,7 @@ static void ext2_preread_inode(struct inode *inode) struct ext2_group_desc * gdp; struct backing_dev_info *bdi; - bdi = inode->i_mapping->backing_dev_info; + bdi = inode_to_bdi(inode); if (bdi_read_congested(bdi)) return; if (bdi_write_congested(bdi)) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 74c5f53595fb..ad88e601a6cd 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -334,7 +334,7 @@ static void save_error_info(struct super_block *sb, const char *func, static int block_device_ejected(struct super_block *sb) { struct inode *bd_inode = sb->s_bdev->bd_inode; - struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info; + struct backing_dev_info *bdi = inode_to_bdi(bd_inode); return bdi->dev == NULL; } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e8116a44cc29..a20b1145f4d5 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -66,7 +66,7 @@ int writeback_in_progress(struct backing_dev_info *bdi) } EXPORT_SYMBOL(writeback_in_progress); -static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) +struct backing_dev_info *inode_to_bdi(struct inode *inode) { struct super_block *sb = inode->i_sb; #ifdef CONFIG_BLOCK @@ -75,6 +75,7 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) #endif return sb->s_bdi; } +EXPORT_SYMBOL_GPL(inode_to_bdi); static inline struct inode *wb_inode(struct list_head *head) { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 760b2c552197..19d80b82d344 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1159,7 +1159,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) mutex_lock(&inode->i_mutex); /* We can write back this queue in page reclaim */ - current->backing_dev_info = mapping->backing_dev_info; + current->backing_dev_info = inode_to_bdi(inode); err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (err) @@ -1464,7 +1464,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) { struct inode *inode = req->inode; struct fuse_inode *fi = get_fuse_inode(inode); - struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; + struct backing_dev_info *bdi = inode_to_bdi(inode); int i; list_del(&req->writepages_entry); @@ -1658,7 +1658,7 @@ static int fuse_writepage_locked(struct page *page) req->end = fuse_writepage_end; req->inode = inode; - inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK); + inc_bdi_stat(inode_to_bdi(inode), BDI_WRITEBACK); inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); spin_lock(&fc->lock); @@ -1768,7 +1768,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req, if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT || old_req->state == FUSE_REQ_PENDING)) { - struct backing_dev_info *bdi = page->mapping->backing_dev_info; + struct backing_dev_info *bdi = inode_to_bdi(page->mapping->host); copy_highpage(old_req->pages[0], page); spin_unlock(&fc->lock); @@ -1872,7 +1872,7 @@ static int fuse_writepages_fill(struct page *page, req->page_descs[req->num_pages].offset = 0; req->page_descs[req->num_pages].length = PAGE_SIZE; - inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK); + inc_bdi_stat(inode_to_bdi(inode), BDI_WRITEBACK); inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); err = 0; diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 805b37fed638..4ad4f94edebe 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -289,7 +289,7 @@ continue_unlock: if (!clear_page_dirty_for_io(page)) goto continue_unlock; - trace_wbc_writepage(wbc, mapping->backing_dev_info); + trace_wbc_writepage(wbc, inode_to_bdi(inode)); ret = __gfs2_jdata_writepage(page, wbc); if (unlikely(ret)) { diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 5b327f837de7..1666382b198d 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -743,7 +743,7 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl); - struct backing_dev_info *bdi = metamapping->backing_dev_info; + struct backing_dev_info *bdi = inode_to_bdi(metamapping->host); int ret = 0; if (wbc->sync_mode == WB_SYNC_ALL) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 7afb52f6a25a..51aa889611cf 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -1081,7 +1081,7 @@ mds_commit: spin_unlock(cinfo->lock); if (!cinfo->dreq) { inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, + inc_bdi_stat(inode_to_bdi(page_file_mapping(req->wb_page)->host), BDI_RECLAIMABLE); __mark_inode_dirty(req->wb_context->dentry->d_inode, I_DIRTY_DATASYNC); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index af3af685a9e3..298abcc5281b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -786,7 +786,7 @@ nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, spin_unlock(cinfo->lock); if (!cinfo->dreq) { inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, + inc_bdi_stat(inode_to_bdi(page_file_mapping(req->wb_page)->host), BDI_RECLAIMABLE); __mark_inode_dirty(req->wb_context->dentry->d_inode, I_DIRTY_DATASYNC); @@ -853,7 +853,7 @@ static void nfs_clear_page_commit(struct page *page) { dec_zone_page_state(page, NR_UNSTABLE_NFS); - dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE); + dec_bdi_stat(inode_to_bdi(page_file_mapping(page)->host), BDI_RECLAIMABLE); } /* Called holding inode (/cinfo) lock */ @@ -1564,7 +1564,7 @@ void nfs_retry_commit(struct list_head *page_list, nfs_mark_request_commit(req, lseg, cinfo); if (!cinfo->dreq) { dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - dec_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, + dec_bdi_stat(inode_to_bdi(page_file_mapping(req->wb_page)->host), BDI_RECLAIMABLE); } nfs_unlock_and_release_request(req); diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 643faa44f22b..1da9b2d184dc 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -19,6 +19,7 @@ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include #include #include @@ -2091,7 +2092,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, count = iov_length(iov, nr_segs); pos = *ppos; /* We can write back this queue in page reclaim. */ - current->backing_dev_info = mapping->backing_dev_info; + current->backing_dev_info = inode_to_bdi(inode); written = 0; err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (err) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 3950693dd0f6..abe7d98d6178 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2363,7 +2363,7 @@ relock: goto out_dio; } } else { - current->backing_dev_info = file->f_mapping->backing_dev_info; + current->backing_dev_info = inode_to_bdi(inode); written = generic_perform_write(file, from, *ppos); if (likely(written >= 0)) iocb->ki_pos = *ppos + written; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 13e974e6a889..5684ac3e7d18 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -699,7 +699,7 @@ xfs_file_buffered_aio_write( iov_iter_truncate(from, count); /* We can write back this queue in page reclaim */ - current->backing_dev_info = mapping->backing_dev_info; + current->backing_dev_info = inode_to_bdi(inode); write_retry: trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 478f95d92d73..ed59dee03a71 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -106,6 +106,8 @@ struct backing_dev_info { #endif }; +struct backing_dev_info *inode_to_bdi(struct inode *inode); + int __must_check bdi_init(struct backing_dev_info *bdi); void bdi_destroy(struct backing_dev_info *bdi); @@ -303,12 +305,12 @@ static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi) static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) { - return bdi_cap_writeback_dirty(mapping->backing_dev_info); + return bdi_cap_writeback_dirty(inode_to_bdi(mapping->host)); } static inline bool mapping_cap_account_dirty(struct address_space *mapping) { - return bdi_cap_account_dirty(mapping->backing_dev_info); + return bdi_cap_account_dirty(inode_to_bdi(mapping->host)); } static inline int bdi_sched_wait(void *word) diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index cee02d65ab3f..74f5207bd090 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -47,7 +47,7 @@ TRACE_EVENT(writeback_dirty_page, TP_fast_assign( strncpy(__entry->name, - mapping ? dev_name(mapping->backing_dev_info->dev) : "(unknown)", 32); + mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)", 32); __entry->ino = mapping ? mapping->host->i_ino : 0; __entry->index = page->index; ), @@ -72,7 +72,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template, ), TP_fast_assign( - struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; + struct backing_dev_info *bdi = inode_to_bdi(inode); /* may be called for files on pseudo FSes w/ unregistered bdi */ strncpy(__entry->name, @@ -116,7 +116,7 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template, TP_fast_assign( strncpy(__entry->name, - dev_name(inode->i_mapping->backing_dev_info->dev), 32); + dev_name(inode_to_bdi(inode)->dev), 32); __entry->ino = inode->i_ino; __entry->sync_mode = wbc->sync_mode; ), diff --git a/mm/fadvise.c b/mm/fadvise.c index 2ad7adf4f0a4..fac23ecf8d72 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -73,7 +73,7 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) else endbyte--; /* inclusive */ - bdi = mapping->backing_dev_info; + bdi = inode_to_bdi(mapping->host); switch (advice) { case POSIX_FADV_NORMAL: @@ -113,7 +113,7 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) case POSIX_FADV_NOREUSE: break; case POSIX_FADV_DONTNEED: - if (!bdi_write_congested(mapping->backing_dev_info)) + if (!bdi_write_congested(bdi)) __filemap_fdatawrite_range(mapping, offset, endbyte, WB_SYNC_NONE); diff --git a/mm/filemap.c b/mm/filemap.c index 673e4581a2e5..5d7c23c26f81 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -211,7 +211,7 @@ void __delete_from_page_cache(struct page *page, void *shadow) */ if (PageDirty(page) && mapping_cap_account_dirty(mapping)) { dec_zone_page_state(page, NR_FILE_DIRTY); - dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); + dec_bdi_stat(inode_to_bdi(mapping->host), BDI_RECLAIMABLE); } } @@ -2565,7 +2565,7 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) size_t count = iov_iter_count(from); /* We can write back this queue in page reclaim */ - current->backing_dev_info = mapping->backing_dev_info; + current->backing_dev_info = inode_to_bdi(inode); err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (err) goto out; diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 0d105aeff82f..26897fbfbe19 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include @@ -410,7 +411,7 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len, count = len; /* We can write back this queue in page reclaim */ - current->backing_dev_info = mapping->backing_dev_info; + current->backing_dev_info = inode_to_bdi(inode); ret = generic_write_checks(filp, &pos, &count, S_ISBLK(inode->i_mode)); if (ret) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 6f4335238e33..d4cbb4bd7d1c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1351,7 +1351,7 @@ static void balance_dirty_pages(struct address_space *mapping, unsigned long task_ratelimit; unsigned long dirty_ratelimit; unsigned long pos_ratio; - struct backing_dev_info *bdi = mapping->backing_dev_info; + struct backing_dev_info *bdi = inode_to_bdi(mapping->host); bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT; unsigned long start_time = jiffies; @@ -1574,7 +1574,7 @@ DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0; */ void balance_dirty_pages_ratelimited(struct address_space *mapping) { - struct backing_dev_info *bdi = mapping->backing_dev_info; + struct backing_dev_info *bdi = inode_to_bdi(mapping->host); int ratelimit; int *p; @@ -1929,7 +1929,7 @@ continue_unlock: if (!clear_page_dirty_for_io(page)) goto continue_unlock; - trace_wbc_writepage(wbc, mapping->backing_dev_info); + trace_wbc_writepage(wbc, inode_to_bdi(mapping->host)); ret = (*writepage)(page, wbc, data); if (unlikely(ret)) { if (ret == AOP_WRITEPAGE_ACTIVATE) { @@ -2094,10 +2094,12 @@ void account_page_dirtied(struct page *page, struct address_space *mapping) trace_writeback_dirty_page(page, mapping); if (mapping_cap_account_dirty(mapping)) { + struct backing_dev_info *bdi = inode_to_bdi(mapping->host); + __inc_zone_page_state(page, NR_FILE_DIRTY); __inc_zone_page_state(page, NR_DIRTIED); - __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); - __inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED); + __inc_bdi_stat(bdi, BDI_RECLAIMABLE); + __inc_bdi_stat(bdi, BDI_DIRTIED); task_io_account_write(PAGE_CACHE_SIZE); current->nr_dirtied++; this_cpu_inc(bdp_ratelimits); @@ -2156,7 +2158,7 @@ void account_page_redirty(struct page *page) if (mapping && mapping_cap_account_dirty(mapping)) { current->nr_dirtied--; dec_zone_page_state(page, NR_DIRTIED); - dec_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED); + dec_bdi_stat(inode_to_bdi(mapping->host), BDI_DIRTIED); } } EXPORT_SYMBOL(account_page_redirty); @@ -2295,7 +2297,7 @@ int clear_page_dirty_for_io(struct page *page) */ if (TestClearPageDirty(page)) { dec_zone_page_state(page, NR_FILE_DIRTY); - dec_bdi_stat(mapping->backing_dev_info, + dec_bdi_stat(inode_to_bdi(mapping->host), BDI_RECLAIMABLE); return 1; } @@ -2315,7 +2317,7 @@ int test_clear_page_writeback(struct page *page) memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags); if (mapping) { - struct backing_dev_info *bdi = mapping->backing_dev_info; + struct backing_dev_info *bdi = inode_to_bdi(mapping->host); unsigned long flags; spin_lock_irqsave(&mapping->tree_lock, flags); @@ -2352,7 +2354,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write) memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags); if (mapping) { - struct backing_dev_info *bdi = mapping->backing_dev_info; + struct backing_dev_info *bdi = inode_to_bdi(mapping->host); unsigned long flags; spin_lock_irqsave(&mapping->tree_lock, flags); @@ -2406,12 +2408,7 @@ EXPORT_SYMBOL(mapping_tagged); */ void wait_for_stable_page(struct page *page) { - struct address_space *mapping = page_mapping(page); - struct backing_dev_info *bdi = mapping->backing_dev_info; - - if (!bdi_cap_stable_pages_required(bdi)) - return; - - wait_on_page_writeback(page); + if (bdi_cap_stable_pages_required(inode_to_bdi(page->mapping->host))) + wait_on_page_writeback(page); } EXPORT_SYMBOL_GPL(wait_for_stable_page); diff --git a/mm/readahead.c b/mm/readahead.c index 17b9172ec37f..935675844b2e 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -27,7 +27,7 @@ void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping) { - ra->ra_pages = mapping->backing_dev_info->ra_pages; + ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages; ra->prev_pos = -1; } EXPORT_SYMBOL_GPL(file_ra_state_init); @@ -541,7 +541,7 @@ page_cache_async_readahead(struct address_space *mapping, /* * Defer asynchronous read-ahead on IO congestion. */ - if (bdi_read_congested(mapping->backing_dev_info)) + if (bdi_read_congested(inode_to_bdi(mapping->host))) return; /* do read-ahead */ diff --git a/mm/truncate.c b/mm/truncate.c index f1e4d6052369..ddec5a5966d7 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -112,7 +112,7 @@ void cancel_dirty_page(struct page *page, unsigned int account_size) struct address_space *mapping = page->mapping; if (mapping && mapping_cap_account_dirty(mapping)) { dec_zone_page_state(page, NR_FILE_DIRTY); - dec_bdi_stat(mapping->backing_dev_info, + dec_bdi_stat(inode_to_bdi(mapping->host), BDI_RECLAIMABLE); if (account_size) task_io_account_cancelled_write(account_size); diff --git a/mm/vmscan.c b/mm/vmscan.c index ab2505c3ef54..e00a16393f21 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -497,7 +497,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping, } if (mapping->a_ops->writepage == NULL) return PAGE_ACTIVATE; - if (!may_write_to_queue(mapping->backing_dev_info, sc)) + if (!may_write_to_queue(inode_to_bdi(mapping->host), sc)) return PAGE_KEEP; if (clear_page_dirty_for_io(page)) { @@ -876,7 +876,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, */ mapping = page_mapping(page); if (((dirty || writeback) && mapping && - bdi_write_congested(mapping->backing_dev_info)) || + bdi_write_congested(inode_to_bdi(mapping->host))) || (writeback && PageReclaim(page))) nr_congested++; -- cgit v1.2.3 From abde71f4d3c027a30f8d725e1e22001313b4481a Mon Sep 17 00:00:00 2001 From: Tom Haynes Date: Mon, 9 Jun 2014 13:12:20 -0700 Subject: pnfs: Add nfs_rpc_ops in calls to nfs_initiate_pgio Signed-off-by: Tom Haynes --- fs/nfs/filelayout/filelayout.c | 4 ++-- fs/nfs/internal.h | 1 + fs/nfs/pagelist.c | 6 ++++-- fs/nfs/read.c | 3 ++- fs/nfs/write.c | 6 +++--- include/linux/nfs_page.h | 1 + 6 files changed, 13 insertions(+), 8 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index bc36ed350a68..25c4896887ca 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -501,7 +501,7 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr) hdr->mds_offset = offset; /* Perform an asynchronous read to ds */ - nfs_initiate_pgio(ds_clnt, hdr, + nfs_initiate_pgio(ds_clnt, hdr, NFS_PROTO(hdr->inode), &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; } @@ -542,7 +542,7 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) hdr->args.offset = filelayout_get_dserver_offset(lseg, offset); /* Perform an asynchronous write */ - nfs_initiate_pgio(ds_clnt, hdr, + nfs_initiate_pgio(ds_clnt, hdr, NFS_PROTO(hdr->inode), &filelayout_write_call_ops, sync, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5543850268d2..1d15ffa94937 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -251,6 +251,7 @@ void nfs_pgio_header_free(struct nfs_pgio_header *); void nfs_pgio_data_destroy(struct nfs_pgio_header *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_header *, + const struct nfs_rpc_ops *, const struct rpc_call_ops *, int, int); void nfs_free_request(struct nfs_page *req); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 2b5e769beb16..35a2626a6922 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -597,6 +597,7 @@ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) } int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, + const struct nfs_rpc_ops *rpc_ops, const struct rpc_call_ops *call_ops, int how, int flags) { struct rpc_task *task; @@ -616,7 +617,7 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, }; int ret = 0; - hdr->rw_ops->rw_initiate(hdr, &msg, &task_setup_data, how); + hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how); dprintk("NFS: %5u initiated pgio call " "(req %s/%llu, %u bytes @ offset %llu)\n", @@ -792,7 +793,8 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) ret = nfs_generic_pgio(desc, hdr); if (ret == 0) ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), - hdr, desc->pg_rpc_callops, + hdr, NFS_PROTO(hdr->inode), + desc->pg_rpc_callops, desc->pg_ioflags, 0); return ret; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c91a4799c562..092ab499f2b6 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -168,13 +168,14 @@ out: static void nfs_initiate_read(struct nfs_pgio_header *hdr, struct rpc_message *msg, + const struct nfs_rpc_ops *rpc_ops, struct rpc_task_setup *task_setup_data, int how) { struct inode *inode = hdr->inode; int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; task_setup_data->flags |= swap_flags; - NFS_PROTO(inode)->read_setup(hdr, msg); + rpc_ops->read_setup(hdr, msg); } static void diff --git a/fs/nfs/write.c b/fs/nfs/write.c index af3af685a9e3..54d4857e0e2b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1240,15 +1240,15 @@ static int flush_task_priority(int how) static void nfs_initiate_write(struct nfs_pgio_header *hdr, struct rpc_message *msg, + const struct nfs_rpc_ops *rpc_ops, struct rpc_task_setup *task_setup_data, int how) { - struct inode *inode = hdr->inode; int priority = flush_task_priority(how); task_setup_data->priority = priority; - NFS_PROTO(inode)->write_setup(hdr, msg); + rpc_ops->write_setup(hdr, msg); - nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, + nfs4_state_protect_write(NFS_SERVER(hdr->inode)->nfs_client, &task_setup_data->rpc_client, msg, hdr); } diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 6c3e06ee2fb7..4c3aa809ab95 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -69,6 +69,7 @@ struct nfs_rw_ops { struct inode *); void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *); void (*rw_initiate)(struct nfs_pgio_header *, struct rpc_message *, + const struct nfs_rpc_ops *, struct rpc_task_setup *, int); }; -- cgit v1.2.3 From c36aae9ad95afa2f9a9e9109d989c21af221fabd Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 9 Jun 2014 07:10:14 +0800 Subject: nfs: allow different protocol in nfs_initiate_commit pnfs flexfile layout client may want to use NFSv3 ops rather than the default MDS v4 ops. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/filelayout/filelayout.c | 2 +- fs/nfs/internal.h | 1 + fs/nfs/pnfs_nfs.c | 1 + fs/nfs/write.c | 7 ++++--- 4 files changed, 7 insertions(+), 4 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 25c4896887ca..e5a3c5b1398f 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -1055,7 +1055,7 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); if (fh) data->args.fh = fh; - return nfs_initiate_commit(ds_clnt, data, + return nfs_initiate_commit(ds_clnt, data, NFS_PROTO(data->inode), &filelayout_commit_call_ops, how, RPC_TASK_SOFTCONN); out_err: diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 1d15ffa94937..98dee834e9d6 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -436,6 +436,7 @@ extern void nfs_write_prepare(struct rpc_task *task, void *calldata); extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); extern int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, + const struct nfs_rpc_ops *nfs_ops, const struct rpc_call_ops *call_ops, int how, int flags); extern void nfs_init_commit(struct nfs_commit_data *data, diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 23c851d4c9a9..c87f664587ee 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -278,6 +278,7 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, if (!data->lseg) { nfs_init_commit(data, mds_pages, NULL, cinfo); nfs_initiate_commit(NFS_CLIENT(inode), data, + NFS_PROTO(data->inode), data->mds_ops, how, 0); } else { struct pnfs_commit_bucket *buckets; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 54d4857e0e2b..8800bd3b235d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1465,6 +1465,7 @@ void nfs_commitdata_release(struct nfs_commit_data *data) EXPORT_SYMBOL_GPL(nfs_commitdata_release); int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, + const struct nfs_rpc_ops *nfs_ops, const struct rpc_call_ops *call_ops, int how, int flags) { @@ -1486,7 +1487,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, .priority = priority, }; /* Set up the initial task struct. */ - NFS_PROTO(data->inode)->commit_setup(data, &msg); + nfs_ops->commit_setup(data, &msg); dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); @@ -1589,8 +1590,8 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, /* Set up the argument struct */ nfs_init_commit(data, head, NULL, cinfo); atomic_inc(&cinfo->mds->rpcs_out); - return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, - how, 0); + return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), + data->mds_ops, how, 0); out_bad: nfs_retry_commit(head, NULL, cinfo); cinfo->completion_ops->error_cleanup(NFS_I(inode)); -- cgit v1.2.3 From 309a1d65b11de24d172f7dbbc21583ebd649c912 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 5 Sep 2014 16:34:29 -0400 Subject: nfs: handle overlapping reqs in lock_and_join This is needed for mirrored DS support, where multuple requests cover the same range. Signed-off-by: Weston Andros Adamson --- fs/nfs/write.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 8800bd3b235d..e9974574b19a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -473,13 +473,18 @@ try_again: do { /* * Subrequests are always contiguous, non overlapping - * and in order. If not, it's a programming error. + * and in order - but may be repeated (mirrored writes). */ - WARN_ON_ONCE(subreq->wb_offset != - (head->wb_offset + total_bytes)); - - /* keep track of how many bytes this group covers */ - total_bytes += subreq->wb_bytes; + if (subreq->wb_offset == (head->wb_offset + total_bytes)) { + /* keep track of how many bytes this group covers */ + total_bytes += subreq->wb_bytes; + } else if (WARN_ON_ONCE(subreq->wb_offset < head->wb_offset || + ((subreq->wb_offset + subreq->wb_bytes) > + (head->wb_offset + total_bytes)))) { + nfs_page_group_unlock(head); + spin_unlock(&inode->i_lock); + return ERR_PTR(-EIO); + } if (!nfs_lock_request(subreq)) { /* releases page group bit lock and -- cgit v1.2.3 From b57ff1303a2d4d1484c7a82bd80a3e014d6cdf5e Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 5 Sep 2014 18:20:21 -0400 Subject: pnfs: pass ds_commit_idx through the commit path Pass ds_commit_idx through the nfs commit path. It's used to select the commit bucket when using pnfs and is ignored when not using pnfs. Several functions had to be changed: nfs_retry_commit, nfs_mark_request_commit, pnfs_mark_request_commit and the pnfs layout driver .mark_request_commit functions. Signed-off-by: Tom Haynes --- fs/nfs/direct.c | 5 +++-- fs/nfs/filelayout/filelayout.c | 3 ++- fs/nfs/internal.h | 6 ++++-- fs/nfs/pnfs.h | 9 +++++---- fs/nfs/pnfs_nfs.c | 4 ++-- fs/nfs/write.c | 14 ++++++++------ 6 files changed, 24 insertions(+), 17 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index d7c2d430b04d..1ee41d74c31c 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -649,7 +649,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) nfs_list_remove_request(req); if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { /* Note the rewrite will go through mds */ - nfs_mark_request_commit(req, NULL, &cinfo); + nfs_mark_request_commit(req, NULL, &cinfo, 0); } else nfs_release_request(req); nfs_unlock_and_release_request(req); @@ -748,7 +748,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) nfs_list_remove_request(req); if (request_commit) { kref_get(&req->wb_kref); - nfs_mark_request_commit(req, hdr->lseg, &cinfo); + nfs_mark_request_commit(req, hdr->lseg, &cinfo, + hdr->ds_commit_idx); } nfs_unlock_and_release_request(req); } diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 520cbc53e035..3c9769441f36 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -954,7 +954,8 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) static void filelayout_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) + struct nfs_commit_info *cinfo, + u32 ds_commit_idx) { struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index e9305e98b782..05f9a87cdab4 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -450,13 +450,15 @@ int nfs_scan_commit(struct inode *inode, struct list_head *dst, struct nfs_commit_info *cinfo); void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo); + struct nfs_commit_info *cinfo, + u32 ds_commit_idx); int nfs_write_need_commit(struct nfs_pgio_header *); int nfs_generic_commit_list(struct inode *inode, struct list_head *head, int how, struct nfs_commit_info *cinfo); void nfs_retry_commit(struct list_head *page_list, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo); + struct nfs_commit_info *cinfo, + u32 ds_commit_idx); void nfs_commitdata_release(struct nfs_commit_data *data); void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, struct nfs_commit_info *cinfo); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 59c831efb5de..a0ab81cc9cf3 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -137,7 +137,8 @@ struct pnfs_layoutdriver_type { struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode); void (*mark_request_commit) (struct nfs_page *req, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo); + struct nfs_commit_info *cinfo, + u32 ds_commit_idx); void (*clear_request_commit) (struct nfs_page *req, struct nfs_commit_info *cinfo); int (*scan_commit_lists) (struct nfs_commit_info *cinfo, @@ -389,14 +390,14 @@ pnfs_generic_mark_devid_invalid(struct nfs4_deviceid_node *node) static inline bool pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) + struct nfs_commit_info *cinfo, u32 ds_commit_idx) { struct inode *inode = req->wb_context->dentry->d_inode; struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; if (lseg == NULL || ld->mark_request_commit == NULL) return false; - ld->mark_request_commit(req, lseg, cinfo); + ld->mark_request_commit(req, lseg, cinfo, ds_commit_idx); return true; } @@ -574,7 +575,7 @@ pnfs_get_ds_info(struct inode *inode) static inline bool pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) + struct nfs_commit_info *cinfo, u32 ds_commit_idx) { return false; } diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 55bff41180e8..fdc4f6562bb7 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -188,7 +188,7 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) bucket = &fl_cinfo->buckets[i]; if (list_empty(&bucket->committing)) continue; - nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); + nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo, i); spin_lock(cinfo->lock); freeme = bucket->clseg; bucket->clseg = NULL; @@ -247,7 +247,7 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, list_add(&data->pages, &list); nreq++; } else { - nfs_retry_commit(mds_pages, NULL, cinfo); + nfs_retry_commit(mds_pages, NULL, cinfo, 0); pnfs_generic_retry_commit(cinfo, 0); cinfo->completion_ops->error_cleanup(NFS_I(inode)); return -ENOMEM; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e9974574b19a..2bee165fddcf 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -847,9 +847,9 @@ EXPORT_SYMBOL_GPL(nfs_init_cinfo); */ void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) + struct nfs_commit_info *cinfo, u32 ds_commit_idx) { - if (pnfs_mark_request_commit(req, lseg, cinfo)) + if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx)) return; nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo); } @@ -905,7 +905,8 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) } if (nfs_write_need_commit(hdr)) { memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); - nfs_mark_request_commit(req, hdr->lseg, &cinfo); + nfs_mark_request_commit(req, hdr->lseg, &cinfo, + 0); goto next; } remove_req: @@ -1560,14 +1561,15 @@ EXPORT_SYMBOL_GPL(nfs_init_commit); void nfs_retry_commit(struct list_head *page_list, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) + struct nfs_commit_info *cinfo, + u32 ds_commit_idx) { struct nfs_page *req; while (!list_empty(page_list)) { req = nfs_list_entry(page_list->next); nfs_list_remove_request(req); - nfs_mark_request_commit(req, lseg, cinfo); + nfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx); if (!cinfo->dreq) { dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); dec_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, @@ -1598,7 +1600,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), data->mds_ops, how, 0); out_bad: - nfs_retry_commit(head, NULL, cinfo); + nfs_retry_commit(head, NULL, cinfo, 0); cinfo->completion_ops->error_cleanup(NFS_I(inode)); return -ENOMEM; } -- cgit v1.2.3 From a7d42ddb3099727f58366fa006f850a219cce6c8 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 19 Sep 2014 10:55:07 -0400 Subject: nfs: add mirroring support to pgio layer This patch adds mirrored write support to the pgio layer. The default is to use one mirror, but pgio callers may define callbacks to change this to any value up to the (arbitrarily selected) limit of 16. The basic idea is to break out members of nfs_pageio_descriptor that cannot be shared between mirrored DSes and put them in a new structure. Signed-off-by: Weston Andros Adamson --- fs/nfs/direct.c | 17 ++- fs/nfs/internal.h | 1 + fs/nfs/objlayout/objio_osd.c | 3 +- fs/nfs/pagelist.c | 270 +++++++++++++++++++++++++++++++++++-------- fs/nfs/pnfs.c | 26 +++-- fs/nfs/read.c | 30 ++++- fs/nfs/write.c | 10 +- include/linux/nfs_page.h | 20 +++- include/linux/nfs_xdr.h | 1 + 9 files changed, 311 insertions(+), 67 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1ee41d74c31c..0178d4fe8ab7 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -360,8 +360,14 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) spin_lock(&dreq->lock); if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0)) dreq->error = hdr->error; - else - dreq->count += hdr->good_bytes; + else { + /* + * FIXME: right now this only accounts for bytes written + * to the first mirror + */ + if (hdr->pgio_mirror_idx == 0) + dreq->count += hdr->good_bytes; + } spin_unlock(&dreq->lock); while (!list_empty(&hdr->pages)) { @@ -724,7 +730,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) dreq->error = hdr->error; } if (dreq->error == 0) { - dreq->count += hdr->good_bytes; + /* + * FIXME: right now this only accounts for bytes written + * to the first mirror + */ + if (hdr->pgio_mirror_idx == 0) + dreq->count += hdr->good_bytes; if (nfs_write_need_commit(hdr)) { if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) request_commit = true; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 05f9a87cdab4..ef1c703e487b 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -469,6 +469,7 @@ void nfs_init_cinfo(struct nfs_commit_info *cinfo, struct nfs_direct_req *dreq); int nfs_key_timeout_notify(struct file *filp, struct inode *inode); bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx); +void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio); #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index d00778077df1..9a5f2ee6001f 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -537,11 +537,12 @@ int objio_write_pagelist(struct nfs_pgio_header *hdr, int how) static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { + struct nfs_pgio_mirror *mirror = &pgio->pg_mirrors[pgio->pg_mirror_idx]; unsigned int size; size = pnfs_generic_pg_test(pgio, prev, req); - if (!size || pgio->pg_count + req->wb_bytes > + if (!size || mirror->pg_count + req->wb_bytes > (unsigned long)pgio->pg_layout_private) return 0; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 1c031878c752..eec12b75c232 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -46,17 +46,22 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, void (*release)(struct nfs_pgio_header *hdr)) { - hdr->req = nfs_list_entry(desc->pg_list.next); + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + + + hdr->req = nfs_list_entry(mirror->pg_list.next); hdr->inode = desc->pg_inode; hdr->cred = hdr->req->wb_context->cred; hdr->io_start = req_offset(hdr->req); - hdr->good_bytes = desc->pg_count; + hdr->good_bytes = mirror->pg_count; hdr->dreq = desc->pg_dreq; hdr->layout_private = desc->pg_layout_private; hdr->release = release; hdr->completion_ops = desc->pg_completion_ops; if (hdr->completion_ops->init_hdr) hdr->completion_ops->init_hdr(hdr); + + hdr->pgio_mirror_idx = desc->pg_mirror_idx; } EXPORT_SYMBOL_GPL(nfs_pgheader_init); @@ -480,7 +485,10 @@ nfs_wait_on_request(struct nfs_page *req) size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) { - if (desc->pg_count > desc->pg_bsize) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + + + if (mirror->pg_count > mirror->pg_bsize) { /* should never happen */ WARN_ON_ONCE(1); return 0; @@ -490,11 +498,11 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, * Limit the request size so that we can still allocate a page array * for it without upsetting the slab allocator. */ - if (((desc->pg_count + req->wb_bytes) >> PAGE_SHIFT) * + if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) * sizeof(struct page) > PAGE_SIZE) return 0; - return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); + return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes); } EXPORT_SYMBOL_GPL(nfs_generic_pg_test); @@ -651,10 +659,18 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio); static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { + struct nfs_pgio_mirror *mirror; + u32 midx; + set_bit(NFS_IOHDR_REDO, &hdr->flags); nfs_pgio_data_destroy(hdr); hdr->completion_ops->completion(hdr); - desc->pg_completion_ops->error_cleanup(&desc->pg_list); + /* TODO: Make sure it's right to clean up all mirrors here + * and not just hdr->pgio_mirror_idx */ + for (midx = 0; midx < desc->pg_mirror_count; midx++) { + mirror = &desc->pg_mirrors[midx]; + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); + } return -ENOMEM; } @@ -671,6 +687,17 @@ static void nfs_pgio_release(void *calldata) hdr->completion_ops->completion(hdr); } +static void nfs_pageio_mirror_init(struct nfs_pgio_mirror *mirror, + unsigned int bsize) +{ + INIT_LIST_HEAD(&mirror->pg_list); + mirror->pg_bytes_written = 0; + mirror->pg_count = 0; + mirror->pg_bsize = bsize; + mirror->pg_base = 0; + mirror->pg_recoalesce = 0; +} + /** * nfs_pageio_init - initialise a page io descriptor * @desc: pointer to descriptor @@ -687,13 +714,10 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, size_t bsize, int io_flags) { - INIT_LIST_HEAD(&desc->pg_list); - desc->pg_bytes_written = 0; - desc->pg_count = 0; - desc->pg_bsize = bsize; - desc->pg_base = 0; + struct nfs_pgio_mirror *new; + int i; + desc->pg_moreio = 0; - desc->pg_recoalesce = 0; desc->pg_inode = inode; desc->pg_ops = pg_ops; desc->pg_completion_ops = compl_ops; @@ -703,6 +727,26 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_lseg = NULL; desc->pg_dreq = NULL; desc->pg_layout_private = NULL; + desc->pg_bsize = bsize; + + desc->pg_mirror_count = 1; + desc->pg_mirror_idx = 0; + + if (pg_ops->pg_get_mirror_count) { + /* until we have a request, we don't have an lseg and no + * idea how many mirrors there will be */ + new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX, + sizeof(struct nfs_pgio_mirror), GFP_KERNEL); + desc->pg_mirrors_dynamic = new; + desc->pg_mirrors = new; + + for (i = 0; i < NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX; i++) + nfs_pageio_mirror_init(&desc->pg_mirrors[i], bsize); + } else { + desc->pg_mirrors_dynamic = NULL; + desc->pg_mirrors = desc->pg_mirrors_static; + nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize); + } } EXPORT_SYMBOL_GPL(nfs_pageio_init); @@ -738,14 +782,16 @@ static void nfs_pgio_result(struct rpc_task *task, void *calldata) int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_page *req; struct page **pages, *last_page; - struct list_head *head = &desc->pg_list; + struct list_head *head = &mirror->pg_list; struct nfs_commit_info cinfo; unsigned int pagecount, pageused; - pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count); + pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); if (!nfs_pgarray_set(&hdr->page_array, pagecount)) return nfs_pgio_error(desc, hdr); @@ -773,7 +819,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, desc->pg_ioflags &= ~FLUSH_COND_STABLE; /* Set up the argument struct */ - nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo); + nfs_pgio_rpcsetup(hdr, mirror->pg_count, 0, desc->pg_ioflags, &cinfo); desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; } @@ -781,12 +827,17 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio); static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) { + struct nfs_pgio_mirror *mirror; struct nfs_pgio_header *hdr; int ret; + mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { - desc->pg_completion_ops->error_cleanup(&desc->pg_list); + /* TODO: make sure this is right with mirroring - or + * should it back out all mirrors? */ + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); return -ENOMEM; } nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); @@ -801,6 +852,49 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) return ret; } +/* + * nfs_pageio_setup_mirroring - determine if mirroring is to be used + * by calling the pg_get_mirror_count op + */ +static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + int mirror_count = 1; + + if (!pgio->pg_ops->pg_get_mirror_count) + return 0; + + mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); + + if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) + return -EINVAL; + + if (WARN_ON_ONCE(!pgio->pg_mirrors_dynamic)) + return -EINVAL; + + pgio->pg_mirror_count = mirror_count; + + return 0; +} + +/* + * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1) + */ +void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio) +{ + pgio->pg_mirror_count = 1; + pgio->pg_mirror_idx = 0; +} + +static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio) +{ + pgio->pg_mirror_count = 1; + pgio->pg_mirror_idx = 0; + pgio->pg_mirrors = pgio->pg_mirrors_static; + kfree(pgio->pg_mirrors_dynamic); + pgio->pg_mirrors_dynamic = NULL; +} + static bool nfs_match_open_context(const struct nfs_open_context *ctx1, const struct nfs_open_context *ctx2) { @@ -867,19 +961,22 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_page *prev = NULL; - if (desc->pg_count != 0) { - prev = nfs_list_entry(desc->pg_list.prev); + + if (mirror->pg_count != 0) { + prev = nfs_list_entry(mirror->pg_list.prev); } else { if (desc->pg_ops->pg_init) desc->pg_ops->pg_init(desc, req); - desc->pg_base = req->wb_pgbase; + mirror->pg_base = req->wb_pgbase; } if (!nfs_can_coalesce_requests(prev, req, desc)) return 0; nfs_list_remove_request(req); - nfs_list_add_request(req, &desc->pg_list); - desc->pg_count += req->wb_bytes; + nfs_list_add_request(req, &mirror->pg_list); + mirror->pg_count += req->wb_bytes; return 1; } @@ -888,16 +985,19 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, */ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) { - if (!list_empty(&desc->pg_list)) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + + + if (!list_empty(&mirror->pg_list)) { int error = desc->pg_ops->pg_doio(desc); if (error < 0) desc->pg_error = error; else - desc->pg_bytes_written += desc->pg_count; + mirror->pg_bytes_written += mirror->pg_count; } - if (list_empty(&desc->pg_list)) { - desc->pg_count = 0; - desc->pg_base = 0; + if (list_empty(&mirror->pg_list)) { + mirror->pg_count = 0; + mirror->pg_base = 0; } } @@ -915,10 +1015,14 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_page *subreq; unsigned int bytes_left = 0; unsigned int offset, pgbase; + WARN_ON_ONCE(desc->pg_mirror_idx >= desc->pg_mirror_count); + nfs_page_group_lock(req, false); subreq = req; @@ -938,7 +1042,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, nfs_pageio_doio(desc); if (desc->pg_error < 0) return 0; - if (desc->pg_recoalesce) + if (mirror->pg_recoalesce) return 0; /* retry add_request for this subreq */ nfs_page_group_lock(req, false); @@ -976,14 +1080,16 @@ err_ptr: static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; LIST_HEAD(head); do { - list_splice_init(&desc->pg_list, &head); - desc->pg_bytes_written -= desc->pg_count; - desc->pg_count = 0; - desc->pg_base = 0; - desc->pg_recoalesce = 0; + list_splice_init(&mirror->pg_list, &head); + mirror->pg_bytes_written -= mirror->pg_count; + mirror->pg_count = 0; + mirror->pg_base = 0; + mirror->pg_recoalesce = 0; + desc->pg_moreio = 0; while (!list_empty(&head)) { @@ -997,11 +1103,11 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) return 0; break; } - } while (desc->pg_recoalesce); + } while (mirror->pg_recoalesce); return 1; } -int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, +static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { int ret; @@ -1014,9 +1120,78 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, break; ret = nfs_do_recoalesce(desc); } while (ret); + return ret; } +int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, + struct nfs_page *req) +{ + u32 midx; + unsigned int pgbase, offset, bytes; + struct nfs_page *dupreq, *lastreq; + + pgbase = req->wb_pgbase; + offset = req->wb_offset; + bytes = req->wb_bytes; + + nfs_pageio_setup_mirroring(desc, req); + + for (midx = 0; midx < desc->pg_mirror_count; midx++) { + if (midx) { + nfs_page_group_lock(req, false); + + /* find the last request */ + for (lastreq = req->wb_head; + lastreq->wb_this_page != req->wb_head; + lastreq = lastreq->wb_this_page) + ; + + dupreq = nfs_create_request(req->wb_context, + req->wb_page, lastreq, pgbase, bytes); + + if (IS_ERR(dupreq)) { + nfs_page_group_unlock(req); + return 0; + } + + nfs_lock_request(dupreq); + nfs_page_group_unlock(req); + dupreq->wb_offset = offset; + dupreq->wb_index = req->wb_index; + } else + dupreq = req; + + desc->pg_mirror_idx = midx; + if (!nfs_pageio_add_request_mirror(desc, dupreq)) + return 0; + } + + return 1; +} + +/* + * nfs_pageio_complete_mirror - Complete I/O on the current mirror of an + * nfs_pageio_descriptor + * @desc: pointer to io descriptor + */ +static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, + u32 mirror_idx) +{ + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[mirror_idx]; + u32 restore_idx = desc->pg_mirror_idx; + + desc->pg_mirror_idx = mirror_idx; + for (;;) { + nfs_pageio_doio(desc); + if (!mirror->pg_recoalesce) + break; + if (!nfs_do_recoalesce(desc)) + break; + } + desc->pg_mirror_idx = restore_idx; +} + /* * nfs_pageio_resend - Transfer requests to new descriptor and resend * @hdr - the pgio header to move request from @@ -1055,16 +1230,14 @@ EXPORT_SYMBOL_GPL(nfs_pageio_resend); */ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) { - for (;;) { - nfs_pageio_doio(desc); - if (!desc->pg_recoalesce) - break; - if (!nfs_do_recoalesce(desc)) - break; - } + u32 midx; + + for (midx = 0; midx < desc->pg_mirror_count; midx++) + nfs_pageio_complete_mirror(desc, midx); if (desc->pg_ops->pg_cleanup) desc->pg_ops->pg_cleanup(desc); + nfs_pageio_cleanup_mirroring(desc); } /** @@ -1080,10 +1253,17 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) */ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) { - if (!list_empty(&desc->pg_list)) { - struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); - if (index != prev->wb_index + 1) - nfs_pageio_complete(desc); + struct nfs_pgio_mirror *mirror; + struct nfs_page *prev; + u32 midx; + + for (midx = 0; midx < desc->pg_mirror_count; midx++) { + mirror = &desc->pg_mirrors[midx]; + if (!list_empty(&mirror->pg_list)) { + prev = nfs_list_entry(mirror->pg_list.prev); + if (index != prev->wb_index + 1) + nfs_pageio_complete_mirror(desc, midx); + } } } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2da2e771fefe..5f7c422ebb5d 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1646,8 +1646,8 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup); * of bytes (maximum @req->wb_bytes) that can be coalesced. */ size_t -pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, - struct nfs_page *req) +pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, + struct nfs_page *prev, struct nfs_page *req) { unsigned int size; u64 seg_end, req_start, seg_left; @@ -1729,10 +1729,12 @@ static void pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { - list_splice_tail_init(&hdr->pages, &desc->pg_list); + list_splice_tail_init(&hdr->pages, &mirror->pg_list); nfs_pageio_reset_write_mds(desc); - desc->pg_recoalesce = 1; + mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); } @@ -1781,12 +1783,14 @@ EXPORT_SYMBOL_GPL(pnfs_writehdr_free); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_header *hdr; int ret; hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { - desc->pg_completion_ops->error_cleanup(&desc->pg_list); + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); return -ENOMEM; } nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); @@ -1795,6 +1799,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) ret = nfs_generic_pgio(desc, hdr); if (!ret) pnfs_do_write(desc, hdr, desc->pg_ioflags); + return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); @@ -1839,10 +1844,13 @@ static void pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { - list_splice_tail_init(&hdr->pages, &desc->pg_list); + list_splice_tail_init(&hdr->pages, &mirror->pg_list); nfs_pageio_reset_read_mds(desc); - desc->pg_recoalesce = 1; + mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); } @@ -1893,12 +1901,14 @@ EXPORT_SYMBOL_GPL(pnfs_readhdr_free); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_header *hdr; int ret; hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { - desc->pg_completion_ops->error_cleanup(&desc->pg_list); + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); return -ENOMEM; } nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 092ab499f2b6..568ecf0a880f 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -70,8 +70,15 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_read); void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) { + struct nfs_pgio_mirror *mirror; + pgio->pg_ops = &nfs_pgio_rw_ops; - pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; + + /* read path should never have more than one mirror */ + WARN_ON_ONCE(pgio->pg_mirror_count != 1); + + mirror = &pgio->pg_mirrors[0]; + mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; } EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); @@ -81,6 +88,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, struct nfs_page *new; unsigned int len; struct nfs_pageio_descriptor pgio; + struct nfs_pgio_mirror *pgm; len = nfs_page_length(page); if (len == 0) @@ -97,7 +105,13 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, &nfs_async_read_completion_ops); nfs_pageio_add_request(&pgio, new); nfs_pageio_complete(&pgio); - NFS_I(inode)->read_io += pgio.pg_bytes_written; + + /* It doesn't make sense to do mirrored reads! */ + WARN_ON_ONCE(pgio.pg_mirror_count != 1); + + pgm = &pgio.pg_mirrors[0]; + NFS_I(inode)->read_io += pgm->pg_bytes_written; + return 0; } @@ -352,6 +366,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { struct nfs_pageio_descriptor pgio; + struct nfs_pgio_mirror *pgm; struct nfs_readdesc desc = { .pgio = &pgio, }; @@ -387,10 +402,15 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, &nfs_async_read_completion_ops); ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); - nfs_pageio_complete(&pgio); - NFS_I(inode)->read_io += pgio.pg_bytes_written; - npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + /* It doesn't make sense to do mirrored reads! */ + WARN_ON_ONCE(pgio.pg_mirror_count != 1); + + pgm = &pgio.pg_mirrors[0]; + NFS_I(inode)->read_io += pgm->pg_bytes_written; + npages = (pgm->pg_bytes_written + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; nfs_add_stats(inode, NFSIOS_READPAGES, npages); read_complete: put_nfs_open_context(desc.ctx); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2bee165fddcf..ceacfeeb28c2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -906,7 +906,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) if (nfs_write_need_commit(hdr)) { memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo, - 0); + hdr->pgio_mirror_idx); goto next; } remove_req: @@ -1304,8 +1304,14 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_write); void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) { + struct nfs_pgio_mirror *mirror; + pgio->pg_ops = &nfs_pgio_rw_ops; - pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; + + nfs_pageio_stop_mirroring(pgio); + + mirror = &pgio->pg_mirrors[0]; + mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; } EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 479c566c4ddc..3eb072dbce83 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -58,6 +58,8 @@ struct nfs_pageio_ops { size_t (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); int (*pg_doio)(struct nfs_pageio_descriptor *); + unsigned int (*pg_get_mirror_count)(struct nfs_pageio_descriptor *, + struct nfs_page *); void (*pg_cleanup)(struct nfs_pageio_descriptor *); }; @@ -74,15 +76,17 @@ struct nfs_rw_ops { struct rpc_task_setup *, int); }; -struct nfs_pageio_descriptor { +struct nfs_pgio_mirror { struct list_head pg_list; unsigned long pg_bytes_written; size_t pg_count; size_t pg_bsize; unsigned int pg_base; - unsigned char pg_moreio : 1, - pg_recoalesce : 1; + unsigned char pg_recoalesce : 1; +}; +struct nfs_pageio_descriptor { + unsigned char pg_moreio : 1; struct inode *pg_inode; const struct nfs_pageio_ops *pg_ops; const struct nfs_rw_ops *pg_rw_ops; @@ -93,8 +97,18 @@ struct nfs_pageio_descriptor { struct pnfs_layout_segment *pg_lseg; struct nfs_direct_req *pg_dreq; void *pg_layout_private; + unsigned int pg_bsize; /* default bsize for mirrors */ + + u32 pg_mirror_count; + struct nfs_pgio_mirror *pg_mirrors; + struct nfs_pgio_mirror pg_mirrors_static[1]; + struct nfs_pgio_mirror *pg_mirrors_dynamic; + u32 pg_mirror_idx; /* current mirror */ }; +/* arbitrarily selected limit to number of mirrors */ +#define NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX 16 + #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 5bc99f04a550..6400a1e01aa4 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1329,6 +1329,7 @@ struct nfs_pgio_header { struct nfs_page_array page_array; struct nfs_client *ds_clp; /* pNFS data server */ int ds_commit_idx; /* ds index if ds_clp is set */ + int pgio_mirror_idx;/* mirror index in pgio layer */ }; struct nfs_mds_commit_info { -- cgit v1.2.3 From d15bc38df607c893c36f4962dca0f57174c6a5c9 Mon Sep 17 00:00:00 2001 From: Tom Haynes Date: Fri, 13 Feb 2015 13:19:53 -0800 Subject: nfs: Provide and use helper functions for marking a page as unstable Signed-off-by: Tom Haynes Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 9 ++------- fs/nfs/flexfilelayout/flexfilelayout.c | 9 ++------- fs/nfs/internal.h | 13 +++++++++++++ fs/nfs/write.c | 9 ++------- 4 files changed, 19 insertions(+), 21 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 7ae1c263c5cf..e1e5ea262a13 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -1000,13 +1000,8 @@ mds_commit: nfs_list_add_request(req, list); cinfo->mds->ncommit++; spin_unlock(cinfo->lock); - if (!cinfo->dreq) { - inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - inc_bdi_stat(inode_to_bdi(page_file_mapping(req->wb_page)->host), - BDI_RECLAIMABLE); - __mark_inode_dirty(req->wb_context->dentry->d_inode, - I_DIRTY_DATASYNC); - } + if (!cinfo->dreq) + nfs_mark_page_unstable(req->wb_page); } static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index c22ecaa86c1c..423c2bc371fa 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1364,13 +1364,8 @@ ff_layout_mark_request_commit(struct nfs_page *req, nfs_list_add_request(req, list); cinfo->mds->ncommit++; spin_unlock(cinfo->lock); - if (!cinfo->dreq) { - inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - inc_bdi_stat(inode_to_bdi(page_file_mapping(req->wb_page)->host), - BDI_RECLAIMABLE); - __mark_inode_dirty(req->wb_context->dentry->d_inode, - I_DIRTY_DATASYNC); - } + if (!cinfo->dreq) + nfs_mark_page_unstable(req->wb_page); } static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 212b8c883d22..b802fb3a2d99 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -597,6 +597,19 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) sb->s_maxbytes = MAX_LFS_FILESIZE; } +/* + * Record the page as unstable and mark its inode as dirty. + */ +static inline +void nfs_mark_page_unstable(struct page *page) +{ + struct inode *inode = page_file_mapping(page)->host; + + inc_zone_page_state(page, NR_UNSTABLE_NFS); + inc_bdi_stat(inode_to_bdi(inode), BDI_RECLAIMABLE); + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); +} + /* * Determine the number of bytes of data the page contains */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 88a6d2196ece..76c278acaefc 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -789,13 +789,8 @@ nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, nfs_list_add_request(req, dst); cinfo->mds->ncommit++; spin_unlock(cinfo->lock); - if (!cinfo->dreq) { - inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - inc_bdi_stat(inode_to_bdi(page_file_mapping(req->wb_page)->host), - BDI_RECLAIMABLE); - __mark_inode_dirty(req->wb_context->dentry->d_inode, - I_DIRTY_DATASYNC); - } + if (!cinfo->dreq) + nfs_mark_page_unstable(req->wb_page); } EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); -- cgit v1.2.3 From 487b9b8afde60986b606b3ee05169fb893adc153 Mon Sep 17 00:00:00 2001 From: Tom Haynes Date: Fri, 13 Feb 2015 13:19:54 -0800 Subject: nfs: Can call nfs_clear_page_commit() instead Signed-off-by: Tom Haynes Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 76c278acaefc..595d81e354d1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1600,11 +1600,8 @@ void nfs_retry_commit(struct list_head *page_list, req = nfs_list_entry(page_list->next); nfs_list_remove_request(req); nfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx); - if (!cinfo->dreq) { - dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - dec_bdi_stat(inode_to_bdi(page_file_mapping(req->wb_page)->host), - BDI_RECLAIMABLE); - } + if (!cinfo->dreq) + nfs_clear_page_commit(req->wb_page); nfs_unlock_and_release_request(req); } } -- cgit v1.2.3