diff options
Diffstat (limited to 'fs')
159 files changed, 5144 insertions, 2060 deletions
diff --git a/fs/afs/file.c b/fs/afs/file.c index d6bc3f5d784b..323ae9912203 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -17,6 +17,7 @@ #include <linux/writeback.h> #include <linux/gfp.h> #include <linux/task_io_accounting_ops.h> +#include <linux/mm.h> #include "internal.h" static int afs_file_mmap(struct file *file, struct vm_area_struct *vma); @@ -441,7 +442,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping, /* Count the number of contiguous pages at the front of the list. Note * that the list goes prev-wards rather than next-wards. */ - first = list_entry(pages->prev, struct page, lru); + first = lru_to_page(pages); index = first->index + 1; n = 1; for (p = first->lru.prev; p != pages; p = p->prev) { @@ -473,7 +474,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping, * page at the end of the file. */ do { - page = list_entry(pages->prev, struct page, lru); + page = lru_to_page(pages); list_del(&page->lru); index = page->index; if (add_to_page_cache_lru(page, mapping, index, diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 0568fd986821..e432bd27a2e7 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -208,7 +208,7 @@ again: /* The new front of the queue now owns the state variables. */ next = list_entry(vnode->pending_locks.next, struct file_lock, fl_u.afs.link); - vnode->lock_key = afs_file_key(next->fl_file); + vnode->lock_key = key_get(afs_file_key(next->fl_file)); vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE; vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB; goto again; @@ -413,7 +413,7 @@ static void afs_dequeue_lock(struct afs_vnode *vnode, struct file_lock *fl) /* The new front of the queue now owns the state variables. */ next = list_entry(vnode->pending_locks.next, struct file_lock, fl_u.afs.link); - vnode->lock_key = afs_file_key(next->fl_file); + vnode->lock_key = key_get(afs_file_key(next->fl_file)); vnode->lock_type = (next->fl_type == F_RDLCK) ? 
AFS_LOCK_READ : AFS_LOCK_WRITE; vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB; afs_lock_may_be_available(vnode); diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index fde6b4d4121e..3a9eaec06756 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -247,7 +247,7 @@ int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) } } - if (!still_probing || unlikely(signal_pending(current))) + if (!still_probing || signal_pending(current)) goto stop; schedule(); } diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 6b17d3620414..1a4ce07fb406 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -414,7 +414,6 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { valid = true; } else { - vnode->cb_s_break = vnode->cb_interest->server->cb_s_break; vnode->cb_v_break = vnode->volume->cb_v_break; valid = false; } @@ -546,6 +545,8 @@ void afs_evict_inode(struct inode *inode) #endif afs_put_permits(rcu_access_pointer(vnode->permit_cache)); + key_put(vnode->lock_key); + vnode->lock_key = NULL; _leave(""); } diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h index 07bc10f076aa..d443e2bfa094 100644 --- a/fs/afs/protocol_yfs.h +++ b/fs/afs/protocol_yfs.h @@ -161,3 +161,14 @@ struct yfs_xdr_YFSStoreVolumeStatus { struct yfs_xdr_u64 max_quota; struct yfs_xdr_u64 file_quota; } __packed; + +enum yfs_lock_type { + yfs_LockNone = -1, + yfs_LockRead = 0, + yfs_LockWrite = 1, + yfs_LockExtend = 2, + yfs_LockRelease = 3, + yfs_LockMandatoryRead = 0x100, + yfs_LockMandatoryWrite = 0x101, + yfs_LockMandatoryExtend = 0x102, +}; diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index a7b44863d502..2c588f9bbbda 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -23,6 +23,7 @@ struct workqueue_struct *afs_async_calls; static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); static long afs_wait_for_call_to_complete(struct afs_call *, struct afs_addr_cursor *); static void 
afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); +static void afs_delete_async_call(struct work_struct *); static void afs_process_async_call(struct work_struct *); static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long); static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long); @@ -203,20 +204,26 @@ void afs_put_call(struct afs_call *call) } } +static struct afs_call *afs_get_call(struct afs_call *call, + enum afs_call_trace why) +{ + int u = atomic_inc_return(&call->usage); + + trace_afs_call(call, why, u, + atomic_read(&call->net->nr_outstanding_calls), + __builtin_return_address(0)); + return call; +} + /* * Queue the call for actual work. */ static void afs_queue_call_work(struct afs_call *call) { if (call->type->work) { - int u = atomic_inc_return(&call->usage); - - trace_afs_call(call, afs_call_trace_work, u, - atomic_read(&call->net->nr_outstanding_calls), - __builtin_return_address(0)); - INIT_WORK(&call->work, call->type->work); + afs_get_call(call, afs_call_trace_work); if (!queue_work(afs_wq, &call->work)) afs_put_call(call); } @@ -398,6 +405,12 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, } } + /* If the call is going to be asynchronous, we need an extra ref for + * the call to hold itself so the caller need not hang on to its ref. + */ + if (call->async) + afs_get_call(call, afs_call_trace_get); + /* create a call */ rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key, (unsigned long)call, @@ -438,15 +451,17 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, goto error_do_abort; } - /* at this point, an async call may no longer exist as it may have - * already completed */ - if (call->async) + /* Note that at this point, we may have received the reply or an abort + * - and an asynchronous call may already have completed. 
+ */ + if (call->async) { + afs_put_call(call); return -EINPROGRESS; + } return afs_wait_for_call_to_complete(call, ac); error_do_abort: - call->state = AFS_CALL_COMPLETE; if (ret != -ECONNABORTED) { rxrpc_kernel_abort_call(call->net->socket, rxcall, RX_USER_ABORT, ret, "KSD"); @@ -463,8 +478,24 @@ error_do_abort: error_kill_call: if (call->type->done) call->type->done(call); - afs_put_call(call); + + /* We need to dispose of the extra ref we grabbed for an async call. + * The call, however, might be queued on afs_async_calls and we need to + * make sure we don't get any more notifications that might requeue it. + */ + if (call->rxcall) { + rxrpc_kernel_end_call(call->net->socket, call->rxcall); + call->rxcall = NULL; + } + if (call->async) { + if (cancel_work_sync(&call->async_work)) + afs_put_call(call); + afs_put_call(call); + } + ac->error = ret; + call->state = AFS_CALL_COMPLETE; + afs_put_call(call); _leave(" = %d", ret); return ret; } diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index 95d0761cdb34..155dc14caef9 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -42,9 +42,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, if (vldb->fs_mask[i] & type_mask) nr_servers++; - slist = kzalloc(sizeof(struct afs_server_list) + - sizeof(struct afs_server_entry) * nr_servers, - GFP_KERNEL); + slist = kzalloc(struct_size(slist, servers, nr_servers), GFP_KERNEL); if (!slist) goto error; diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c index f0b032976487..f402ee8171a1 100644 --- a/fs/afs/vl_probe.c +++ b/fs/afs/vl_probe.c @@ -248,7 +248,7 @@ int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist, } } - if (!still_probing || unlikely(signal_pending(current))) + if (!still_probing || signal_pending(current)) goto stop; schedule(); } diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index 12658c1363ae..5aa57929e8c2 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -803,7 +803,7 @@ int 
yfs_fs_create_file(struct afs_fs_cursor *fc, bp = xdr_encode_YFSFid(bp, &vnode->fid); bp = xdr_encode_string(bp, name, namesz); bp = xdr_encode_YFSStoreStatus_mode(bp, mode); - bp = xdr_encode_u32(bp, 0); /* ViceLockType */ + bp = xdr_encode_u32(bp, yfs_LockNone); /* ViceLockType */ yfs_check_req(call, bp); afs_use_fs_server(call, fc->cbi); diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h index 9f9cadbfbd7a..3e59f0ed777b 100644 --- a/fs/autofs/autofs_i.h +++ b/fs/autofs/autofs_i.h @@ -42,6 +42,8 @@ #endif #define pr_fmt(fmt) KBUILD_MODNAME ":pid:%d:%s: " fmt, current->pid, __func__ +extern struct file_system_type autofs_fs_type; + /* * Unified info structure. This is pointed to by both the dentry and * inode structures. Each file in the filesystem has an instance of this @@ -101,16 +103,19 @@ struct autofs_wait_queue { #define AUTOFS_SBI_MAGIC 0x6d4a556d +#define AUTOFS_SBI_CATATONIC 0x0001 +#define AUTOFS_SBI_STRICTEXPIRE 0x0002 + struct autofs_sb_info { u32 magic; int pipefd; struct file *pipe; struct pid *oz_pgrp; - int catatonic; int version; int sub_version; int min_proto; int max_proto; + unsigned int flags; unsigned long exp_timeout; unsigned int type; struct super_block *sb; @@ -126,8 +131,7 @@ struct autofs_sb_info { static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb) { - return sb->s_magic != AUTOFS_SUPER_MAGIC ? 
- NULL : (struct autofs_sb_info *)(sb->s_fs_info); + return (struct autofs_sb_info *)(sb->s_fs_info); } static inline struct autofs_info *autofs_dentry_ino(struct dentry *dentry) @@ -141,7 +145,8 @@ static inline struct autofs_info *autofs_dentry_ino(struct dentry *dentry) */ static inline int autofs_oz_mode(struct autofs_sb_info *sbi) { - return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp; + return ((sbi->flags & AUTOFS_SBI_CATATONIC) || + task_pgrp(current) == sbi->oz_pgrp); } struct inode *autofs_get_inode(struct super_block *, umode_t); diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c index 86eafda4a652..e9fe74d1541b 100644 --- a/fs/autofs/dev-ioctl.c +++ b/fs/autofs/dev-ioctl.c @@ -151,22 +151,6 @@ out: return err; } -/* - * Get the autofs super block info struct from the file opened on - * the autofs mount point. - */ -static struct autofs_sb_info *autofs_dev_ioctl_sbi(struct file *f) -{ - struct autofs_sb_info *sbi = NULL; - struct inode *inode; - - if (f) { - inode = file_inode(f); - sbi = autofs_sbi(inode->i_sb); - } - return sbi; -} - /* Return autofs dev ioctl version */ static int autofs_dev_ioctl_version(struct file *fp, struct autofs_sb_info *sbi, @@ -366,7 +350,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp, pipefd = param->setpipefd.pipefd; mutex_lock(&sbi->wq_mutex); - if (!sbi->catatonic) { + if (!(sbi->flags & AUTOFS_SBI_CATATONIC)) { mutex_unlock(&sbi->wq_mutex); return -EBUSY; } else { @@ -393,7 +377,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp, swap(sbi->oz_pgrp, new_pid); sbi->pipefd = pipefd; sbi->pipe = pipe; - sbi->catatonic = 0; + sbi->flags &= ~AUTOFS_SBI_CATATONIC; } out: put_pid(new_pid); @@ -658,6 +642,8 @@ static int _autofs_dev_ioctl(unsigned int command, if (cmd != AUTOFS_DEV_IOCTL_VERSION_CMD && cmd != AUTOFS_DEV_IOCTL_OPENMOUNT_CMD && cmd != AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD) { + struct super_block *sb; + fp = fget(param->ioctlfd); if (!fp) { if (cmd == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD) 
@@ -666,12 +652,13 @@ static int _autofs_dev_ioctl(unsigned int command, goto out; } - sbi = autofs_dev_ioctl_sbi(fp); - if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) { + sb = file_inode(fp)->i_sb; + if (sb->s_type != &autofs_fs_type) { err = -EINVAL; fput(fp); goto out; } + sbi = autofs_sbi(sb); /* * Admin needs to be able to set the mount catatonic in diff --git a/fs/autofs/init.c b/fs/autofs/init.c index 79ae07d9592f..c0c1db2cc6ea 100644 --- a/fs/autofs/init.c +++ b/fs/autofs/init.c @@ -16,7 +16,7 @@ static struct dentry *autofs_mount(struct file_system_type *fs_type, return mount_nodev(fs_type, flags, data, autofs_fill_super); } -static struct file_system_type autofs_fs_type = { +struct file_system_type autofs_fs_type = { .owner = THIS_MODULE, .name = "autofs", .mount = autofs_mount, diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 846c052569dd..0e8ea2d9a2bb 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -87,6 +87,8 @@ static int autofs_show_options(struct seq_file *m, struct dentry *root) seq_printf(m, ",direct"); else seq_printf(m, ",indirect"); + if (sbi->flags & AUTOFS_SBI_STRICTEXPIRE) + seq_printf(m, ",strictexpire"); #ifdef CONFIG_CHECKPOINT_RESTORE if (sbi->pipe) seq_printf(m, ",pipe_ino=%ld", file_inode(sbi->pipe)->i_ino); @@ -109,7 +111,7 @@ static const struct super_operations autofs_sops = { }; enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto, - Opt_indirect, Opt_direct, Opt_offset}; + Opt_indirect, Opt_direct, Opt_offset, Opt_strictexpire}; static const match_table_t tokens = { {Opt_fd, "fd=%u"}, @@ -121,24 +123,28 @@ static const match_table_t tokens = { {Opt_indirect, "indirect"}, {Opt_direct, "direct"}, {Opt_offset, "offset"}, + {Opt_strictexpire, "strictexpire"}, {Opt_err, NULL} }; -static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, - int *pgrp, bool *pgrp_set, unsigned int *type, - int *minproto, int *maxproto) +static int parse_options(char *options, + struct inode 
*root, int *pgrp, bool *pgrp_set, + struct autofs_sb_info *sbi) { char *p; substring_t args[MAX_OPT_ARGS]; int option; + int pipefd = -1; + kuid_t uid; + kgid_t gid; - *uid = current_uid(); - *gid = current_gid(); + root->i_uid = current_uid(); + root->i_gid = current_gid(); - *minproto = AUTOFS_MIN_PROTO_VERSION; - *maxproto = AUTOFS_MAX_PROTO_VERSION; + sbi->min_proto = AUTOFS_MIN_PROTO_VERSION; + sbi->max_proto = AUTOFS_MAX_PROTO_VERSION; - *pipefd = -1; + sbi->pipefd = -1; if (!options) return 1; @@ -152,22 +158,25 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, token = match_token(p, tokens, args); switch (token) { case Opt_fd: - if (match_int(args, pipefd)) + if (match_int(args, &pipefd)) return 1; + sbi->pipefd = pipefd; break; case Opt_uid: if (match_int(args, &option)) return 1; - *uid = make_kuid(current_user_ns(), option); - if (!uid_valid(*uid)) + uid = make_kuid(current_user_ns(), option); + if (!uid_valid(uid)) return 1; + root->i_uid = uid; break; case Opt_gid: if (match_int(args, &option)) return 1; - *gid = make_kgid(current_user_ns(), option); - if (!gid_valid(*gid)) + gid = make_kgid(current_user_ns(), option); + if (!gid_valid(gid)) return 1; + root->i_gid = gid; break; case Opt_pgrp: if (match_int(args, &option)) @@ -178,27 +187,30 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, case Opt_minproto: if (match_int(args, &option)) return 1; - *minproto = option; + sbi->min_proto = option; break; case Opt_maxproto: if (match_int(args, &option)) return 1; - *maxproto = option; + sbi->max_proto = option; break; case Opt_indirect: - set_autofs_type_indirect(type); + set_autofs_type_indirect(&sbi->type); break; case Opt_direct: - set_autofs_type_direct(type); + set_autofs_type_direct(&sbi->type); break; case Opt_offset: - set_autofs_type_offset(type); + set_autofs_type_offset(&sbi->type); + break; + case Opt_strictexpire: + sbi->flags |= AUTOFS_SBI_STRICTEXPIRE; break; default: return 
1; } } - return (*pipefd < 0); + return (sbi->pipefd < 0); } int autofs_fill_super(struct super_block *s, void *data, int silent) @@ -206,7 +218,6 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) struct inode *root_inode; struct dentry *root; struct file *pipe; - int pipefd; struct autofs_sb_info *sbi; struct autofs_info *ino; int pgrp = 0; @@ -222,12 +233,12 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) sbi->magic = AUTOFS_SBI_MAGIC; sbi->pipefd = -1; sbi->pipe = NULL; - sbi->catatonic = 1; sbi->exp_timeout = 0; sbi->oz_pgrp = NULL; sbi->sb = s; sbi->version = 0; sbi->sub_version = 0; + sbi->flags = AUTOFS_SBI_CATATONIC; set_autofs_type_indirect(&sbi->type); sbi->min_proto = 0; sbi->max_proto = 0; @@ -262,9 +273,7 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) root->d_fsdata = ino; /* Can this call block? */ - if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid, - &pgrp, &pgrp_set, &sbi->type, &sbi->min_proto, - &sbi->max_proto)) { + if (parse_options(data, root_inode, &pgrp, &pgrp_set, sbi)) { pr_err("called with bogus options\n"); goto fail_dput; } @@ -303,8 +312,9 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) root_inode->i_fop = &autofs_root_operations; root_inode->i_op = &autofs_dir_inode_operations; - pr_debug("pipe fd = %d, pgrp = %u\n", pipefd, pid_nr(sbi->oz_pgrp)); - pipe = fget(pipefd); + pr_debug("pipe fd = %d, pgrp = %u\n", + sbi->pipefd, pid_nr(sbi->oz_pgrp)); + pipe = fget(sbi->pipefd); if (!pipe) { pr_err("could not open pipe file descriptor\n"); @@ -314,8 +324,7 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) if (ret < 0) goto fail_fput; sbi->pipe = pipe; - sbi->pipefd = pipefd; - sbi->catatonic = 0; + sbi->flags &= ~AUTOFS_SBI_CATATONIC; /* * Success! Install the root dentry now to indicate completion. 
diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 782e57b911ab..1246f396bf0e 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -275,8 +275,11 @@ static int autofs_mount_wait(const struct path *path, bool rcu_walk) pr_debug("waiting for mount name=%pd\n", path->dentry); status = autofs_wait(sbi, path, NFY_MOUNT); pr_debug("mount wait done status=%d\n", status); + ino->last_used = jiffies; + return status; } - ino->last_used = jiffies; + if (!(sbi->flags & AUTOFS_SBI_STRICTEXPIRE)) + ino->last_used = jiffies; return status; } @@ -510,7 +513,8 @@ static struct dentry *autofs_lookup(struct inode *dir, sbi = autofs_sbi(dir->i_sb); pr_debug("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d\n", - current->pid, task_pgrp_nr(current), sbi->catatonic, + current->pid, task_pgrp_nr(current), + sbi->flags & AUTOFS_SBI_CATATONIC, autofs_oz_mode(sbi)); active = autofs_lookup_active(dentry); @@ -563,7 +567,7 @@ static int autofs_dir_symlink(struct inode *dir, * autofs mount is catatonic but the state of an autofs * file system needs to be preserved over restarts. */ - if (sbi->catatonic) + if (sbi->flags & AUTOFS_SBI_CATATONIC) return -EACCES; BUG_ON(!ino); @@ -626,7 +630,7 @@ static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry) * autofs mount is catatonic but the state of an autofs * file system needs to be preserved over restarts. */ - if (sbi->catatonic) + if (sbi->flags & AUTOFS_SBI_CATATONIC) return -EACCES; if (atomic_dec_and_test(&ino->count)) { @@ -714,7 +718,7 @@ static int autofs_dir_rmdir(struct inode *dir, struct dentry *dentry) * autofs mount is catatonic but the state of an autofs * file system needs to be preserved over restarts. */ - if (sbi->catatonic) + if (sbi->flags & AUTOFS_SBI_CATATONIC) return -EACCES; spin_lock(&sbi->lookup_lock); @@ -759,7 +763,7 @@ static int autofs_dir_mkdir(struct inode *dir, * autofs mount is catatonic but the state of an autofs * file system needs to be preserved over restarts. 
*/ - if (sbi->catatonic) + if (sbi->flags & AUTOFS_SBI_CATATONIC) return -EACCES; pr_debug("dentry %p, creating %pd\n", dentry, dentry); diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c index f6385c6ef0a5..15a3e31d0904 100644 --- a/fs/autofs/waitq.c +++ b/fs/autofs/waitq.c @@ -20,14 +20,14 @@ void autofs_catatonic_mode(struct autofs_sb_info *sbi) struct autofs_wait_queue *wq, *nwq; mutex_lock(&sbi->wq_mutex); - if (sbi->catatonic) { + if (sbi->flags & AUTOFS_SBI_CATATONIC) { mutex_unlock(&sbi->wq_mutex); return; } pr_debug("entering catatonic mode\n"); - sbi->catatonic = 1; + sbi->flags |= AUTOFS_SBI_CATATONIC; wq = sbi->queues; sbi->queues = NULL; /* Erase all wait queues */ while (wq) { @@ -255,7 +255,7 @@ static int validate_request(struct autofs_wait_queue **wait, struct autofs_wait_queue *wq; struct autofs_info *ino; - if (sbi->catatonic) + if (sbi->flags & AUTOFS_SBI_CATATONIC) return -ENOENT; /* Wait in progress, continue; */ @@ -290,7 +290,7 @@ static int validate_request(struct autofs_wait_queue **wait, if (mutex_lock_interruptible(&sbi->wq_mutex)) return -EINTR; - if (sbi->catatonic) + if (sbi->flags & AUTOFS_SBI_CATATONIC) return -ENOENT; wq = autofs_find_wait(sbi, qstr); @@ -359,7 +359,7 @@ int autofs_wait(struct autofs_sb_info *sbi, pid_t tgid; /* In catatonic mode, we don't wait for nobody */ - if (sbi->catatonic) + if (sbi->flags & AUTOFS_SBI_CATATONIC) return -ENOENT; /* diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h index 67aef3bb89e4..606f9378b2f0 100644 --- a/fs/bfs/bfs.h +++ b/fs/bfs/bfs.h @@ -1,13 +1,20 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * fs/bfs/bfs.h - * Copyright (C) 1999 Tigran Aivazian <tigran@veritas.com> + * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com> */ #ifndef _FS_BFS_BFS_H #define _FS_BFS_BFS_H #include <linux/bfs_fs.h> +/* In theory BFS supports up to 512 inodes, numbered from 2 (for /) up to 513 inclusive. + In actual fact, attempting to create the 512th inode (i.e. inode No. 513 or file No. 
511) + will fail with ENOSPC in bfs_add_entry(): the root directory cannot contain so many entries, counting '..'. + So, mkfs.bfs(8) should really limit its -N option to 511 and not 512. For now, we just print a warning + if a filesystem is mounted with such "impossible to fill up" number of inodes */ +#define BFS_MAX_LASTI 513 + /* * BFS file system in-core superblock info */ @@ -17,7 +24,7 @@ struct bfs_sb_info { unsigned long si_freei; unsigned long si_lf_eblk; unsigned long si_lasti; - unsigned long *si_imap; + DECLARE_BITMAP(si_imap, BFS_MAX_LASTI+1); struct mutex bfs_lock; }; diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index f32f21c3bbc7..d8dfe3a0cb39 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -2,8 +2,8 @@ /* * fs/bfs/dir.c * BFS directory operations. - * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com> - * Made endianness-clean by Andrew Stribblehill <ads@wompom.org> 2005 + * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com> + * Made endianness-clean by Andrew Stribblehill <ads@wompom.org> 2005 */ #include <linux/time.h> diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 1476cdd90cfb..0dceefc54b48 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -2,7 +2,7 @@ /* * fs/bfs/file.c * BFS file operations. - * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com> + * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com> * * Make the file block allocation algorithm understand the size * of the underlying block device. diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index d81c148682e7..d136b2aaafb3 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -1,10 +1,9 @@ /* * fs/bfs/inode.c * BFS superblock and inode operations. - * Copyright (C) 1999-2006 Tigran Aivazian <aivazian.tigran@gmail.com> + * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com> * From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds. - * - * Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005. 
+ * Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005. */ #include <linux/module.h> @@ -118,12 +117,12 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct bfs_sb_info *info = BFS_SB(inode->i_sb); unsigned int ino = (u16)inode->i_ino; - unsigned long i_sblock; + unsigned long i_sblock; struct bfs_inode *di; struct buffer_head *bh; int err = 0; - dprintf("ino=%08x\n", ino); + dprintf("ino=%08x\n", ino); di = find_inode(inode->i_sb, ino, &bh); if (IS_ERR(di)) @@ -144,7 +143,7 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) di->i_atime = cpu_to_le32(inode->i_atime.tv_sec); di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); di->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); - i_sblock = BFS_I(inode)->i_sblock; + i_sblock = BFS_I(inode)->i_sblock; di->i_sblock = cpu_to_le32(i_sblock); di->i_eblock = cpu_to_le32(BFS_I(inode)->i_eblock); di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1); @@ -188,13 +187,13 @@ static void bfs_evict_inode(struct inode *inode) mark_buffer_dirty(bh); brelse(bh); - if (bi->i_dsk_ino) { + if (bi->i_dsk_ino) { if (bi->i_sblock) info->si_freeb += bi->i_eblock + 1 - bi->i_sblock; info->si_freei++; clear_bit(ino, info->si_imap); - bfs_dump_imap("delete_inode", s); - } + bfs_dump_imap("evict_inode", s); + } /* * If this was the last file, make the previous block @@ -214,7 +213,6 @@ static void bfs_put_super(struct super_block *s) return; mutex_destroy(&info->bfs_lock); - kfree(info->si_imap); kfree(info); s->s_fs_info = NULL; } @@ -311,8 +309,7 @@ void bfs_dump_imap(const char *prefix, struct super_block *s) else strcat(tmpbuf, "0"); } - printf("BFS-fs: %s: lasti=%08lx <%s>\n", - prefix, BFS_SB(s)->si_lasti, tmpbuf); + printf("%s: lasti=%08lx <%s>\n", prefix, BFS_SB(s)->si_lasti, tmpbuf); free_page((unsigned long)tmpbuf); #endif } @@ -322,7 +319,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) struct buffer_head 
*bh, *sbh; struct bfs_super_block *bfs_sb; struct inode *inode; - unsigned i, imap_len; + unsigned i; struct bfs_sb_info *info; int ret = -EINVAL; unsigned long i_sblock, i_eblock, i_eoff, s_size; @@ -341,8 +338,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) bfs_sb = (struct bfs_super_block *)sbh->b_data; if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) { if (!silent) - printf("No BFS filesystem on %s (magic=%08x)\n", - s->s_id, le32_to_cpu(bfs_sb->s_magic)); + printf("No BFS filesystem on %s (magic=%08x)\n", s->s_id, le32_to_cpu(bfs_sb->s_magic)); goto out1; } if (BFS_UNCLEAN(bfs_sb, s) && !silent) @@ -351,18 +347,16 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) s->s_magic = BFS_MAGIC; if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end) || - le32_to_cpu(bfs_sb->s_start) < BFS_BSIZE) { - printf("Superblock is corrupted\n"); + le32_to_cpu(bfs_sb->s_start) < sizeof(struct bfs_super_block) + sizeof(struct bfs_dirent)) { + printf("Superblock is corrupted on %s\n", s->s_id); goto out1; } - info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) / - sizeof(struct bfs_inode) - + BFS_ROOT_INO - 1; - imap_len = (info->si_lasti / 8) + 1; - info->si_imap = kzalloc(imap_len, GFP_KERNEL | __GFP_NOWARN); - if (!info->si_imap) { - printf("Cannot allocate %u bytes\n", imap_len); + info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) / sizeof(struct bfs_inode) + BFS_ROOT_INO - 1; + if (info->si_lasti == BFS_MAX_LASTI) + printf("WARNING: filesystem %s was created with 512 inodes, the real maximum is 511, mounting anyway\n", s->s_id); + else if (info->si_lasti > BFS_MAX_LASTI) { + printf("Impossible last inode number %lu > %d on %s\n", info->si_lasti, BFS_MAX_LASTI, s->s_id); goto out1; } for (i = 0; i < BFS_ROOT_INO; i++) @@ -372,26 +366,25 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) inode = bfs_iget(s, BFS_ROOT_INO); if (IS_ERR(inode)) { ret = PTR_ERR(inode); - goto out2; 
+ goto out1; } s->s_root = d_make_root(inode); if (!s->s_root) { ret = -ENOMEM; - goto out2; + goto out1; } info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1) >> BFS_BSIZE_BITS; - info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - - le32_to_cpu(bfs_sb->s_start)) >> BFS_BSIZE_BITS; + info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - le32_to_cpu(bfs_sb->s_start)) >> BFS_BSIZE_BITS; info->si_freei = 0; info->si_lf_eblk = 0; /* can we read the last block? */ bh = sb_bread(s, info->si_blocks - 1); if (!bh) { - printf("Last block not available: %lu\n", info->si_blocks - 1); + printf("Last block not available on %s: %lu\n", s->s_id, info->si_blocks - 1); ret = -EIO; - goto out3; + goto out2; } brelse(bh); @@ -425,11 +418,11 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) (i_eoff != le32_to_cpu(-1) && i_eoff > s_size) || i_sblock * BFS_BSIZE > i_eoff) { - printf("Inode 0x%08x corrupted\n", i); + printf("Inode 0x%08x corrupted on %s\n", i, s->s_id); brelse(bh); ret = -EIO; - goto out3; + goto out2; } if (!di->i_ino) { @@ -445,14 +438,12 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) } brelse(bh); brelse(sbh); - bfs_dump_imap("read_super", s); + bfs_dump_imap("fill_super", s); return 0; -out3: +out2: dput(s->s_root); s->s_root = NULL; -out2: - kfree(info->si_imap); out1: brelse(sbh); out: @@ -482,7 +473,7 @@ static int __init init_bfs_fs(void) int err = init_inodecache(); if (err) goto out1; - err = register_filesystem(&bfs_fs_type); + err = register_filesystem(&bfs_fs_type); if (err) goto out; return 0; diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index c3deb2e35f20..ca9725f18e00 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -78,9 +78,9 @@ static int aout_core_dump(struct coredump_params *cprm) /* make sure we actually have a data and stack area to dump */ set_fs(USER_DS); - if (!access_ok(VERIFY_READ, START_DATA(dump), dump.u_dsize << PAGE_SHIFT)) + if (!access_ok(START_DATA(dump), dump.u_dsize << 
PAGE_SHIFT)) dump.u_dsize = 0; - if (!access_ok(VERIFY_READ, START_STACK(dump), dump.u_ssize << PAGE_SHIFT)) + if (!access_ok(START_STACK(dump), dump.u_ssize << PAGE_SHIFT)) dump.u_ssize = 0; set_fs(KERNEL_DS); diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 7cde3f46ad26..d0078cbb718b 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -42,10 +42,14 @@ static int load_script(struct linux_binprm *bprm) fput(bprm->file); bprm->file = NULL; - bprm->buf[BINPRM_BUF_SIZE - 1] = '\0'; - if ((cp = strchr(bprm->buf, '\n')) == NULL) - cp = bprm->buf+BINPRM_BUF_SIZE-1; + for (cp = bprm->buf+2;; cp++) { + if (cp >= bprm->buf + BINPRM_BUF_SIZE) + return -ENOEXEC; + if (!*cp || (*cp == '\n')) + break; + } *cp = '\0'; + while (cp > bprm->buf) { cp--; if ((*cp == ' ') || (*cp == '\t')) diff --git a/fs/block_dev.c b/fs/block_dev.c index 450be88cffef..58a4c1217fa8 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -104,6 +104,20 @@ void invalidate_bdev(struct block_device *bdev) } EXPORT_SYMBOL(invalidate_bdev); +static void set_init_blocksize(struct block_device *bdev) +{ + unsigned bsize = bdev_logical_block_size(bdev); + loff_t size = i_size_read(bdev->bd_inode); + + while (bsize < PAGE_SIZE) { + if (size & bsize) + break; + bsize <<= 1; + } + bdev->bd_block_size = bsize; + bdev->bd_inode->i_blkbits = blksize_bits(bsize); +} + int set_blocksize(struct block_device *bdev, int size) { /* Size must be a power of two, and between 512 and PAGE_SIZE */ @@ -237,11 +251,9 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, qc = submit_bio(&bio); for (;;) { - __set_current_state(TASK_UNINTERRUPTIBLE); - + set_current_state(TASK_UNINTERRUPTIBLE); if (!READ_ONCE(bio.bi_private)) break; - if (!(iocb->ki_flags & IOCB_HIPRI) || !blk_poll(bdev_get_queue(bdev), qc, true)) io_schedule(); @@ -426,8 +438,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) return -EIOCBQUEUED; for (;;) { - __set_current_state(TASK_UNINTERRUPTIBLE); 
- + set_current_state(TASK_UNINTERRUPTIBLE); if (!READ_ONCE(dio->waiter)) break; @@ -1434,18 +1445,9 @@ EXPORT_SYMBOL(check_disk_change); void bd_set_size(struct block_device *bdev, loff_t size) { - unsigned bsize = bdev_logical_block_size(bdev); - inode_lock(bdev->bd_inode); i_size_write(bdev->bd_inode, size); inode_unlock(bdev->bd_inode); - while (bsize < PAGE_SIZE) { - if (size & bsize) - break; - bsize <<= 1; - } - bdev->bd_block_size = bsize; - bdev->bd_inode->i_blkbits = blksize_bits(bsize); } EXPORT_SYMBOL(bd_set_size); @@ -1522,8 +1524,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) } } - if (!ret) + if (!ret) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); + set_init_blocksize(bdev); + } /* * If the device is invalidated, rescan partition @@ -1558,6 +1562,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) goto out_clear; } bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); + set_init_blocksize(bdev); } if (bdev->bd_bdi == &noop_backing_dev_info) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index d92462fe66c8..f64aad613727 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1016,19 +1016,21 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, parent_start = parent->start; /* - * If we are COWing a node/leaf from the extent, chunk or device trees, - * make sure that we do not finish block group creation of pending block - * groups. We do this to avoid a deadlock. + * If we are COWing a node/leaf from the extent, chunk, device or free + * space trees, make sure that we do not finish block group creation of + * pending block groups. We do this to avoid a deadlock. * COWing can result in allocation of a new chunk, and flushing pending * block groups (btrfs_create_pending_block_groups()) can be triggered * when finishing allocation of a new chunk. 
Creation of a pending block - * group modifies the extent, chunk and device trees, therefore we could - * deadlock with ourselves since we are holding a lock on an extent - * buffer that btrfs_create_pending_block_groups() may try to COW later. + * group modifies the extent, chunk, device and free space trees, + * therefore we could deadlock with ourselves since we are holding a + * lock on an extent buffer that btrfs_create_pending_block_groups() may + * try to COW later. */ if (root == fs_info->extent_root || root == fs_info->chunk_root || - root == fs_info->dev_root) + root == fs_info->dev_root || + root == fs_info->free_space_root) trans->can_flush_pending_bgs = false; cow = btrfs_alloc_tree_block(trans, root, parent_start, diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f031a447a047..7a2a2621f0d9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -35,6 +35,7 @@ struct btrfs_trans_handle; struct btrfs_transaction; struct btrfs_pending_snapshot; +struct btrfs_delayed_ref_root; extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; @@ -786,6 +787,9 @@ enum { * main phase. The fs_info::balance_ctl is initialized. */ BTRFS_FS_BALANCE_RUNNING, + + /* Indicate that the cleaner thread is awake and doing something. */ + BTRFS_FS_CLEANER_RUNNING, }; struct btrfs_fs_info { @@ -1144,9 +1148,6 @@ struct btrfs_fs_info { struct mutex unused_bg_unpin_mutex; struct mutex delete_unused_bgs_mutex; - /* For btrfs to record security options */ - struct security_mnt_opts security_opts; - /* * Chunks that can't be freed yet (under a trim/discard operation) * and will be latter freed. Protected by fs_info->chunk_mutex. 
@@ -2664,6 +2665,9 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long count); int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info, unsigned long count, u64 transid, int wait); +void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head); int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len); int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, @@ -3021,7 +3025,6 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info) kfree(fs_info->free_space_root); kfree(fs_info->super_copy); kfree(fs_info->super_for_commit); - security_free_mnt_opts(&fs_info->security_opts); kvfree(fs_info); } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8da2f380d3c0..6a2a2a951705 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1682,6 +1682,8 @@ static int cleaner_kthread(void *arg) while (1) { again = 0; + set_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags); + /* Make the cleaner go to sleep early. */ if (btrfs_need_cleaner_sleep(fs_info)) goto sleep; @@ -1728,6 +1730,7 @@ static int cleaner_kthread(void *arg) */ btrfs_delete_unused_bgs(fs_info); sleep: + clear_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags); if (kthread_should_park()) kthread_parkme(); if (kthread_should_stop()) @@ -4201,6 +4204,14 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) spin_lock(&fs_info->ordered_root_lock); } spin_unlock(&fs_info->ordered_root_lock); + + /* + * We need this here because if we've been flipped read-only we won't + * get sync() from the umount, so we need to make sure any ordered + * extents that haven't had their dirty pages IO start writeout yet + * actually get run and error out properly. 
+ */ + btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); } static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, @@ -4265,6 +4276,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, if (pin_bytes) btrfs_pin_extent(fs_info, head->bytenr, head->num_bytes, 1); + btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head); btrfs_put_delayed_ref_head(head); cond_resched(); spin_lock(&delayed_refs->lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b15afeae16df..d81035b7ea7d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2456,12 +2456,10 @@ static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans, return ret ? ret : 1; } -static void cleanup_ref_head_accounting(struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_head *head) +void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head) { - struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_delayed_ref_root *delayed_refs = - &trans->transaction->delayed_refs; int nr_items = 1; /* Dropping this ref head update. */ if (head->total_ref_mod < 0) { @@ -2544,7 +2542,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, } } - cleanup_ref_head_accounting(trans, head); + btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head); trace_run_delayed_ref_head(fs_info, head, 0); btrfs_delayed_ref_unlock(head); @@ -4954,6 +4952,15 @@ static void flush_space(struct btrfs_fs_info *fs_info, ret = 0; break; case COMMIT_TRANS: + /* + * If we have pending delayed iputs then we could free up a + * bunch of pinned space, so make sure we run the iputs before + * we do our pinned bytes check below. 
+ */ + mutex_lock(&fs_info->cleaner_delayed_iput_mutex); + btrfs_run_delayed_iputs(fs_info); + mutex_unlock(&fs_info->cleaner_delayed_iput_mutex); + ret = may_commit_transaction(fs_info, space_info); break; default: @@ -7188,7 +7195,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, if (head->must_insert_reserved) ret = 1; - cleanup_ref_head_accounting(trans, head); + btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head); mutex_unlock(&head->mutex); btrfs_put_delayed_ref_head(head); return ret; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fc126b92ea59..52abe4082680 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4103,8 +4103,7 @@ int extent_readpages(struct address_space *mapping, struct list_head *pages, while (!list_empty(pages)) { for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) { - struct page *page = list_entry(pages->prev, - struct page, lru); + struct page *page = lru_to_page(pages); prefetchw(&page->flags); list_del(&page->lru); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 43eb4535319d..5c349667c761 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3129,9 +3129,6 @@ out: /* once for the tree */ btrfs_put_ordered_extent(ordered_extent); - /* Try to release some metadata so we don't get an OOM but don't wait */ - btrfs_btree_balance_dirty_nodelay(fs_info); - return ret; } @@ -3254,6 +3251,8 @@ void btrfs_add_delayed_iput(struct inode *inode) ASSERT(list_empty(&binode->delayed_iput)); list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs); spin_unlock(&fs_info->delayed_iput_lock); + if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags)) + wake_up_process(fs_info->cleaner_kthread); } void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fab9443f6a42..9c8e1734429c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3221,6 +3221,26 @@ static void btrfs_double_inode_lock(struct inode 
*inode1, struct inode *inode2) inode_lock_nested(inode2, I_MUTEX_CHILD); } +static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) +{ + unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); + unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); +} + +static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) +{ + if (inode1 < inode2) { + swap(inode1, inode2); + swap(loff1, loff2); + } else if (inode1 == inode2 && loff2 < loff1) { + swap(loff1, loff2); + } + lock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); + lock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); +} + static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen, struct inode *dst, u64 dst_loff) { @@ -3242,11 +3262,12 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen, return -EINVAL; /* - * Lock destination range to serialize with concurrent readpages(). + * Lock destination range to serialize with concurrent readpages() and + * source range to serialize with relocation. 
*/ - lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1); + btrfs_double_extent_lock(src, loff, dst, dst_loff, len); ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); - unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1); + btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); return ret; } @@ -3905,17 +3926,33 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src, len = ALIGN(src->i_size, bs) - off; if (destoff > inode->i_size) { + const u64 wb_start = ALIGN_DOWN(inode->i_size, bs); + ret = btrfs_cont_expand(inode, inode->i_size, destoff); if (ret) return ret; + /* + * We may have truncated the last block if the inode's size is + * not sector size aligned, so we need to wait for writeback to + * complete before proceeding further, otherwise we can race + * with cloning and attempt to increment a reference to an + * extent that no longer exists (writeback completed right after + * we found the previous extent covering eof and before we + * attempted to increment its reference count). + */ + ret = btrfs_wait_ordered_range(inode, wb_start, + destoff - wb_start); + if (ret) + return ret; } /* - * Lock destination range to serialize with concurrent readpages(). + * Lock destination range to serialize with concurrent readpages() and + * source range to serialize with relocation. */ - lock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1); + btrfs_double_extent_lock(src, off, inode, destoff, len); ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); - unlock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1); + btrfs_double_extent_unlock(src, off, inode, destoff, len); /* * Truncate page cache pages so that future reads will see the cloned * data immediately and not the previous data. 
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 1b15b43905f8..7ea2d6b1f170 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6646,7 +6646,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) goto out; } - if (!access_ok(VERIFY_READ, arg->clone_sources, + if (!access_ok(arg->clone_sources, sizeof(*arg->clone_sources) * arg->clone_sources_count)) { ret = -EFAULT; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 368a5b9e6c13..c5586ffd1426 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1458,56 +1458,6 @@ out: return root; } -static int parse_security_options(char *orig_opts, - struct security_mnt_opts *sec_opts) -{ - char *secdata = NULL; - int ret = 0; - - secdata = alloc_secdata(); - if (!secdata) - return -ENOMEM; - ret = security_sb_copy_data(orig_opts, secdata); - if (ret) { - free_secdata(secdata); - return ret; - } - ret = security_sb_parse_opts_str(secdata, sec_opts); - free_secdata(secdata); - return ret; -} - -static int setup_security_options(struct btrfs_fs_info *fs_info, - struct super_block *sb, - struct security_mnt_opts *sec_opts) -{ - int ret = 0; - - /* - * Call security_sb_set_mnt_opts() to check whether new sec_opts - * is valid. - */ - ret = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL); - if (ret) - return ret; - -#ifdef CONFIG_SECURITY - if (!fs_info->security_opts.num_mnt_opts) { - /* first time security setup, copy sec_opts to fs_info */ - memcpy(&fs_info->security_opts, sec_opts, sizeof(*sec_opts)); - } else { - /* - * Since SELinux (the only one supporting security_mnt_opts) - * does NOT support changing context during remount/mount of - * the same sb, this must be the same or part of the same - * security options, just free it. - */ - security_free_mnt_opts(sec_opts); - } -#endif - return ret; -} - /* * Find a superblock for the given device / mount point. 
* @@ -1522,16 +1472,15 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, struct btrfs_device *device = NULL; struct btrfs_fs_devices *fs_devices = NULL; struct btrfs_fs_info *fs_info = NULL; - struct security_mnt_opts new_sec_opts; + void *new_sec_opts = NULL; fmode_t mode = FMODE_READ; int error = 0; if (!(flags & SB_RDONLY)) mode |= FMODE_WRITE; - security_init_mnt_opts(&new_sec_opts); if (data) { - error = parse_security_options(data, &new_sec_opts); + error = security_sb_eat_lsm_opts(data, &new_sec_opts); if (error) return ERR_PTR(error); } @@ -1550,7 +1499,6 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); - security_init_mnt_opts(&fs_info->security_opts); if (!fs_info->super_copy || !fs_info->super_for_commit) { error = -ENOMEM; goto error_fs_info; @@ -1601,16 +1549,12 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, btrfs_sb(s)->bdev_holder = fs_type; error = btrfs_fill_super(s, fs_devices, data); } + if (!error) + error = security_sb_set_mnt_opts(s, new_sec_opts, 0, NULL); + security_free_mnt_opts(&new_sec_opts); if (error) { deactivate_locked_super(s); - goto error_sec_opts; - } - - fs_info = btrfs_sb(s); - error = setup_security_options(fs_info, s, &new_sec_opts); - if (error) { - deactivate_locked_super(s); - goto error_sec_opts; + return ERR_PTR(error); } return dget(s->s_root); @@ -1779,18 +1723,14 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) btrfs_remount_prepare(fs_info); if (data) { - struct security_mnt_opts new_sec_opts; + void *new_sec_opts = NULL; - security_init_mnt_opts(&new_sec_opts); - ret = parse_security_options(data, &new_sec_opts); + ret = security_sb_eat_lsm_opts(data, &new_sec_opts); + if (!ret) + ret = security_sb_remount(sb, new_sec_opts); + security_free_mnt_opts(&new_sec_opts); if (ret) 
goto restore; - ret = setup_security_options(fs_info, sb, - &new_sec_opts); - if (ret) { - security_free_mnt_opts(&new_sec_opts); - goto restore; - } } ret = btrfs_parse_options(fs_info, data, *flags); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2576b1a379c9..3e4f8f88353e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7825,6 +7825,18 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, ret = -EUCLEAN; goto out; } + + /* It's possible this device is a dummy for seed device */ + if (dev->disk_total_bytes == 0) { + dev = find_device(fs_info->fs_devices->seed, devid, NULL); + if (!dev) { + btrfs_err(fs_info, "failed to find seed devid %llu", + devid); + ret = -EUCLEAN; + goto out; + } + } + if (physical_offset + physical_len > dev->disk_total_bytes) { btrfs_err(fs_info, "dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu", diff --git a/fs/buffer.c b/fs/buffer.c index d60d61e8ed7d..52d024bfdbc1 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2366,7 +2366,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping, balance_dirty_pages_ratelimited(mapping); - if (unlikely(fatal_signal_pending(current))) { + if (fatal_signal_pending(current)) { err = -EINTR; goto out; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 8eade7a993c1..a47c541f8006 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -306,7 +306,7 @@ static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx, struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->client->osdc; struct ceph_inode_info *ci = ceph_inode(inode); - struct page *page = list_entry(page_list->prev, struct page, lru); + struct page *page = lru_to_page(page_list); struct ceph_vino vino; struct ceph_osd_request *req; u64 off; @@ -333,8 +333,7 @@ static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx, if (got) ceph_put_cap_refs(ci, got); while (!list_empty(page_list)) { - page = 
list_entry(page_list->prev, - struct page, lru); + page = lru_to_page(page_list); list_del(&page->lru); put_page(page); } @@ -1495,10 +1494,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) if (err < 0 || off >= i_size_read(inode)) { unlock_page(page); put_page(page); - if (err == -ENOMEM) - ret = VM_FAULT_OOM; - else - ret = VM_FAULT_SIGBUS; + ret = vmf_error(err); goto out_inline; } if (err < PAGE_SIZE) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index f3496db4bb3e..bba28a5034ba 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -657,6 +657,9 @@ void ceph_add_cap(struct inode *inode, session->s_nr_caps++; spin_unlock(&session->s_cap_lock); } else { + if (cap->cap_gen < session->s_cap_gen) + cap->issued = cap->implemented = CEPH_CAP_PIN; + /* * auth mds of the inode changed. we received the cap export * message, but still haven't received the cap import message. @@ -1032,6 +1035,8 @@ static void drop_inode_snap_realm(struct ceph_inode_info *ci) list_del_init(&ci->i_snap_realm_item); ci->i_snap_realm_counter++; ci->i_snap_realm = NULL; + if (realm->ino == ci->i_vino.ino) + realm->inode = NULL; spin_unlock(&realm->inodes_with_caps_lock); ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc, realm); @@ -1855,14 +1860,17 @@ retry_locked: retain |= CEPH_CAP_ANY; /* be greedy */ } else if (S_ISDIR(inode->i_mode) && (issued & CEPH_CAP_FILE_SHARED) && - __ceph_dir_is_complete(ci)) { + __ceph_dir_is_complete(ci)) { /* * If a directory is complete, we want to keep * the exclusive cap. So that MDS does not end up * revoking the shared cap on every create/unlink * operation. 
*/ - want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL; + if (IS_RDONLY(inode)) + want = CEPH_CAP_ANY_SHARED; + else + want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL; retain |= want; } else { @@ -1970,8 +1978,7 @@ retry_locked: goto ack; /* things we might delay */ - if ((cap->issued & ~retain) == 0 && - cap->mds_wanted == want) + if ((cap->issued & ~retain) == 0) continue; /* nope, all good */ if (no_delay) @@ -3048,7 +3055,8 @@ static void handle_cap_grant(struct inode *inode, int used, wanted, dirty; u64 size = le64_to_cpu(grant->size); u64 max_size = le64_to_cpu(grant->max_size); - int check_caps = 0; + unsigned char check_caps = 0; + bool was_stale = cap->cap_gen < session->s_cap_gen; bool wake = false; bool writeback = false; bool queue_trunc = false; @@ -3063,21 +3071,6 @@ static void handle_cap_grant(struct inode *inode, /* - * auth mds of the inode changed. we received the cap export message, - * but still haven't received the cap import message. handle_cap_export - * updated the new auth MDS' cap. - * - * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing a message - * that was sent before the cap import message. So don't remove caps. - */ - if (ceph_seq_cmp(seq, cap->seq) <= 0) { - WARN_ON(cap != ci->i_auth_cap); - WARN_ON(cap->cap_id != le64_to_cpu(grant->cap_id)); - seq = cap->seq; - newcaps |= cap->issued; - } - - /* * If CACHE is being revoked, and we have no dirty buffers, * try to invalidate (once). (If there are dirty buffers, we * will invalidate _after_ writeback.) @@ -3096,6 +3089,24 @@ static void handle_cap_grant(struct inode *inode, } } + if (was_stale) + cap->issued = cap->implemented = CEPH_CAP_PIN; + + /* + * auth mds of the inode changed. we received the cap export message, + * but still haven't received the cap import message. handle_cap_export + * updated the new auth MDS' cap. + * + * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing a message + * that was sent before the cap import message. So don't remove caps. 
+ */ + if (ceph_seq_cmp(seq, cap->seq) <= 0) { + WARN_ON(cap != ci->i_auth_cap); + WARN_ON(cap->cap_id != le64_to_cpu(grant->cap_id)); + seq = cap->seq; + newcaps |= cap->issued; + } + /* side effects now are allowed */ cap->cap_gen = session->s_cap_gen; cap->seq = seq; @@ -3200,13 +3211,20 @@ static void handle_cap_grant(struct inode *inode, ceph_cap_string(wanted), ceph_cap_string(used), ceph_cap_string(dirty)); - if (wanted != le32_to_cpu(grant->wanted)) { - dout("mds wanted %s -> %s\n", - ceph_cap_string(le32_to_cpu(grant->wanted)), - ceph_cap_string(wanted)); - /* imported cap may not have correct mds_wanted */ - if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) - check_caps = 1; + + if ((was_stale || le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) && + (wanted & ~(cap->mds_wanted | newcaps))) { + /* + * If mds is importing cap, prior cap messages that update + * 'wanted' may get dropped by mds (migrate seq mismatch). + * + * We don't send cap message to update 'wanted' if what we + * want are already issued. If mds revokes caps, cap message + * that releases caps also tells mds what we want. But if + * caps got revoked by mds forcedly (session stale). We may + * haven't told mds what we want. + */ + check_caps = 1; } /* revocation, grant, or no-op? */ @@ -3539,9 +3557,9 @@ retry: goto out_unlock; if (target < 0) { - __ceph_remove_cap(cap, false); - if (!ci->i_auth_cap) + if (cap->mds_wanted | cap->issued) ci->i_ceph_flags |= CEPH_I_CAP_DROPPED; + __ceph_remove_cap(cap, false); goto out_unlock; } @@ -3569,7 +3587,6 @@ retry: tcap->cap_id = t_cap_id; tcap->seq = t_seq - 1; tcap->issue_seq = t_seq - 1; - tcap->mseq = t_mseq; tcap->issued |= issued; tcap->implemented |= issued; if (cap == ci->i_auth_cap) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 79dd5e6ed755..9d1f34d46627 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1098,8 +1098,9 @@ out_unlock: * splice a dentry to an inode. * caller must hold directory i_mutex for this to be safe. 
*/ -static struct dentry *splice_dentry(struct dentry *dn, struct inode *in) +static int splice_dentry(struct dentry **pdn, struct inode *in) { + struct dentry *dn = *pdn; struct dentry *realdn; BUG_ON(d_inode(dn)); @@ -1132,28 +1133,23 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in) if (IS_ERR(realdn)) { pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", PTR_ERR(realdn), dn, in, ceph_vinop(in)); - dn = realdn; - /* - * Caller should release 'dn' in the case of error. - * If 'req->r_dentry' is passed to this function, - * caller should leave 'req->r_dentry' untouched. - */ - goto out; - } else if (realdn) { + return PTR_ERR(realdn); + } + + if (realdn) { dout("dn %p (%d) spliced with %p (%d) " "inode %p ino %llx.%llx\n", dn, d_count(dn), realdn, d_count(realdn), d_inode(realdn), ceph_vinop(d_inode(realdn))); dput(dn); - dn = realdn; + *pdn = realdn; } else { BUG_ON(!ceph_dentry(dn)); dout("dn %p attached to %p ino %llx.%llx\n", dn, d_inode(dn), ceph_vinop(d_inode(dn))); } -out: - return dn; + return 0; } /* @@ -1340,7 +1336,12 @@ retry_lookup: dout("dn %p gets new offset %lld\n", req->r_old_dentry, ceph_dentry(req->r_old_dentry)->offset); - dn = req->r_old_dentry; /* use old_dentry */ + /* swap r_dentry and r_old_dentry in case that + * splice_dentry() gets called later. This is safe + * because no other place will use them */ + req->r_dentry = req->r_old_dentry; + req->r_old_dentry = dn; + dn = req->r_dentry; } /* null dentry? 
*/ @@ -1365,12 +1366,10 @@ retry_lookup: if (d_really_is_negative(dn)) { ceph_dir_clear_ordered(dir); ihold(in); - dn = splice_dentry(dn, in); - if (IS_ERR(dn)) { - err = PTR_ERR(dn); + err = splice_dentry(&req->r_dentry, in); + if (err < 0) goto done; - } - req->r_dentry = dn; /* may have spliced */ + dn = req->r_dentry; /* may have spliced */ } else if (d_really_is_positive(dn) && d_inode(dn) != in) { dout(" %p links to %p %llx.%llx, not %llx.%llx\n", dn, d_inode(dn), ceph_vinop(d_inode(dn)), @@ -1390,22 +1389,18 @@ retry_lookup: } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || req->r_op == CEPH_MDS_OP_MKSNAP) && !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { - struct dentry *dn = req->r_dentry; struct inode *dir = req->r_parent; /* fill out a snapdir LOOKUPSNAP dentry */ - BUG_ON(!dn); BUG_ON(!dir); BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR); - dout(" linking snapped dir %p to dn %p\n", in, dn); + BUG_ON(!req->r_dentry); + dout(" linking snapped dir %p to dn %p\n", in, req->r_dentry); ceph_dir_clear_ordered(dir); ihold(in); - dn = splice_dentry(dn, in); - if (IS_ERR(dn)) { - err = PTR_ERR(dn); + err = splice_dentry(&req->r_dentry, in); + if (err < 0) goto done; - } - req->r_dentry = dn; /* may have spliced */ } else if (rinfo->head->is_dentry) { struct ceph_vino *ptvino = NULL; @@ -1669,8 +1664,6 @@ retry_lookup: } if (d_really_is_negative(dn)) { - struct dentry *realdn; - if (ceph_security_xattr_deadlock(in)) { dout(" skip splicing dn %p to inode %p" " (security xattr deadlock)\n", dn, in); @@ -1679,13 +1672,9 @@ retry_lookup: goto next_item; } - realdn = splice_dentry(dn, in); - if (IS_ERR(realdn)) { - err = PTR_ERR(realdn); - d_drop(dn); + err = splice_dentry(&dn, in); + if (err < 0) goto next_item; - } - dn = realdn; } ceph_dentry(dn)->offset = rde->offset; @@ -1701,8 +1690,7 @@ retry_lookup: err = ret; } next_item: - if (dn) - dput(dn); + dput(dn); } out: if (err == 0 && skipped == 0) { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 
bd13a3267ae0..163fc74bf221 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1232,13 +1232,13 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, dout("removing cap %p, ci is %p, inode is %p\n", cap, ci, &ci->vfs_inode); spin_lock(&ci->i_ceph_lock); + if (cap->mds_wanted | cap->issued) + ci->i_ceph_flags |= CEPH_I_CAP_DROPPED; __ceph_remove_cap(cap, false); if (!ci->i_auth_cap) { struct ceph_cap_flush *cf; struct ceph_mds_client *mdsc = fsc->mdsc; - ci->i_ceph_flags |= CEPH_I_CAP_DROPPED; - if (ci->i_wrbuffer_ref > 0 && READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) invalidate = true; @@ -1355,6 +1355,12 @@ static void remove_session_caps(struct ceph_mds_session *session) dispose_cap_releases(session->s_mdsc, &dispose); } +enum { + RECONNECT, + RENEWCAPS, + FORCE_RO, +}; + /* * wake up any threads waiting on this session's caps. if the cap is * old (didn't get renewed on the client reconnect), remove it now. @@ -1365,23 +1371,34 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap, void *arg) { struct ceph_inode_info *ci = ceph_inode(inode); + unsigned long ev = (unsigned long)arg; - if (arg) { + if (ev == RECONNECT) { spin_lock(&ci->i_ceph_lock); ci->i_wanted_max_size = 0; ci->i_requested_max_size = 0; spin_unlock(&ci->i_ceph_lock); + } else if (ev == RENEWCAPS) { + if (cap->cap_gen < cap->session->s_cap_gen) { + /* mds did not re-issue stale cap */ + spin_lock(&ci->i_ceph_lock); + cap->issued = cap->implemented = CEPH_CAP_PIN; + /* make sure mds knows what we want */ + if (__ceph_caps_file_wanted(ci) & ~cap->mds_wanted) + ci->i_ceph_flags |= CEPH_I_CAP_DROPPED; + spin_unlock(&ci->i_ceph_lock); + } + } else if (ev == FORCE_RO) { } wake_up_all(&ci->i_cap_wq); return 0; } -static void wake_up_session_caps(struct ceph_mds_session *session, - int reconnect) +static void wake_up_session_caps(struct ceph_mds_session *session, int ev) { dout("wake_up_session_caps %p mds%d\n", session, session->s_mds); 
iterate_session_caps(session, wake_up_session_cb, - (void *)(unsigned long)reconnect); + (void *)(unsigned long)ev); } /* @@ -1466,7 +1483,7 @@ static void renewed_caps(struct ceph_mds_client *mdsc, spin_unlock(&session->s_cap_lock); if (wake) - wake_up_session_caps(session, 0); + wake_up_session_caps(session, RENEWCAPS); } /* @@ -2847,7 +2864,7 @@ static void handle_session(struct ceph_mds_session *session, spin_lock(&session->s_cap_lock); session->s_readonly = true; spin_unlock(&session->s_cap_lock); - wake_up_session_caps(session, 0); + wake_up_session_caps(session, FORCE_RO); break; case CEPH_SESSION_REJECT: @@ -2943,11 +2960,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, struct ceph_inode_info *ci = cap->ci; struct ceph_reconnect_state *recon_state = arg; struct ceph_pagelist *pagelist = recon_state->pagelist; - char *path; - int pathlen, err; - u64 pathbase; + int err; u64 snap_follows; - struct dentry *dentry; dout(" adding %p ino %llx.%llx cap %p %lld %s\n", inode, ceph_vinop(inode), cap, cap->cap_id, @@ -2956,19 +2970,6 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, if (err) return err; - dentry = d_find_alias(inode); - if (dentry) { - path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0); - if (IS_ERR(path)) { - err = PTR_ERR(path); - goto out_dput; - } - } else { - path = NULL; - pathlen = 0; - pathbase = 0; - } - spin_lock(&ci->i_ceph_lock); cap->seq = 0; /* reset cap seq */ cap->issue_seq = 0; /* and issue_seq */ @@ -2980,7 +2981,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); rec.v2.issued = cpu_to_le32(cap->issued); rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); - rec.v2.pathbase = cpu_to_le64(pathbase); + rec.v2.pathbase = 0; rec.v2.flock_len = (__force __le32) ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 
0 : 1); } else { @@ -2991,7 +2992,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime); ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime); rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); - rec.v1.pathbase = cpu_to_le64(pathbase); + rec.v1.pathbase = 0; } if (list_empty(&ci->i_cap_snaps)) { @@ -3023,7 +3024,7 @@ encode_again: GFP_NOFS); if (!flocks) { err = -ENOMEM; - goto out_free; + goto out_err; } err = ceph_encode_locks_to_buffer(inode, flocks, num_fcntl_locks, @@ -3033,7 +3034,7 @@ encode_again: flocks = NULL; if (err == -ENOSPC) goto encode_again; - goto out_free; + goto out_err; } } else { kfree(flocks); @@ -3053,44 +3054,64 @@ encode_again: sizeof(struct ceph_filelock); rec.v2.flock_len = cpu_to_le32(struct_len); - struct_len += sizeof(rec.v2); - struct_len += sizeof(u32) + pathlen; + struct_len += sizeof(u32) + sizeof(rec.v2); if (struct_v >= 2) struct_len += sizeof(u64); /* snap_follows */ total_len += struct_len; err = ceph_pagelist_reserve(pagelist, total_len); + if (err) { + kfree(flocks); + goto out_err; + } - if (!err) { - if (recon_state->msg_version >= 3) { - ceph_pagelist_encode_8(pagelist, struct_v); - ceph_pagelist_encode_8(pagelist, 1); - ceph_pagelist_encode_32(pagelist, struct_len); - } - ceph_pagelist_encode_string(pagelist, path, pathlen); - ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2)); - ceph_locks_to_pagelist(flocks, pagelist, - num_fcntl_locks, - num_flock_locks); - if (struct_v >= 2) - ceph_pagelist_encode_64(pagelist, snap_follows); + if (recon_state->msg_version >= 3) { + ceph_pagelist_encode_8(pagelist, struct_v); + ceph_pagelist_encode_8(pagelist, 1); + ceph_pagelist_encode_32(pagelist, struct_len); } + ceph_pagelist_encode_string(pagelist, NULL, 0); + ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2)); + ceph_locks_to_pagelist(flocks, pagelist, + num_fcntl_locks, num_flock_locks); + if (struct_v >= 2) + ceph_pagelist_encode_64(pagelist, 
snap_follows); + kfree(flocks); } else { - size_t size = sizeof(u32) + pathlen + sizeof(rec.v1); - err = ceph_pagelist_reserve(pagelist, size); - if (!err) { - ceph_pagelist_encode_string(pagelist, path, pathlen); - ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1)); + u64 pathbase = 0; + int pathlen = 0; + char *path = NULL; + struct dentry *dentry; + + dentry = d_find_alias(inode); + if (dentry) { + path = ceph_mdsc_build_path(dentry, + &pathlen, &pathbase, 0); + dput(dentry); + if (IS_ERR(path)) { + err = PTR_ERR(path); + goto out_err; + } + rec.v1.pathbase = cpu_to_le64(pathbase); } + + err = ceph_pagelist_reserve(pagelist, + pathlen + sizeof(u32) + sizeof(rec.v1)); + if (err) { + kfree(path); + goto out_err; + } + + ceph_pagelist_encode_string(pagelist, path, pathlen); + ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1)); + + kfree(path); } recon_state->nr_caps++; -out_free: - kfree(path); -out_dput: - dput(dentry); +out_err: return err; } @@ -3339,7 +3360,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, pr_info("mds%d recovery completed\n", s->s_mds); kick_requests(mdsc, i); ceph_kick_flushing_caps(mdsc, s); - wake_up_session_caps(s, 1); + wake_up_session_caps(s, RECONNECT); } } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 32fcce0d4d3c..729da155ebf0 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -17,14 +17,16 @@ #include <linux/ceph/auth.h> /* The first 8 bits are reserved for old ceph releases */ -#define CEPHFS_FEATURE_MIMIC 8 - -#define CEPHFS_FEATURES_ALL { \ - 0, 1, 2, 3, 4, 5, 6, 7, \ - CEPHFS_FEATURE_MIMIC, \ +#define CEPHFS_FEATURE_MIMIC 8 +#define CEPHFS_FEATURE_REPLY_ENCODING 9 +#define CEPHFS_FEATURE_RECLAIM_CLIENT 10 +#define CEPHFS_FEATURE_LAZY_CAP_WANTED 11 + +#define CEPHFS_FEATURES_CLIENT_SUPPORTED { \ + 0, 1, 2, 3, 4, 5, 6, 7, \ + CEPHFS_FEATURE_MIMIC, \ + CEPHFS_FEATURE_LAZY_CAP_WANTED, \ } - -#define CEPHFS_FEATURES_CLIENT_SUPPORTED CEPHFS_FEATURES_ALL #define 
CEPHFS_FEATURES_CLIENT_REQUIRED {} diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 44e53abeb32a..1a2c5d390f7f 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -35,7 +35,6 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) /* pick */ n = prandom_u32() % n; - i = 0; for (i = 0; n > 0; i++, n--) while (m->m_info[i].state <= 0) i++; diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 03f4d24db8fe..9455d3aef0c3 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -3,19 +3,6 @@ * quota.c - CephFS quota * * Copyright (C) 2017-2018 SUSE - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ #include <linux/statfs.h> diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 4e9a7cc488da..da2cd8e89062 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -530,7 +530,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) seq_putc(m, ','); pos = m->count; - ret = ceph_print_client_options(m, fsc->client); + ret = ceph_print_client_options(m, fsc->client, false); if (ret) return ret; @@ -640,7 +640,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, opt = NULL; /* fsc->client now owns this */ fsc->client->extra_mon_dispatch = extra_mon_dispatch; - fsc->client->osdc.abort_on_full = true; + ceph_set_opt(fsc->client, ABORT_ON_FULL); if (!fsopt->mds_namespace) { ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 85dadb93c992..f1ddc9d03c10 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -190,8 +190,9 @@ config CIFS_DFS_UPCALL moves to a different server. This feature also enables an upcall mechanism for CIFS which contacts userspace helper utilities to provide server name resolution (host names to - IP addresses) which is needed for implicit mounts of DFS junction - points. If unsure, say Y. + IP addresses) which is needed in order to reconnect to + servers if their addresses change or for implicit mounts of + DFS junction points. If unsure, say Y. 
config CIFS_NFSD_EXPORT bool "Allow nfsd to export CIFS file system" diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 85817991ee68..51af69a1a328 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -17,7 +17,7 @@ cifs-$(CONFIG_CIFS_ACL) += cifsacl.o cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o -cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o +cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o dfs_cache.o cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index ba178b09de0b..593fb422d0f3 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -30,6 +30,9 @@ #include "cifsproto.h" #include "cifs_debug.h" #include "cifsfs.h" +#ifdef CONFIG_CIFS_DFS_UPCALL +#include "dfs_cache.h" +#endif #ifdef CONFIG_CIFS_SMB_DIRECT #include "smbdirect.h" #endif @@ -629,6 +632,11 @@ cifs_proc_init(void) &cifs_security_flags_proc_fops); proc_create("LookupCacheEnabled", 0644, proc_fs_cifs, &cifs_lookup_cache_proc_fops); + +#ifdef CONFIG_CIFS_DFS_UPCALL + proc_create("dfscache", 0644, proc_fs_cifs, &dfscache_proc_fops); +#endif + #ifdef CONFIG_CIFS_SMB_DIRECT proc_create("rdma_readwrite_threshold", 0644, proc_fs_cifs, &cifs_rdma_readwrite_threshold_proc_fops); @@ -663,6 +671,10 @@ cifs_proc_clean(void) remove_proc_entry("SecurityFlags", proc_fs_cifs); remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); + +#ifdef CONFIG_CIFS_DFS_UPCALL + remove_proc_entry("dfscache", proc_fs_cifs); +#endif #ifdef CONFIG_CIFS_SMB_DIRECT remove_proc_entry("rdma_readwrite_threshold", proc_fs_cifs); remove_proc_entry("smbd_max_frmr_depth", proc_fs_cifs); diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index b97c74efd04a..d9b99abe1243 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -25,6 +25,7 @@ #include "dns_resolve.h" #include "cifs_debug.h" #include "cifs_unicode.h" +#include "dfs_cache.h" static 
LIST_HEAD(cifs_dfs_automount_list); @@ -126,7 +127,7 @@ cifs_build_devname(char *nodename, const char *prepath) * @sb_mountdata: parent/root DFS mount options (template) * @fullpath: full path in UNC format * @ref: server's referral - * @devname: pointer for saving device name + * @devname: optional pointer for saving device name * * creates mount options for submount based on template options sb_mountdata * and replacing unc,ip,prefixpath options with ones we've got form ref_unc. @@ -140,6 +141,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata, char **devname) { int rc; + char *name; char *mountdata = NULL; const char *prepath = NULL; int md_len; @@ -158,17 +160,17 @@ char *cifs_compose_mount_options(const char *sb_mountdata, prepath++; } - *devname = cifs_build_devname(ref->node_name, prepath); - if (IS_ERR(*devname)) { - rc = PTR_ERR(*devname); - *devname = NULL; + name = cifs_build_devname(ref->node_name, prepath); + if (IS_ERR(name)) { + rc = PTR_ERR(name); + name = NULL; goto compose_mount_options_err; } - rc = dns_resolve_server_name_to_ip(*devname, &srvIP); + rc = dns_resolve_server_name_to_ip(name, &srvIP); if (rc < 0) { cifs_dbg(FYI, "%s: Failed to resolve server part of %s to IP: %d\n", - __func__, *devname, rc); + __func__, name, rc); goto compose_mount_options_err; } @@ -224,6 +226,9 @@ char *cifs_compose_mount_options(const char *sb_mountdata, strcat(mountdata, "ip="); strcat(mountdata, srvIP); + if (devname) + *devname = name; + /*cifs_dbg(FYI, "%s: parent mountdata: %s\n", __func__, sb_mountdata);*/ /*cifs_dbg(FYI, "%s: submount mountdata: %s\n", __func__, mountdata );*/ @@ -234,8 +239,7 @@ compose_mount_options_out: compose_mount_options_err: kfree(mountdata); mountdata = ERR_PTR(rc); - kfree(*devname); - *devname = NULL; + kfree(name); goto compose_mount_options_out; } @@ -251,20 +255,30 @@ static struct vfsmount *cifs_dfs_do_refmount(struct dentry *mntpt, { struct vfsmount *mnt; char *mountdata; - char *devname = NULL; + char 
*devname; + + /* + * Always pass down the DFS full path to smb3_do_mount() so we + * can use it later for failover. + */ + devname = kstrndup(fullpath, strlen(fullpath), GFP_KERNEL); + if (!devname) + return ERR_PTR(-ENOMEM); + + convert_delimiter(devname, '/'); /* strip first '\' from fullpath */ mountdata = cifs_compose_mount_options(cifs_sb->mountdata, - fullpath + 1, ref, &devname); - - if (IS_ERR(mountdata)) + fullpath + 1, ref, NULL); + if (IS_ERR(mountdata)) { + kfree(devname); return (struct vfsmount *)mountdata; + } mnt = vfs_submount(mntpt, &cifs_fs_type, devname, mountdata); kfree(mountdata); kfree(devname); return mnt; - } static void dump_referral(const struct dfs_info3_param *ref) @@ -282,16 +296,15 @@ static void dump_referral(const struct dfs_info3_param *ref) */ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) { - struct dfs_info3_param *referrals = NULL; - unsigned int num_referrals = 0; + struct dfs_info3_param referral = {0}; struct cifs_sb_info *cifs_sb; struct cifs_ses *ses; - char *full_path; + struct cifs_tcon *tcon; + char *full_path, *root_path; unsigned int xid; - int i; + int len; int rc; struct vfsmount *mnt; - struct tcon_link *tlink; cifs_dbg(FYI, "in %s\n", __func__); BUG_ON(IS_ROOT(mntpt)); @@ -315,48 +328,69 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) if (full_path == NULL) goto cdda_exit; - tlink = cifs_sb_tlink(cifs_sb); - if (IS_ERR(tlink)) { - mnt = ERR_CAST(tlink); + cifs_dbg(FYI, "%s: full_path: %s\n", __func__, full_path); + + if (!cifs_sb_master_tlink(cifs_sb)) { + cifs_dbg(FYI, "%s: master tlink is NULL\n", __func__); goto free_full_path; } - ses = tlink_tcon(tlink)->ses; + tcon = cifs_sb_master_tcon(cifs_sb); + if (!tcon) { + cifs_dbg(FYI, "%s: master tcon is NULL\n", __func__); + goto free_full_path; + } + + root_path = kstrdup(tcon->treeName, GFP_KERNEL); + if (!root_path) { + mnt = ERR_PTR(-ENOMEM); + goto free_full_path; + } + cifs_dbg(FYI, "%s: root path: %s\n", __func__, 
root_path); + + ses = tcon->ses; xid = get_xid(); - rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls, - &num_referrals, &referrals, - cifs_remap(cifs_sb)); - free_xid(xid); - cifs_put_tlink(tlink); - - mnt = ERR_PTR(-ENOENT); - for (i = 0; i < num_referrals; i++) { - int len; - dump_referral(referrals + i); - /* connect to a node */ - len = strlen(referrals[i].node_name); - if (len < 2) { - cifs_dbg(VFS, "%s: Net Address path too short: %s\n", - __func__, referrals[i].node_name); - mnt = ERR_PTR(-EINVAL); - break; - } - mnt = cifs_dfs_do_refmount(mntpt, cifs_sb, - full_path, referrals + i); - cifs_dbg(FYI, "%s: cifs_dfs_do_refmount:%s , mnt:%p\n", - __func__, referrals[i].node_name, mnt); - if (!IS_ERR(mnt)) - goto success; + /* + * If DFS root has been expired, then unconditionally fetch it again to + * refresh DFS referral cache. + */ + rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), + root_path + 1, NULL, NULL); + if (!rc) { + rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, + cifs_remap(cifs_sb), full_path + 1, + &referral, NULL); } - /* no valid submounts were found; return error from get_dfs_path() by - * preference */ - if (rc != 0) + free_xid(xid); + + if (rc) { mnt = ERR_PTR(rc); + goto free_root_path; + } + + dump_referral(&referral); -success: - free_dfs_info_array(referrals, num_referrals); + len = strlen(referral.node_name); + if (len < 2) { + cifs_dbg(VFS, "%s: Net Address path too short: %s\n", + __func__, referral.node_name); + mnt = ERR_PTR(-EINVAL); + goto free_dfs_ref; + } + /* + * cifs_mount() will retry every available node server in case + * of failures. 
+ */ + mnt = cifs_dfs_do_refmount(mntpt, cifs_sb, full_path, &referral); + cifs_dbg(FYI, "%s: cifs_dfs_do_refmount:%s , mnt:%p\n", __func__, + referral.node_name, mnt); + +free_dfs_ref: + free_dfs_info_param(&referral); +free_root_path: + kfree(root_path); free_full_path: kfree(full_path); cdda_exit: diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 63d7530f2e1d..42f0d67f1054 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -72,6 +72,15 @@ struct cifs_sb_info { char *mountdata; /* options received at mount time or via DFS refs */ struct delayed_work prune_tlinks; struct rcu_head rcu; + + /* only used when CIFS_MOUNT_USE_PREFIX_PATH is set */ char *prepath; + + /* + * Path initially provided by the mount call. We might connect + * to something different via DFS but we want to keep it to do + * failover properly. + */ + char *origin_fullpath; /* \\HOST\SHARE\[OPTIONAL PATH] */ }; #endif /* _CIFS_FS_SB_H */ diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 85b31cfa2f3c..d2a05e46d6f5 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -224,7 +224,7 @@ int cifs_verify_signature(struct smb_rqst *rqst, if (cifs_pdu->Command == SMB_COM_LOCKING_ANDX) { struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)cifs_pdu; - if (pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE) + if (pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE) return 0; } @@ -304,12 +304,17 @@ int setup_ntlm_response(struct cifs_ses *ses, const struct nls_table *nls_cp) int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, char *lnm_session_key) { - int i; + int i, len; int rc; char password_with_pad[CIFS_ENCPWD_SIZE] = {0}; - if (password) - strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE); + if (password) { + for (len = 0; len < CIFS_ENCPWD_SIZE; len++) + if (!password[len]) + break; + + memcpy(password_with_pad, password, len); + } if (!encrypt && global_secflags & CIFSSEC_MAY_PLNTXT) { memcpy(lnm_session_key, 
password_with_pad, diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 865706edb307..62d48d486d8f 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -52,6 +52,9 @@ #include "cifs_spnego.h" #include "fscache.h" #include "smb2pdu.h" +#ifdef CONFIG_CIFS_DFS_UPCALL +#include "dfs_cache.h" +#endif int cifsFYI = 0; bool traceSMB; @@ -1494,10 +1497,15 @@ init_cifs(void) if (rc) goto out_destroy_mids; +#ifdef CONFIG_CIFS_DFS_UPCALL + rc = dfs_cache_init(); + if (rc) + goto out_destroy_request_bufs; +#endif /* CONFIG_CIFS_DFS_UPCALL */ #ifdef CONFIG_CIFS_UPCALL rc = init_cifs_spnego(); if (rc) - goto out_destroy_request_bufs; + goto out_destroy_dfs_cache; #endif /* CONFIG_CIFS_UPCALL */ #ifdef CONFIG_CIFS_ACL @@ -1525,6 +1533,10 @@ out_register_key_type: #endif #ifdef CONFIG_CIFS_UPCALL exit_cifs_spnego(); +out_destroy_dfs_cache: +#endif +#ifdef CONFIG_CIFS_DFS_UPCALL + dfs_cache_destroy(); out_destroy_request_bufs: #endif cifs_destroy_request_bufs(); @@ -1556,6 +1568,9 @@ exit_cifs(void) #ifdef CONFIG_CIFS_UPCALL exit_cifs_spnego(); #endif +#ifdef CONFIG_CIFS_DFS_UPCALL + dfs_cache_destroy(); +#endif cifs_destroy_request_bufs(); cifs_destroy_mids(); cifs_destroy_inodecache(); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 4c3b5cfccc49..d1f9c2f3f575 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -150,5 +150,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.14" +#define CIFS_VERSION "2.16" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 38ab0fca49e1..94dbdbe5be34 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -701,6 +701,13 @@ struct TCP_Server_Info { struct delayed_work reconnect; /* reconnect workqueue job */ struct mutex reconnect_mutex; /* prevent simultaneous reconnects */ unsigned long echo_interval; + + /* + * Number of targets available for 
reconnect. The more targets + * the more tasks have to wait to let the demultiplex thread + * reconnect. + */ + int nr_targets; }; static inline unsigned int @@ -1014,6 +1021,11 @@ struct cifs_tcon { struct list_head pending_opens; /* list of incomplete opens */ struct cached_fid crfid; /* Cached root fid */ /* BB add field for back pointer to sb struct(s)? */ +#ifdef CONFIG_CIFS_DFS_UPCALL + char *dfs_path; + int remap:2; + struct list_head ulist; /* cache update list */ +#endif }; /* @@ -1426,6 +1438,7 @@ struct mid_q_entry { int mid_state; /* wish this were enum but can not pass to wait_event */ unsigned int mid_flags; __le16 command; /* smb command code */ + unsigned int optype; /* operation type */ bool large_buf:1; /* if valid response, is pointer to large buf */ bool multiRsp:1; /* multiple trans2 responses for one request */ bool multiEnd:1; /* both received */ @@ -1508,6 +1521,7 @@ struct dfs_info3_param { int ref_flag; char *path_name; char *node_name; + int ttl; }; /* @@ -1545,7 +1559,6 @@ static inline void free_dfs_info_param(struct dfs_info3_param *param) if (param) { kfree(param->path_name); kfree(param->node_name); - kfree(param); } } @@ -1562,6 +1575,25 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param, kfree(param); } +static inline bool is_interrupt_error(int error) +{ + switch (error) { + case -EINTR: + case -ERESTARTSYS: + case -ERESTARTNOHAND: + case -ERESTARTNOINTR: + return true; + } + return false; +} + +static inline bool is_retryable_error(int error) +{ + if (is_interrupt_error(error) || error == -EAGAIN) + return true; + return false; +} + #define MID_FREE 0 #define MID_REQUEST_ALLOCATED 1 #define MID_REQUEST_SUBMITTED 2 @@ -1790,6 +1822,7 @@ extern struct smb_version_values smb3any_values; extern struct smb_version_operations smb30_operations; extern struct smb_version_values smb30_values; #define SMB302_VERSION_STRING "3.02" +#define ALT_SMB302_VERSION_STRING "3.0.2" /*extern struct smb_version_operations 
smb302_operations;*/ /* not needed yet */ extern struct smb_version_values smb302_values; #define SMB311_VERSION_STRING "3.1.1" diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index fa361bc00602..336c116995d7 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -22,6 +22,9 @@ #define _CIFSPROTO_H #include <linux/nls.h> #include "trace.h" +#ifdef CONFIG_CIFS_DFS_UPCALL +#include "dfs_cache.h" +#endif struct statfs; struct smb_vol; @@ -213,7 +216,7 @@ extern int cifs_match_super(struct super_block *, void *); extern void cifs_cleanup_volume_info(struct smb_vol *pvolume_info); extern struct smb_vol *cifs_get_volume_info(char *mount_data, const char *devname, bool is_smb3); -extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *); +extern int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol); extern void cifs_umount(struct cifs_sb_info *); extern void cifs_mark_open_files_invalid(struct cifs_tcon *tcon); extern void cifs_reopen_persistent_handles(struct cifs_tcon *tcon); @@ -294,11 +297,6 @@ extern int CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses, unsigned int *num_of_nodes, const struct nls_table *nls_codepage, int remap); -extern int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, - const char *old_path, - const struct nls_table *nls_codepage, - unsigned int *num_referrals, - struct dfs_info3_param **referrals, int remap); extern int parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size, unsigned int *num_of_nodes, struct dfs_info3_param **target_nodes, @@ -524,6 +522,11 @@ extern int E_md4hash(const unsigned char *passwd, unsigned char *p16, const struct nls_table *codepage); extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24); +extern void +cifs_cleanup_volume_info_contents(struct smb_vol *volume_info); + +extern struct TCP_Server_Info * +cifs_find_tcp_session(struct smb_vol *vol); void cifs_readdata_release(struct kref *refcount); int 
cifs_async_readv(struct cifs_readdata *rdata); @@ -562,4 +565,17 @@ void cifs_free_hash(struct crypto_shash **shash, struct sdesc **sdesc); extern void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page, unsigned int *len, unsigned int *offset); +void extract_unc_hostname(const char *unc, const char **h, size_t *len); + +#ifdef CONFIG_CIFS_DFS_UPCALL +static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, + const char *old_path, + const struct nls_table *nls_codepage, + struct dfs_info3_param *referral, int remap) +{ + return dfs_cache_find(xid, ses, nls_codepage, remap, old_path, + referral, NULL); +} +#endif + #endif /* _CIFSPROTO_H */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index f82fd342bca5..e18915415e13 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -44,6 +44,9 @@ #include "cifs_debug.h" #include "fscache.h" #include "smbdirect.h" +#ifdef CONFIG_CIFS_DFS_UPCALL +#include "dfs_cache.h" +#endif #ifdef CONFIG_CIFS_POSIX static struct { @@ -118,6 +121,86 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon) */ } +#ifdef CONFIG_CIFS_DFS_UPCALL +static int __cifs_reconnect_tcon(const struct nls_table *nlsc, + struct cifs_tcon *tcon) +{ + int rc; + struct dfs_cache_tgt_list tl; + struct dfs_cache_tgt_iterator *it = NULL; + char *tree; + const char *tcp_host; + size_t tcp_host_len; + const char *dfs_host; + size_t dfs_host_len; + + tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL); + if (!tree) + return -ENOMEM; + + if (tcon->ipc) { + snprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", + tcon->ses->server->hostname); + rc = CIFSTCon(0, tcon->ses, tree, tcon, nlsc); + goto out; + } + + if (!tcon->dfs_path) { + rc = CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nlsc); + goto out; + } + + rc = dfs_cache_noreq_find(tcon->dfs_path + 1, NULL, &tl); + if (rc) + goto out; + + extract_unc_hostname(tcon->ses->server->hostname, &tcp_host, + &tcp_host_len); + + for (it = dfs_cache_get_tgt_iterator(&tl); it; + it = 
dfs_cache_get_next_tgt(&tl, it)) { + const char *tgt = dfs_cache_get_tgt_name(it); + + extract_unc_hostname(tgt, &dfs_host, &dfs_host_len); + + if (dfs_host_len != tcp_host_len + || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) { + cifs_dbg(FYI, "%s: skipping %.*s, doesn't match %.*s", + __func__, + (int)dfs_host_len, dfs_host, + (int)tcp_host_len, tcp_host); + continue; + } + + snprintf(tree, MAX_TREE_SIZE, "\\%s", tgt); + + rc = CIFSTCon(0, tcon->ses, tree, tcon, nlsc); + if (!rc) + break; + if (rc == -EREMOTE) + break; + } + + if (!rc) { + if (it) + rc = dfs_cache_noreq_update_tgthint(tcon->dfs_path + 1, + it); + else + rc = -ENOENT; + } + dfs_cache_free_tgts(&tl); +out: + kfree(tree); + return rc; +} +#else +static inline int __cifs_reconnect_tcon(const struct nls_table *nlsc, + struct cifs_tcon *tcon) +{ + return CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nlsc); +} +#endif + /* reconnect the socket, tcon, and smb session if needed */ static int cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) @@ -126,6 +209,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) struct cifs_ses *ses; struct TCP_Server_Info *server; struct nls_table *nls_codepage; + int retries; /* * SMBs NegProt, SessSetup, uLogoff do not have tcon yet so check for @@ -152,9 +236,12 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) } } + retries = server->nr_targets; + /* - * Give demultiplex thread up to 10 seconds to reconnect, should be - * greater than cifs socket timeout which is 7 seconds + * Give demultiplex thread up to 10 seconds to each target available for + * reconnect -- should be greater than cifs socket timeout which is 7 + * seconds. 
*/ while (server->tcpStatus == CifsNeedReconnect) { rc = wait_event_interruptible_timeout(server->response_q, @@ -170,6 +257,9 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) if (server->tcpStatus != CifsNeedReconnect) break; + if (--retries) + continue; + /* * on "soft" mounts we wait once. Hard mounts keep * retrying until process is killed or server comes @@ -179,6 +269,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) cifs_dbg(FYI, "gave up waiting on reconnect in smb_init\n"); return -EHOSTDOWN; } + retries = server->nr_targets; } if (!ses->need_reconnect && !tcon->need_reconnect) @@ -214,7 +305,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) } cifs_mark_open_files_invalid(tcon); - rc = CIFSTCon(0, ses, tcon->treeName, tcon, nls_codepage); + rc = __cifs_reconnect_tcon(nls_codepage, tcon); mutex_unlock(&ses->session_mutex); cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); @@ -2032,7 +2123,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata) for (j = 0; j < nr_pages; j++) { unlock_page(wdata2->pages[j]); - if (rc != 0 && rc != -EAGAIN) { + if (rc != 0 && !is_retryable_error(rc)) { SetPageError(wdata2->pages[j]); end_page_writeback(wdata2->pages[j]); put_page(wdata2->pages[j]); @@ -2041,7 +2132,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata) if (rc) { kref_put(&wdata2->refcount, cifs_writedata_release); - if (rc == -EAGAIN) + if (is_retryable_error(rc)) continue; break; } @@ -2050,7 +2141,8 @@ cifs_writev_requeue(struct cifs_writedata *wdata) i += nr_pages; } while (i < wdata->nr_pages); - mapping_set_error(inode->i_mapping, rc); + if (rc != 0 && !is_retryable_error(rc)) + mapping_set_error(inode->i_mapping, rc); kref_put(&wdata->refcount, cifs_writedata_release); } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 6f24f129a751..683310f26171 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -56,6 +56,11 @@ #include "fscache.h" #include "smb2proto.h" #include "smbdirect.h" +#include 
"dns_resolve.h" +#include "cifsfs.h" +#ifdef CONFIG_CIFS_DFS_UPCALL +#include "dfs_cache.h" +#endif extern mempool_t *cifs_req_poolp; extern bool disable_legacy_dialects; @@ -304,6 +309,7 @@ static const match_table_t cifs_smb_version_tokens = { { Smb_21, SMB21_VERSION_STRING }, { Smb_30, SMB30_VERSION_STRING }, { Smb_302, SMB302_VERSION_STRING }, + { Smb_302, ALT_SMB302_VERSION_STRING }, { Smb_311, SMB311_VERSION_STRING }, { Smb_311, ALT_SMB311_VERSION_STRING }, { Smb_3any, SMB3ANY_VERSION_STRING }, @@ -317,6 +323,132 @@ static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink); static void cifs_prune_tlinks(struct work_struct *work); static int cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data, const char *devname, bool is_smb3); +static char *extract_hostname(const char *unc); + +/* + * Resolve hostname and set ip addr in tcp ses. Useful for hostnames that may + * get their ip addresses changed at some point. + * + * This should be called with server->srv_mutex held. + */ +#ifdef CONFIG_CIFS_DFS_UPCALL +static int reconn_set_ipaddr(struct TCP_Server_Info *server) +{ + int rc; + int len; + char *unc, *ipaddr = NULL; + + if (!server->hostname) + return -EINVAL; + + len = strlen(server->hostname) + 3; + + unc = kmalloc(len, GFP_KERNEL); + if (!unc) { + cifs_dbg(FYI, "%s: failed to create UNC path\n", __func__); + return -ENOMEM; + } + snprintf(unc, len, "\\\\%s", server->hostname); + + rc = dns_resolve_server_name_to_ip(unc, &ipaddr); + kfree(unc); + + if (rc < 0) { + cifs_dbg(FYI, "%s: failed to resolve server part of %s to IP: %d\n", + __func__, server->hostname, rc); + return rc; + } + + rc = cifs_convert_address((struct sockaddr *)&server->dstaddr, ipaddr, + strlen(ipaddr)); + kfree(ipaddr); + + return !rc ? 
-1 : 0; +} +#else +static inline int reconn_set_ipaddr(struct TCP_Server_Info *server) +{ + return 0; +} +#endif + +#ifdef CONFIG_CIFS_DFS_UPCALL +struct super_cb_data { + struct TCP_Server_Info *server; + struct cifs_sb_info *cifs_sb; +}; + +/* These functions must be called with server->srv_mutex held */ + +static void super_cb(struct super_block *sb, void *arg) +{ + struct super_cb_data *d = arg; + struct cifs_sb_info *cifs_sb; + struct cifs_tcon *tcon; + + if (d->cifs_sb) + return; + + cifs_sb = CIFS_SB(sb); + tcon = cifs_sb_master_tcon(cifs_sb); + if (tcon->ses->server == d->server) + d->cifs_sb = cifs_sb; +} + +static inline struct cifs_sb_info * +find_super_by_tcp(struct TCP_Server_Info *server) +{ + struct super_cb_data d = { + .server = server, + .cifs_sb = NULL, + }; + + iterate_supers_type(&cifs_fs_type, super_cb, &d); + return d.cifs_sb ? d.cifs_sb : ERR_PTR(-ENOENT); +} + +static void reconn_inval_dfs_target(struct TCP_Server_Info *server, + struct cifs_sb_info *cifs_sb, + struct dfs_cache_tgt_list *tgt_list, + struct dfs_cache_tgt_iterator **tgt_it) +{ + const char *name; + + if (!cifs_sb || !cifs_sb->origin_fullpath || !tgt_list || + !server->nr_targets) + return; + + if (!*tgt_it) { + *tgt_it = dfs_cache_get_tgt_iterator(tgt_list); + } else { + *tgt_it = dfs_cache_get_next_tgt(tgt_list, *tgt_it); + if (!*tgt_it) + *tgt_it = dfs_cache_get_tgt_iterator(tgt_list); + } + + cifs_dbg(FYI, "%s: UNC: %s\n", __func__, cifs_sb->origin_fullpath); + + name = dfs_cache_get_tgt_name(*tgt_it); + + kfree(server->hostname); + + server->hostname = extract_hostname(name); + if (IS_ERR(server->hostname)) { + cifs_dbg(FYI, + "%s: failed to extract hostname from target: %ld\n", + __func__, PTR_ERR(server->hostname)); + } +} + +static inline int reconn_setup_dfs_targets(struct cifs_sb_info *cifs_sb, + struct dfs_cache_tgt_list *tl, + struct dfs_cache_tgt_iterator **it) +{ + if (!cifs_sb->origin_fullpath) + return -EOPNOTSUPP; + return 
dfs_cache_noreq_find(cifs_sb->origin_fullpath + 1, NULL, tl); +} +#endif /* * cifs tcp session reconnection @@ -335,8 +467,33 @@ cifs_reconnect(struct TCP_Server_Info *server) struct cifs_tcon *tcon; struct mid_q_entry *mid_entry; struct list_head retry_list; +#ifdef CONFIG_CIFS_DFS_UPCALL + struct cifs_sb_info *cifs_sb = NULL; + struct dfs_cache_tgt_list tgt_list = {0}; + struct dfs_cache_tgt_iterator *tgt_it = NULL; +#endif spin_lock(&GlobalMid_Lock); + server->nr_targets = 1; +#ifdef CONFIG_CIFS_DFS_UPCALL + cifs_sb = find_super_by_tcp(server); + if (IS_ERR(cifs_sb)) { + rc = PTR_ERR(cifs_sb); + cifs_dbg(FYI, "%s: will not do DFS failover: rc = %d\n", + __func__, rc); + cifs_sb = NULL; + } else { + rc = reconn_setup_dfs_targets(cifs_sb, &tgt_list, &tgt_it); + if (rc && (rc != -EOPNOTSUPP)) { + cifs_dbg(VFS, "%s: no target servers for DFS failover\n", + __func__); + } else { + server->nr_targets = dfs_cache_get_nr_tgts(&tgt_list); + } + } + cifs_dbg(FYI, "%s: will retry %d target(s)\n", __func__, + server->nr_targets); +#endif if (server->tcpStatus == CifsExiting) { /* the demux thread will exit normally next time through the loop */ @@ -410,14 +567,27 @@ cifs_reconnect(struct TCP_Server_Info *server) do { try_to_freeze(); - /* we should try only the port we connected to before */ mutex_lock(&server->srv_mutex); + /* + * Set up next DFS target server (if any) for reconnect. If DFS + * feature is disabled, then we will retry last server we + * connected to before. 
+ */ if (cifs_rdma_enabled(server)) rc = smbd_reconnect(server); else rc = generic_ip_connect(server); if (rc) { cifs_dbg(FYI, "reconnect error %d\n", rc); +#ifdef CONFIG_CIFS_DFS_UPCALL + reconn_inval_dfs_target(server, cifs_sb, &tgt_list, + &tgt_it); +#endif + rc = reconn_set_ipaddr(server); + if (rc) { + cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n", + __func__, rc); + } mutex_unlock(&server->srv_mutex); msleep(3000); } else { @@ -430,6 +600,22 @@ cifs_reconnect(struct TCP_Server_Info *server) } } while (server->tcpStatus == CifsNeedReconnect); +#ifdef CONFIG_CIFS_DFS_UPCALL + if (tgt_it) { + rc = dfs_cache_noreq_update_tgthint(cifs_sb->origin_fullpath + 1, + tgt_it); + if (rc) { + cifs_dbg(VFS, "%s: failed to update DFS target hint: rc = %d\n", + __func__, rc); + } + rc = dfs_cache_update_vol(cifs_sb->origin_fullpath, server); + if (rc) { + cifs_dbg(VFS, "%s: failed to update vol info in DFS cache: rc = %d\n", + __func__, rc); + } + dfs_cache_free_tgts(&tgt_list); + } +#endif if (server->tcpStatus == CifsNeedNegotiate) mod_delayed_work(cifsiod_wq, &server->echo, 0); @@ -1043,7 +1229,12 @@ extract_hostname(const char *unc) /* skip double chars at beginning of string */ /* BB: check validity of these bytes? */ - src = unc + 2; + if (strlen(unc) < 3) + return ERR_PTR(-EINVAL); + for (src = unc; *src && *src == '\\'; src++) + ; + if (!*src) + return ERR_PTR(-EINVAL); /* delimiter between hostname and sharename is always '\\' now */ delim = strchr(src, '\\'); @@ -1827,7 +2018,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, vol->password = NULL; break; } - /* Yes it is. 
Drop down to Opt_pass below.*/ + /* Fallthrough - to Opt_pass below.*/ case Opt_pass: /* Obtain the value string */ value = strchr(data, '='); @@ -2289,7 +2480,7 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol) return 1; } -static struct TCP_Server_Info * +struct TCP_Server_Info * cifs_find_tcp_session(struct smb_vol *vol) { struct TCP_Server_Info *server; @@ -2461,6 +2652,8 @@ smbd_connected: } tcp_ses->tcpStatus = CifsNeedNegotiate; + tcp_ses->nr_targets = 1; + /* thread spawned, put it on the list */ spin_lock(&cifs_tcp_ses_lock); list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list); @@ -3256,25 +3449,6 @@ out: return rc; } -int -get_dfs_path(const unsigned int xid, struct cifs_ses *ses, const char *old_path, - const struct nls_table *nls_codepage, unsigned int *num_referrals, - struct dfs_info3_param **referrals, int remap) -{ - int rc = 0; - - if (!ses->server->ops->get_dfs_refer) - return -ENOSYS; - - *num_referrals = 0; - *referrals = NULL; - - rc = ses->server->ops->get_dfs_refer(xid, ses, old_path, - referrals, num_referrals, - nls_codepage, remap); - return rc; -} - #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key cifs_key[2]; static struct lock_class_key cifs_slock_key[2]; @@ -3746,8 +3920,8 @@ int cifs_setup_cifs_sb(struct smb_vol *pvolume_info, return 0; } -static void -cleanup_volume_info_contents(struct smb_vol *volume_info) +void +cifs_cleanup_volume_info_contents(struct smb_vol *volume_info) { kfree(volume_info->username); kzfree(volume_info->password); @@ -3762,10 +3936,136 @@ cifs_cleanup_volume_info(struct smb_vol *volume_info) { if (!volume_info) return; - cleanup_volume_info_contents(volume_info); + cifs_cleanup_volume_info_contents(volume_info); kfree(volume_info); } +/* Release all succeed connections */ +static inline void mount_put_conns(struct cifs_sb_info *cifs_sb, + unsigned int xid, + struct TCP_Server_Info *server, + struct cifs_ses *ses, struct cifs_tcon *tcon) +{ + int rc = 0; + + if (tcon) 
+ cifs_put_tcon(tcon); + else if (ses) + cifs_put_smb_ses(ses); + else if (server) + cifs_put_tcp_session(server, 0); + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS; + free_xid(xid); +} + +/* Get connections for tcp, ses and tcon */ +static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, + unsigned int *xid, + struct TCP_Server_Info **nserver, + struct cifs_ses **nses, struct cifs_tcon **ntcon) +{ + int rc = 0; + struct TCP_Server_Info *server; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + + *nserver = NULL; + *nses = NULL; + *ntcon = NULL; + + *xid = get_xid(); + + /* get a reference to a tcp session */ + server = cifs_get_tcp_session(vol); + if (IS_ERR(server)) { + rc = PTR_ERR(server); + return rc; + } + + *nserver = server; + + if ((vol->max_credits < 20) || (vol->max_credits > 60000)) + server->max_credits = SMB2_MAX_CREDITS_AVAILABLE; + else + server->max_credits = vol->max_credits; + + /* get a reference to a SMB session */ + ses = cifs_get_smb_ses(server, vol); + if (IS_ERR(ses)) { + rc = PTR_ERR(ses); + return rc; + } + + *nses = ses; + + if ((vol->persistent == true) && (!(ses->server->capabilities & + SMB2_GLOBAL_CAP_PERSISTENT_HANDLES))) { + cifs_dbg(VFS, "persistent handles not supported by server\n"); + return -EOPNOTSUPP; + } + + /* search for existing tcon to this server share */ + tcon = cifs_get_tcon(ses, vol); + if (IS_ERR(tcon)) { + rc = PTR_ERR(tcon); + return rc; + } + + *ntcon = tcon; + + /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */ + if (tcon->posix_extensions) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS; + + /* tell server which Unix caps we support */ + if (cap_unix(tcon->ses)) { + /* + * reset of caps checks mount to see if unix extensions disabled + * for just this mount. 
+ */ + reset_cifs_unix_caps(*xid, tcon, cifs_sb, vol); + if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) && + (le64_to_cpu(tcon->fsUnixInfo.Capability) & + CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) + return -EACCES; + } else + tcon->unix_ext = 0; /* server does not support them */ + + /* do not care if a following call succeed - informational */ + if (!tcon->pipe && server->ops->qfs_tcon) + server->ops->qfs_tcon(*xid, tcon); + + cifs_sb->wsize = server->ops->negotiate_wsize(tcon, vol); + cifs_sb->rsize = server->ops->negotiate_rsize(tcon, vol); + + return 0; +} + +static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, + struct cifs_tcon *tcon) +{ + struct tcon_link *tlink; + + /* hang the tcon off of the superblock */ + tlink = kzalloc(sizeof(*tlink), GFP_KERNEL); + if (tlink == NULL) + return -ENOMEM; + + tlink->tl_uid = ses->linux_uid; + tlink->tl_tcon = tcon; + tlink->tl_time = jiffies; + set_bit(TCON_LINK_MASTER, &tlink->tl_flags); + set_bit(TCON_LINK_IN_TREE, &tlink->tl_flags); + + cifs_sb->master_tlink = tlink; + spin_lock(&cifs_sb->tlink_tree_lock); + tlink_rb_insert(&cifs_sb->tlink_tree, tlink); + spin_unlock(&cifs_sb->tlink_tree_lock); + + queue_delayed_work(cifsiod_wq, &cifs_sb->prune_tlinks, + TLINK_IDLE_EXPIRE); + return 0; +} #ifdef CONFIG_CIFS_DFS_UPCALL /* @@ -3774,10 +4074,11 @@ cifs_cleanup_volume_info(struct smb_vol *volume_info) */ static char * build_unc_path_to_root(const struct smb_vol *vol, - const struct cifs_sb_info *cifs_sb) + const struct cifs_sb_info *cifs_sb, bool useppath) { char *full_path, *pos; - unsigned int pplen = vol->prepath ? strlen(vol->prepath) + 1 : 0; + unsigned int pplen = useppath && vol->prepath ? 
+ strlen(vol->prepath) + 1 : 0; unsigned int unc_len = strnlen(vol->UNC, MAX_TREE_SIZE + 1); full_path = kmalloc(unc_len + pplen + 1, GFP_KERNEL); @@ -3799,8 +4100,9 @@ build_unc_path_to_root(const struct smb_vol *vol, return full_path; } -/* - * Perform a dfs referral query for a share and (optionally) prefix +/** + * expand_dfs_referral - Perform a dfs referral query and update the cifs_sb + * * * If a referral is found, cifs_sb->mountdata will be (re-)allocated * to a string containing updated options for the submount. Otherwise it @@ -3815,39 +4117,36 @@ expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses, int check_prefix) { int rc; - unsigned int num_referrals = 0; - struct dfs_info3_param *referrals = NULL; + struct dfs_info3_param referral = {0}; char *full_path = NULL, *ref_path = NULL, *mdata = NULL; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) return -EREMOTE; - full_path = build_unc_path_to_root(volume_info, cifs_sb); + full_path = build_unc_path_to_root(volume_info, cifs_sb, true); if (IS_ERR(full_path)) return PTR_ERR(full_path); /* For DFS paths, skip the first '\' of the UNC */ ref_path = check_prefix ? 
full_path + 1 : volume_info->UNC + 1; - rc = get_dfs_path(xid, ses, ref_path, cifs_sb->local_nls, - &num_referrals, &referrals, cifs_remap(cifs_sb)); - - if (!rc && num_referrals > 0) { + rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), + ref_path, &referral, NULL); + if (!rc) { char *fake_devname = NULL; mdata = cifs_compose_mount_options(cifs_sb->mountdata, - full_path + 1, referrals, + full_path + 1, &referral, &fake_devname); - - free_dfs_info_array(referrals, num_referrals); + free_dfs_info_param(&referral); if (IS_ERR(mdata)) { rc = PTR_ERR(mdata); mdata = NULL; } else { - cleanup_volume_info_contents(volume_info); + cifs_cleanup_volume_info_contents(volume_info); rc = cifs_setup_volume_info(volume_info, mdata, - fake_devname, false); + fake_devname, false); } kfree(fake_devname); kfree(cifs_sb->mountdata); @@ -3856,6 +4155,143 @@ expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses, kfree(full_path); return rc; } + +static inline int get_next_dfs_tgt(const char *path, + struct dfs_cache_tgt_list *tgt_list, + struct dfs_cache_tgt_iterator **tgt_it) +{ + if (!*tgt_it) + *tgt_it = dfs_cache_get_tgt_iterator(tgt_list); + else + *tgt_it = dfs_cache_get_next_tgt(tgt_list, *tgt_it); + return !*tgt_it ? 
-EHOSTDOWN : 0; +} + +static int update_vol_info(const struct dfs_cache_tgt_iterator *tgt_it, + struct smb_vol *fake_vol, struct smb_vol *vol) +{ + const char *tgt = dfs_cache_get_tgt_name(tgt_it); + int len = strlen(tgt) + 2; + char *new_unc; + + new_unc = kmalloc(len, GFP_KERNEL); + if (!new_unc) + return -ENOMEM; + snprintf(new_unc, len, "\\%s", tgt); + + kfree(vol->UNC); + vol->UNC = new_unc; + + if (fake_vol->prepath) { + kfree(vol->prepath); + vol->prepath = fake_vol->prepath; + fake_vol->prepath = NULL; + } + memcpy(&vol->dstaddr, &fake_vol->dstaddr, sizeof(vol->dstaddr)); + + return 0; +} + +static int setup_dfs_tgt_conn(const char *path, + const struct dfs_cache_tgt_iterator *tgt_it, + struct cifs_sb_info *cifs_sb, + struct smb_vol *vol, + unsigned int *xid, + struct TCP_Server_Info **server, + struct cifs_ses **ses, + struct cifs_tcon **tcon) +{ + int rc; + struct dfs_info3_param ref = {0}; + char *mdata = NULL, *fake_devname = NULL; + struct smb_vol fake_vol = {0}; + + cifs_dbg(FYI, "%s: dfs path: %s\n", __func__, path); + + rc = dfs_cache_get_tgt_referral(path, tgt_it, &ref); + if (rc) + return rc; + + mdata = cifs_compose_mount_options(cifs_sb->mountdata, path, &ref, + &fake_devname); + free_dfs_info_param(&ref); + + if (IS_ERR(mdata)) { + rc = PTR_ERR(mdata); + mdata = NULL; + } else { + cifs_dbg(FYI, "%s: fake_devname: %s\n", __func__, fake_devname); + rc = cifs_setup_volume_info(&fake_vol, mdata, fake_devname, + false); + } + kfree(mdata); + kfree(fake_devname); + + if (!rc) { + /* + * We use a 'fake_vol' here because we need pass it down to the + * mount_{get,put} functions to test connection against new DFS + * targets. + */ + mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon); + rc = mount_get_conns(&fake_vol, cifs_sb, xid, server, ses, + tcon); + if (!rc) { + /* + * We were able to connect to new target server. + * Update current volume info with new target server. 
+ */ + rc = update_vol_info(tgt_it, &fake_vol, vol); + } + } + cifs_cleanup_volume_info_contents(&fake_vol); + return rc; +} + +static int mount_do_dfs_failover(const char *path, + struct cifs_sb_info *cifs_sb, + struct smb_vol *vol, + struct cifs_ses *root_ses, + unsigned int *xid, + struct TCP_Server_Info **server, + struct cifs_ses **ses, + struct cifs_tcon **tcon) +{ + int rc; + struct dfs_cache_tgt_list tgt_list; + struct dfs_cache_tgt_iterator *tgt_it = NULL; + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) + return -EOPNOTSUPP; + + rc = dfs_cache_noreq_find(path, NULL, &tgt_list); + if (rc) + return rc; + + for (;;) { + /* Get next DFS target server - if any */ + rc = get_next_dfs_tgt(path, &tgt_list, &tgt_it); + if (rc) + break; + /* Connect to next DFS target */ + rc = setup_dfs_tgt_conn(path, tgt_it, cifs_sb, vol, xid, server, + ses, tcon); + if (!rc || rc == -EACCES || rc == -EOPNOTSUPP) + break; + } + if (!rc) { + /* + * Update DFS target hint in DFS referral cache with the target + * server we successfully reconnected to. + */ + rc = dfs_cache_update_tgthint(*xid, root_ses ? root_ses : *ses, + cifs_sb->local_nls, + cifs_remap(cifs_sb), path, + tgt_it); + } + dfs_cache_free_tgts(&tgt_list); + return rc; +} #endif static int @@ -3954,107 +4390,108 @@ cifs_are_all_path_components_accessible(struct TCP_Server_Info *server, return rc; } -int -cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) +/* + * Check if path is remote (e.g. a DFS share). Return -EREMOTE if it is, + * otherwise 0. 
+ */ +static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb_vol *vol, + const unsigned int xid, + struct TCP_Server_Info *server, + struct cifs_tcon *tcon) { int rc; - unsigned int xid; - struct cifs_ses *ses; - struct cifs_tcon *tcon; - struct TCP_Server_Info *server; - char *full_path; - struct tcon_link *tlink; -#ifdef CONFIG_CIFS_DFS_UPCALL - int referral_walks_count = 0; -#endif - -#ifdef CONFIG_CIFS_DFS_UPCALL -try_mount_again: - /* cleanup activities if we're chasing a referral */ - if (referral_walks_count) { - if (tcon) - cifs_put_tcon(tcon); - else if (ses) - cifs_put_smb_ses(ses); + char *full_path; - cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS; + if (!server->ops->is_path_accessible) + return -EOPNOTSUPP; - free_xid(xid); - } -#endif - rc = 0; - tcon = NULL; - ses = NULL; - server = NULL; - full_path = NULL; - tlink = NULL; + /* + * cifs_build_path_to_root works only when we have a valid tcon + */ + full_path = cifs_build_path_to_root(vol, cifs_sb, tcon, + tcon->Flags & SMB_SHARE_IS_IN_DFS); + if (full_path == NULL) + return -ENOMEM; - xid = get_xid(); + cifs_dbg(FYI, "%s: full_path: %s\n", __func__, full_path); - /* get a reference to a tcp session */ - server = cifs_get_tcp_session(volume_info); - if (IS_ERR(server)) { - rc = PTR_ERR(server); - goto out; - } - if ((volume_info->max_credits < 20) || - (volume_info->max_credits > 60000)) - server->max_credits = SMB2_MAX_CREDITS_AVAILABLE; - else - server->max_credits = volume_info->max_credits; - /* get a reference to a SMB session */ - ses = cifs_get_smb_ses(server, volume_info); - if (IS_ERR(ses)) { - rc = PTR_ERR(ses); - ses = NULL; - goto mount_fail_check; + rc = server->ops->is_path_accessible(xid, tcon, cifs_sb, + full_path); + if (rc != 0 && rc != -EREMOTE) { + kfree(full_path); + return rc; } - if ((volume_info->persistent == true) && ((ses->server->capabilities & - SMB2_GLOBAL_CAP_PERSISTENT_HANDLES) == 0)) { - cifs_dbg(VFS, "persistent handles not supported by server\n"); - 
rc = -EOPNOTSUPP; - goto mount_fail_check; + if (rc != -EREMOTE) { + rc = cifs_are_all_path_components_accessible(server, xid, tcon, + cifs_sb, + full_path); + if (rc != 0) { + cifs_dbg(VFS, "cannot query dirs between root and final path, " + "enabling CIFS_MOUNT_USE_PREFIX_PATH\n"); + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; + rc = 0; + } } - /* search for existing tcon to this server share */ - tcon = cifs_get_tcon(ses, volume_info); - if (IS_ERR(tcon)) { - rc = PTR_ERR(tcon); - tcon = NULL; - if (rc == -EACCES) - goto mount_fail_check; - - goto remote_path_check; - } + kfree(full_path); + return rc; +} - /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */ - if (tcon->posix_extensions) - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS; +#ifdef CONFIG_CIFS_DFS_UPCALL +int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol) +{ + int rc = 0; + unsigned int xid; + struct cifs_ses *ses; + struct cifs_tcon *root_tcon = NULL; + struct cifs_tcon *tcon = NULL; + struct TCP_Server_Info *server; + char *root_path = NULL, *full_path = NULL; + char *old_mountdata; + int count; - /* tell server which Unix caps we support */ - if (cap_unix(tcon->ses)) { - /* reset of caps checks mount to see if unix extensions - disabled for just this mount */ - reset_cifs_unix_caps(xid, tcon, cifs_sb, volume_info); - if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) && - (le64_to_cpu(tcon->fsUnixInfo.Capability) & - CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) { - rc = -EACCES; - goto mount_fail_check; + rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses, &tcon); + if (!rc && tcon) { + /* If not a standalone DFS root, then check if path is remote */ + rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, + cifs_remap(cifs_sb), vol->UNC + 1, NULL, + NULL); + if (rc) { + rc = is_path_remote(cifs_sb, vol, xid, server, tcon); + if (!rc) + goto out; + if (rc != -EREMOTE) + goto error; } - } else - tcon->unix_ext = 0; /* server does not support 
them */ - - /* do not care if a following call succeed - informational */ - if (!tcon->pipe && server->ops->qfs_tcon) - server->ops->qfs_tcon(xid, tcon); - - cifs_sb->wsize = server->ops->negotiate_wsize(tcon, volume_info); - cifs_sb->rsize = server->ops->negotiate_rsize(tcon, volume_info); + } + /* + * If first DFS target server went offline and we failed to connect it, + * server and ses pointers are NULL at this point, though we still have + * chance to get a cached DFS referral in expand_dfs_referral() and + * retry next target available in it. + * + * If a NULL ses ptr is passed to dfs_cache_find(), a lookup will be + * performed against DFS path and *no* requests will be sent to server + * for any new DFS referrals. Hence it's safe to skip checking whether + * server or ses ptr is NULL. + */ + if (rc == -EACCES || rc == -EOPNOTSUPP) + goto error; + + root_path = build_unc_path_to_root(vol, cifs_sb, false); + if (IS_ERR(root_path)) { + rc = PTR_ERR(root_path); + root_path = NULL; + goto error; + } -remote_path_check: -#ifdef CONFIG_CIFS_DFS_UPCALL + full_path = build_unc_path_to_root(vol, cifs_sb, true); + if (IS_ERR(full_path)) { + rc = PTR_ERR(full_path); + full_path = NULL; + goto error; + } /* * Perform an unconditional check for whether there are DFS * referrals for this path without prefix, to provide support @@ -4062,119 +4499,173 @@ remote_path_check: * with PATH_NOT_COVERED to requests that include the prefix. * Chase the referral if found, otherwise continue normally. 
*/ - if (referral_walks_count == 0) { - int refrc = expand_dfs_referral(xid, ses, volume_info, cifs_sb, - false); - if (!refrc) { - referral_walks_count++; - goto try_mount_again; - } + old_mountdata = cifs_sb->mountdata; + (void)expand_dfs_referral(xid, ses, vol, cifs_sb, false); + + if (cifs_sb->mountdata == NULL) { + rc = -ENOENT; + goto error; } -#endif - /* check if a whole path is not remote */ - if (!rc && tcon) { - if (!server->ops->is_path_accessible) { - rc = -ENOSYS; - goto mount_fail_check; + if (cifs_sb->mountdata != old_mountdata) { + /* If we were redirected, reconnect to new target server */ + mount_put_conns(cifs_sb, xid, server, ses, tcon); + rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses, &tcon); + } + if (rc) { + if (rc == -EACCES || rc == -EOPNOTSUPP) + goto error; + /* Perform DFS failover to any other DFS targets */ + rc = mount_do_dfs_failover(root_path + 1, cifs_sb, vol, NULL, + &xid, &server, &ses, &tcon); + if (rc) + goto error; + } + + kfree(root_path); + root_path = build_unc_path_to_root(vol, cifs_sb, false); + if (IS_ERR(root_path)) { + rc = PTR_ERR(root_path); + root_path = NULL; + goto error; + } + /* Cache out resolved root server */ + (void)dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), + root_path + 1, NULL, NULL); + /* + * Save root tcon for additional DFS requests to update or create a new + * DFS cache entry, or even perform DFS failover. + */ + spin_lock(&cifs_tcp_ses_lock); + tcon->tc_count++; + tcon->dfs_path = root_path; + root_path = NULL; + tcon->remap = cifs_remap(cifs_sb); + spin_unlock(&cifs_tcp_ses_lock); + + root_tcon = tcon; + + for (count = 1; ;) { + if (!rc && tcon) { + rc = is_path_remote(cifs_sb, vol, xid, server, tcon); + if (!rc || rc != -EREMOTE) + break; } /* - * cifs_build_path_to_root works only when we have a valid tcon + * BB: when we implement proper loop detection, + * we will remove this check. 
But now we need it + * to prevent an indefinite loop if 'DFS tree' is + * misconfigured (i.e. has loops). */ - full_path = cifs_build_path_to_root(volume_info, cifs_sb, tcon, - tcon->Flags & SMB_SHARE_IS_IN_DFS); - if (full_path == NULL) { - rc = -ENOMEM; - goto mount_fail_check; - } - rc = server->ops->is_path_accessible(xid, tcon, cifs_sb, - full_path); - if (rc != 0 && rc != -EREMOTE) { - kfree(full_path); - goto mount_fail_check; + if (count++ > MAX_NESTED_LINKS) { + rc = -ELOOP; + break; } - if (rc != -EREMOTE) { - rc = cifs_are_all_path_components_accessible(server, - xid, tcon, cifs_sb, - full_path); - if (rc != 0) { - cifs_dbg(VFS, "cannot query dirs between root and final path, " - "enabling CIFS_MOUNT_USE_PREFIX_PATH\n"); - cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; - rc = 0; - } - } kfree(full_path); - } - - /* get referral if needed */ - if (rc == -EREMOTE) { -#ifdef CONFIG_CIFS_DFS_UPCALL - if (referral_walks_count > MAX_NESTED_LINKS) { - /* - * BB: when we implement proper loop detection, - * we will remove this check. But now we need it - * to prevent an indefinite loop if 'DFS tree' is - * misconfigured (i.e. has loops). 
- */ - rc = -ELOOP; - goto mount_fail_check; + full_path = build_unc_path_to_root(vol, cifs_sb, true); + if (IS_ERR(full_path)) { + rc = PTR_ERR(full_path); + full_path = NULL; + break; } - rc = expand_dfs_referral(xid, ses, volume_info, cifs_sb, true); + old_mountdata = cifs_sb->mountdata; + rc = expand_dfs_referral(xid, root_tcon->ses, vol, cifs_sb, + true); + if (rc) + break; - if (!rc) { - referral_walks_count++; - goto try_mount_again; + if (cifs_sb->mountdata != old_mountdata) { + mount_put_conns(cifs_sb, xid, server, ses, tcon); + rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses, + &tcon); + } + if (rc) { + if (rc == -EACCES || rc == -EOPNOTSUPP) + break; + /* Perform DFS failover to any other DFS targets */ + rc = mount_do_dfs_failover(full_path + 1, cifs_sb, vol, + root_tcon->ses, &xid, + &server, &ses, &tcon); + if (rc == -EACCES || rc == -EOPNOTSUPP || !server || + !ses) + goto error; } - goto mount_fail_check; -#else /* No DFS support, return error on mount */ - rc = -EOPNOTSUPP; -#endif } + cifs_put_tcon(root_tcon); if (rc) - goto mount_fail_check; + goto error; - /* now, hang the tcon off of the superblock */ - tlink = kzalloc(sizeof *tlink, GFP_KERNEL); - if (tlink == NULL) { + spin_lock(&cifs_tcp_ses_lock); + if (!tcon->dfs_path) { + /* Save full path in new tcon to do failover when reconnecting tcons */ + tcon->dfs_path = full_path; + full_path = NULL; + tcon->remap = cifs_remap(cifs_sb); + } + cifs_sb->origin_fullpath = kstrndup(tcon->dfs_path, + strlen(tcon->dfs_path), + GFP_ATOMIC); + if (!cifs_sb->origin_fullpath) { + spin_unlock(&cifs_tcp_ses_lock); rc = -ENOMEM; - goto mount_fail_check; + goto error; } + spin_unlock(&cifs_tcp_ses_lock); - tlink->tl_uid = ses->linux_uid; - tlink->tl_tcon = tcon; - tlink->tl_time = jiffies; - set_bit(TCON_LINK_MASTER, &tlink->tl_flags); - set_bit(TCON_LINK_IN_TREE, &tlink->tl_flags); + rc = dfs_cache_add_vol(vol, cifs_sb->origin_fullpath); + if (rc) { + kfree(cifs_sb->origin_fullpath); + goto error; + } + 
/* + * After reconnecting to a different server, unique ids won't + * match anymore, so we disable serverino. This prevents + * dentry revalidation to think the dentry are stale (ESTALE). + */ + cifs_autodisable_serverino(cifs_sb); +out: + free_xid(xid); + return mount_setup_tlink(cifs_sb, ses, tcon); - cifs_sb->master_tlink = tlink; - spin_lock(&cifs_sb->tlink_tree_lock); - tlink_rb_insert(&cifs_sb->tlink_tree, tlink); - spin_unlock(&cifs_sb->tlink_tree_lock); +error: + kfree(full_path); + kfree(root_path); + mount_put_conns(cifs_sb, xid, server, ses, tcon); + return rc; +} +#else +int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol) +{ + int rc = 0; + unsigned int xid; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; - queue_delayed_work(cifsiod_wq, &cifs_sb->prune_tlinks, - TLINK_IDLE_EXPIRE); + rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses, &tcon); + if (rc) + goto error; -mount_fail_check: - /* on error free sesinfo and tcon struct if needed */ - if (rc) { - /* If find_unc succeeded then rc == 0 so we can not end */ - /* up accidentally freeing someone elses tcon struct */ - if (tcon) - cifs_put_tcon(tcon); - else if (ses) - cifs_put_smb_ses(ses); - else - cifs_put_tcp_session(server, 0); + if (tcon) { + rc = is_path_remote(cifs_sb, vol, xid, server, tcon); + if (rc == -EREMOTE) + rc = -EOPNOTSUPP; + if (rc) + goto error; } -out: free_xid(xid); + + return mount_setup_tlink(cifs_sb, ses, tcon); + +error: + mount_put_conns(cifs_sb, xid, server, ses, tcon); return rc; } +#endif /* * Issue a TREE_CONNECT request. 
@@ -4370,6 +4861,10 @@ cifs_umount(struct cifs_sb_info *cifs_sb) kfree(cifs_sb->mountdata); kfree(cifs_sb->prepath); +#ifdef CONFIG_CIFS_DFS_UPCALL + dfs_cache_del_vol(cifs_sb->origin_fullpath); + kfree(cifs_sb->origin_fullpath); +#endif call_rcu(&cifs_sb->rcu, delayed_free); } diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c new file mode 100644 index 000000000000..09b7d0d4f6e4 --- /dev/null +++ b/fs/cifs/dfs_cache.c @@ -0,0 +1,1368 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DFS referral cache routines + * + * Copyright (c) 2018 Paulo Alcantara <palcantara@suse.de> + */ + +#include <linux/rcupdate.h> +#include <linux/rculist.h> +#include <linux/jhash.h> +#include <linux/ktime.h> +#include <linux/slab.h> +#include <linux/nls.h> +#include <linux/workqueue.h> +#include "cifsglob.h" +#include "smb2pdu.h" +#include "smb2proto.h" +#include "cifsproto.h" +#include "cifs_debug.h" +#include "cifs_unicode.h" +#include "smb2glob.h" + +#include "dfs_cache.h" + +#define DFS_CACHE_HTABLE_SIZE 32 +#define DFS_CACHE_MAX_ENTRIES 64 + +#define IS_INTERLINK_SET(v) ((v) & (DFSREF_REFERRAL_SERVER | \ + DFSREF_STORAGE_SERVER)) + +struct dfs_cache_tgt { + char *t_name; + struct list_head t_list; +}; + +struct dfs_cache_entry { + struct hlist_node ce_hlist; + const char *ce_path; + int ce_ttl; + int ce_srvtype; + int ce_flags; + struct timespec64 ce_etime; + int ce_path_consumed; + int ce_numtgts; + struct list_head ce_tlist; + struct dfs_cache_tgt *ce_tgthint; + struct rcu_head ce_rcu; +}; + +static struct kmem_cache *dfs_cache_slab __read_mostly; + +struct dfs_cache_vol_info { + char *vi_fullpath; + struct smb_vol vi_vol; + struct list_head vi_list; +}; + +struct dfs_cache { + struct mutex dc_lock; + struct nls_table *dc_nlsc; + struct list_head dc_vol_list; + int dc_ttl; + struct delayed_work dc_refresh; +}; + +static struct dfs_cache dfs_cache; + +/* + * Number of entries in the cache + */ +static size_t dfs_cache_count; + +static DEFINE_MUTEX(dfs_cache_list_lock); +static 
struct hlist_head dfs_cache_htable[DFS_CACHE_HTABLE_SIZE]; + +static void refresh_cache_worker(struct work_struct *work); + +static inline bool is_path_valid(const char *path) +{ + return path && (strchr(path + 1, '\\') || strchr(path + 1, '/')); +} + +static inline int get_normalized_path(const char *path, char **npath) +{ + if (*path == '\\') { + *npath = (char *)path; + } else { + *npath = kstrndup(path, strlen(path), GFP_KERNEL); + if (!*npath) + return -ENOMEM; + convert_delimiter(*npath, '\\'); + } + return 0; +} + +static inline void free_normalized_path(const char *path, char *npath) +{ + if (path != npath) + kfree(npath); +} + +static inline bool cache_entry_expired(const struct dfs_cache_entry *ce) +{ + struct timespec64 ts; + + ktime_get_coarse_real_ts64(&ts); + return timespec64_compare(&ts, &ce->ce_etime) >= 0; +} + +static inline void free_tgts(struct dfs_cache_entry *ce) +{ + struct dfs_cache_tgt *t, *n; + + list_for_each_entry_safe(t, n, &ce->ce_tlist, t_list) { + list_del(&t->t_list); + kfree(t->t_name); + kfree(t); + } +} + +static void free_cache_entry(struct rcu_head *rcu) +{ + struct dfs_cache_entry *ce = container_of(rcu, struct dfs_cache_entry, + ce_rcu); + kmem_cache_free(dfs_cache_slab, ce); +} + +static inline void flush_cache_ent(struct dfs_cache_entry *ce) +{ + if (hlist_unhashed(&ce->ce_hlist)) + return; + + hlist_del_init_rcu(&ce->ce_hlist); + kfree(ce->ce_path); + free_tgts(ce); + dfs_cache_count--; + call_rcu(&ce->ce_rcu, free_cache_entry); +} + +static void flush_cache_ents(void) +{ + int i; + + rcu_read_lock(); + for (i = 0; i < DFS_CACHE_HTABLE_SIZE; i++) { + struct hlist_head *l = &dfs_cache_htable[i]; + struct dfs_cache_entry *ce; + + hlist_for_each_entry_rcu(ce, l, ce_hlist) + flush_cache_ent(ce); + } + rcu_read_unlock(); +} + +/* + * dfs cache /proc file + */ +static int dfscache_proc_show(struct seq_file *m, void *v) +{ + int bucket; + struct dfs_cache_entry *ce; + struct dfs_cache_tgt *t; + + seq_puts(m, "DFS 
cache\n---------\n"); + + mutex_lock(&dfs_cache_list_lock); + + rcu_read_lock(); + hash_for_each_rcu(dfs_cache_htable, bucket, ce, ce_hlist) { + seq_printf(m, + "cache entry: path=%s,type=%s,ttl=%d,etime=%ld," + "interlink=%s,path_consumed=%d,expired=%s\n", + ce->ce_path, + ce->ce_srvtype == DFS_TYPE_ROOT ? "root" : "link", + ce->ce_ttl, ce->ce_etime.tv_nsec, + IS_INTERLINK_SET(ce->ce_flags) ? "yes" : "no", + ce->ce_path_consumed, + cache_entry_expired(ce) ? "yes" : "no"); + + list_for_each_entry(t, &ce->ce_tlist, t_list) { + seq_printf(m, " %s%s\n", + t->t_name, + ce->ce_tgthint == t ? " (target hint)" : ""); + } + + } + rcu_read_unlock(); + + mutex_unlock(&dfs_cache_list_lock); + return 0; +} + +static ssize_t dfscache_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) +{ + char c; + int rc; + + rc = get_user(c, buffer); + if (rc) + return rc; + + if (c != '0') + return -EINVAL; + + cifs_dbg(FYI, "clearing dfs cache"); + mutex_lock(&dfs_cache_list_lock); + flush_cache_ents(); + mutex_unlock(&dfs_cache_list_lock); + + return count; +} + +static int dfscache_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, dfscache_proc_show, NULL); +} + +const struct file_operations dfscache_proc_fops = { + .open = dfscache_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = dfscache_proc_write, +}; + +#ifdef CONFIG_CIFS_DEBUG2 +static inline void dump_tgts(const struct dfs_cache_entry *ce) +{ + struct dfs_cache_tgt *t; + + cifs_dbg(FYI, "target list:\n"); + list_for_each_entry(t, &ce->ce_tlist, t_list) { + cifs_dbg(FYI, " %s%s\n", t->t_name, + ce->ce_tgthint == t ? " (target hint)" : ""); + } +} + +static inline void dump_ce(const struct dfs_cache_entry *ce) +{ + cifs_dbg(FYI, "cache entry: path=%s,type=%s,ttl=%d,etime=%ld," + "interlink=%s,path_consumed=%d,expired=%s\n", ce->ce_path, + ce->ce_srvtype == DFS_TYPE_ROOT ? 
"root" : "link", ce->ce_ttl, + ce->ce_etime.tv_nsec, + IS_INTERLINK_SET(ce->ce_flags) ? "yes" : "no", + ce->ce_path_consumed, + cache_entry_expired(ce) ? "yes" : "no"); + dump_tgts(ce); +} + +static inline void dump_refs(const struct dfs_info3_param *refs, int numrefs) +{ + int i; + + cifs_dbg(FYI, "DFS referrals returned by the server:\n"); + for (i = 0; i < numrefs; i++) { + const struct dfs_info3_param *ref = &refs[i]; + + cifs_dbg(FYI, + "\n" + "flags: 0x%x\n" + "path_consumed: %d\n" + "server_type: 0x%x\n" + "ref_flag: 0x%x\n" + "path_name: %s\n" + "node_name: %s\n" + "ttl: %d (%dm)\n", + ref->flags, ref->path_consumed, ref->server_type, + ref->ref_flag, ref->path_name, ref->node_name, + ref->ttl, ref->ttl / 60); + } +} +#else +#define dump_tgts(e) +#define dump_ce(e) +#define dump_refs(r, n) +#endif + +/** + * dfs_cache_init - Initialize DFS referral cache. + * + * Return zero if initialized successfully, otherwise non-zero. + */ +int dfs_cache_init(void) +{ + int i; + + dfs_cache_slab = kmem_cache_create("cifs_dfs_cache", + sizeof(struct dfs_cache_entry), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!dfs_cache_slab) + return -ENOMEM; + + for (i = 0; i < DFS_CACHE_HTABLE_SIZE; i++) + INIT_HLIST_HEAD(&dfs_cache_htable[i]); + + INIT_LIST_HEAD(&dfs_cache.dc_vol_list); + mutex_init(&dfs_cache.dc_lock); + INIT_DELAYED_WORK(&dfs_cache.dc_refresh, refresh_cache_worker); + dfs_cache.dc_ttl = -1; + dfs_cache.dc_nlsc = load_nls_default(); + + cifs_dbg(FYI, "%s: initialized DFS referral cache\n", __func__); + return 0; +} + +static inline unsigned int cache_entry_hash(const void *data, int size) +{ + unsigned int h; + + h = jhash(data, size, 0); + return h & (DFS_CACHE_HTABLE_SIZE - 1); +} + +/* Check whether second path component of @path is SYSVOL or NETLOGON */ +static inline bool is_sysvol_or_netlogon(const char *path) +{ + const char *s; + char sep = path[0]; + + s = strchr(path + 1, sep) + 1; + return !strncasecmp(s, "sysvol", strlen("sysvol")) || + !strncasecmp(s, 
"netlogon", strlen("netlogon")); +} + +/* Return target hint of a DFS cache entry */ +static inline char *get_tgt_name(const struct dfs_cache_entry *ce) +{ + struct dfs_cache_tgt *t = ce->ce_tgthint; + + return t ? t->t_name : ERR_PTR(-ENOENT); +} + +/* Return expire time out of a new entry's TTL */ +static inline struct timespec64 get_expire_time(int ttl) +{ + struct timespec64 ts = { + .tv_sec = ttl, + .tv_nsec = 0, + }; + struct timespec64 now; + + ktime_get_coarse_real_ts64(&now); + return timespec64_add(now, ts); +} + +/* Allocate a new DFS target */ +static inline struct dfs_cache_tgt *alloc_tgt(const char *name) +{ + struct dfs_cache_tgt *t; + + t = kmalloc(sizeof(*t), GFP_KERNEL); + if (!t) + return ERR_PTR(-ENOMEM); + t->t_name = kstrndup(name, strlen(name), GFP_KERNEL); + if (!t->t_name) { + kfree(t); + return ERR_PTR(-ENOMEM); + } + INIT_LIST_HEAD(&t->t_list); + return t; +} + +/* + * Copy DFS referral information to a cache entry and conditionally update + * target hint. + */ +static int copy_ref_data(const struct dfs_info3_param *refs, int numrefs, + struct dfs_cache_entry *ce, const char *tgthint) +{ + int i; + + ce->ce_ttl = refs[0].ttl; + ce->ce_etime = get_expire_time(ce->ce_ttl); + ce->ce_srvtype = refs[0].server_type; + ce->ce_flags = refs[0].ref_flag; + ce->ce_path_consumed = refs[0].path_consumed; + + for (i = 0; i < numrefs; i++) { + struct dfs_cache_tgt *t; + + t = alloc_tgt(refs[i].node_name); + if (IS_ERR(t)) { + free_tgts(ce); + return PTR_ERR(t); + } + if (tgthint && !strcasecmp(t->t_name, tgthint)) { + list_add(&t->t_list, &ce->ce_tlist); + tgthint = NULL; + } else { + list_add_tail(&t->t_list, &ce->ce_tlist); + } + ce->ce_numtgts++; + } + + ce->ce_tgthint = list_first_entry_or_null(&ce->ce_tlist, + struct dfs_cache_tgt, t_list); + + return 0; +} + +/* Allocate a new cache entry */ +static struct dfs_cache_entry * +alloc_cache_entry(const char *path, const struct dfs_info3_param *refs, + int numrefs) +{ + struct dfs_cache_entry *ce; + 
int rc; + + ce = kmem_cache_zalloc(dfs_cache_slab, GFP_KERNEL); + if (!ce) + return ERR_PTR(-ENOMEM); + + ce->ce_path = kstrdup_const(path, GFP_KERNEL); + if (!ce->ce_path) { + kmem_cache_free(dfs_cache_slab, ce); + return ERR_PTR(-ENOMEM); + } + INIT_HLIST_NODE(&ce->ce_hlist); + INIT_LIST_HEAD(&ce->ce_tlist); + + rc = copy_ref_data(refs, numrefs, ce, NULL); + if (rc) { + kfree(ce->ce_path); + kmem_cache_free(dfs_cache_slab, ce); + ce = ERR_PTR(rc); + } + return ce; +} + +static void remove_oldest_entry(void) +{ + int bucket; + struct dfs_cache_entry *ce; + struct dfs_cache_entry *to_del = NULL; + + rcu_read_lock(); + hash_for_each_rcu(dfs_cache_htable, bucket, ce, ce_hlist) { + if (!to_del || timespec64_compare(&ce->ce_etime, + &to_del->ce_etime) < 0) + to_del = ce; + } + if (!to_del) { + cifs_dbg(FYI, "%s: no entry to remove", __func__); + goto out; + } + cifs_dbg(FYI, "%s: removing entry", __func__); + dump_ce(to_del); + flush_cache_ent(to_del); +out: + rcu_read_unlock(); +} + +/* Add a new DFS cache entry */ +static inline struct dfs_cache_entry * +add_cache_entry(unsigned int hash, const char *path, + const struct dfs_info3_param *refs, int numrefs) +{ + struct dfs_cache_entry *ce; + + ce = alloc_cache_entry(path, refs, numrefs); + if (IS_ERR(ce)) + return ce; + + hlist_add_head_rcu(&ce->ce_hlist, &dfs_cache_htable[hash]); + + mutex_lock(&dfs_cache.dc_lock); + if (dfs_cache.dc_ttl < 0) { + dfs_cache.dc_ttl = ce->ce_ttl; + queue_delayed_work(cifsiod_wq, &dfs_cache.dc_refresh, + dfs_cache.dc_ttl * HZ); + } else { + dfs_cache.dc_ttl = min_t(int, dfs_cache.dc_ttl, ce->ce_ttl); + mod_delayed_work(cifsiod_wq, &dfs_cache.dc_refresh, + dfs_cache.dc_ttl * HZ); + } + mutex_unlock(&dfs_cache.dc_lock); + + return ce; +} + +static struct dfs_cache_entry *__find_cache_entry(unsigned int hash, + const char *path) +{ + struct dfs_cache_entry *ce; + bool found = false; + + rcu_read_lock(); + hlist_for_each_entry_rcu(ce, &dfs_cache_htable[hash], ce_hlist) { + if 
(!strcasecmp(path, ce->ce_path)) { +#ifdef CONFIG_CIFS_DEBUG2 + char *name = get_tgt_name(ce); + + if (unlikely(IS_ERR(name))) { + rcu_read_unlock(); + return ERR_CAST(name); + } + cifs_dbg(FYI, "%s: cache hit\n", __func__); + cifs_dbg(FYI, "%s: target hint: %s\n", __func__, name); +#endif + found = true; + break; + } + } + rcu_read_unlock(); + return found ? ce : ERR_PTR(-ENOENT); +} + +/* + * Find a DFS cache entry in hash table and optionally check prefix path against + * @path. + * Use whole path components in the match. + * Return ERR_PTR(-ENOENT) if the entry is not found. + */ +static inline struct dfs_cache_entry *find_cache_entry(const char *path, + unsigned int *hash) +{ + *hash = cache_entry_hash(path, strlen(path)); + return __find_cache_entry(*hash, path); +} + +static inline void destroy_slab_cache(void) +{ + rcu_barrier(); + kmem_cache_destroy(dfs_cache_slab); +} + +static inline void free_vol(struct dfs_cache_vol_info *vi) +{ + list_del(&vi->vi_list); + kfree(vi->vi_fullpath); + cifs_cleanup_volume_info_contents(&vi->vi_vol); + kfree(vi); +} + +static inline void free_vol_list(void) +{ + struct dfs_cache_vol_info *vi, *nvi; + + list_for_each_entry_safe(vi, nvi, &dfs_cache.dc_vol_list, vi_list) + free_vol(vi); +} + +/** + * dfs_cache_destroy - destroy DFS referral cache + */ +void dfs_cache_destroy(void) +{ + cancel_delayed_work_sync(&dfs_cache.dc_refresh); + unload_nls(dfs_cache.dc_nlsc); + free_vol_list(); + mutex_destroy(&dfs_cache.dc_lock); + + flush_cache_ents(); + destroy_slab_cache(); + mutex_destroy(&dfs_cache_list_lock); + + cifs_dbg(FYI, "%s: destroyed DFS referral cache\n", __func__); +} + +static inline struct dfs_cache_entry * +__update_cache_entry(const char *path, const struct dfs_info3_param *refs, + int numrefs) +{ + int rc; + unsigned int h; + struct dfs_cache_entry *ce; + char *s, *th = NULL; + + ce = find_cache_entry(path, &h); + if (IS_ERR(ce)) + return ce; + + if (ce->ce_tgthint) { + s = ce->ce_tgthint->t_name; + th = 
kstrndup(s, strlen(s), GFP_KERNEL); + if (!th) + return ERR_PTR(-ENOMEM); + } + + free_tgts(ce); + ce->ce_numtgts = 0; + + rc = copy_ref_data(refs, numrefs, ce, th); + kfree(th); + + if (rc) + ce = ERR_PTR(rc); + + return ce; +} + +/* Update an expired cache entry by getting a new DFS referral from server */ +static struct dfs_cache_entry * +update_cache_entry(const unsigned int xid, struct cifs_ses *ses, + const struct nls_table *nls_codepage, int remap, + const char *path, struct dfs_cache_entry *ce) +{ + int rc; + struct dfs_info3_param *refs = NULL; + int numrefs = 0; + + cifs_dbg(FYI, "%s: update expired cache entry\n", __func__); + /* + * Check if caller provided enough parameters to update an expired + * entry. + */ + if (!ses || !ses->server || !ses->server->ops->get_dfs_refer) + return ERR_PTR(-ETIME); + if (unlikely(!nls_codepage)) + return ERR_PTR(-ETIME); + + cifs_dbg(FYI, "%s: DFS referral request for %s\n", __func__, path); + + rc = ses->server->ops->get_dfs_refer(xid, ses, path, &refs, &numrefs, + nls_codepage, remap); + if (rc) + ce = ERR_PTR(rc); + else + ce = __update_cache_entry(path, refs, numrefs); + + dump_refs(refs, numrefs); + free_dfs_info_array(refs, numrefs); + + return ce; +} + +/* + * Find, create or update a DFS cache entry. + * + * If the entry wasn't found, it will create a new one. Or if it was found but + * expired, then it will update the entry accordingly. + * + * For interlinks, __cifs_dfs_mount() and expand_dfs_referral() are supposed to + * handle them properly. 
+ */
+static struct dfs_cache_entry *
+do_dfs_cache_find(const unsigned int xid, struct cifs_ses *ses,
+		  const struct nls_table *nls_codepage, int remap,
+		  const char *path, bool noreq)
+{
+	int rc;
+	unsigned int h;
+	struct dfs_cache_entry *ce;
+	struct dfs_info3_param *nrefs;
+	int numnrefs;
+
+	cifs_dbg(FYI, "%s: search path: %s\n", __func__, path);
+
+	ce = find_cache_entry(path, &h);
+	if (IS_ERR(ce)) {
+		cifs_dbg(FYI, "%s: cache miss\n", __func__);
+		/*
+		 * If @noreq is set, no requests will be sent to the server for
+		 * either updating or getting a new DFS referral.
+		 */
+		if (noreq)
+			return ce;
+		/*
+		 * No cache entry was found, so check for valid parameters that
+		 * will be required to get a new DFS referral and then create a
+		 * new cache entry.
+		 */
+		if (!ses || !ses->server || !ses->server->ops->get_dfs_refer)
+			return ERR_PTR(-EOPNOTSUPP);
+		if (unlikely(!nls_codepage))
+			return ERR_PTR(-EINVAL);
+
+		nrefs = NULL;
+		numnrefs = 0;
+
+		cifs_dbg(FYI, "%s: DFS referral request for %s\n", __func__,
+			 path);
+
+		rc = ses->server->ops->get_dfs_refer(xid, ses, path, &nrefs,
+						     &numnrefs, nls_codepage,
+						     remap);
+		if (rc)
+			return ERR_PTR(rc);
+
+		dump_refs(nrefs, numnrefs);
+
+		cifs_dbg(FYI, "%s: new cache entry\n", __func__);
+
+		/* Cache is full: evict the entry with the earliest expiry. */
+		if (dfs_cache_count >= DFS_CACHE_MAX_ENTRIES) {
+			cifs_dbg(FYI, "%s: reached max cache size (%d)",
+				 __func__, DFS_CACHE_MAX_ENTRIES);
+			remove_oldest_entry();
+		}
+		ce = add_cache_entry(h, path, nrefs, numnrefs);
+		/* The new entry duplicated the target names it needs. */
+		free_dfs_info_array(nrefs, numnrefs);
+
+		if (IS_ERR(ce))
+			return ce;
+
+		dfs_cache_count++;
+	}
+
+	dump_ce(ce);
+
+	/* Just return the found cache entry in case @noreq is set */
+	if (noreq)
+		return ce;
+
+	if (cache_entry_expired(ce)) {
+		cifs_dbg(FYI, "%s: expired cache entry\n", __func__);
+		ce = update_cache_entry(xid, ses, nls_codepage, remap, path,
+					ce);
+		if (IS_ERR(ce)) {
+			cifs_dbg(FYI, "%s: failed to update expired entry\n",
+				 __func__);
+		}
+	}
+	return ce;
+}
+
+/*
+ * Set up a new DFS referral from a given cache entry.
+ *
+ * @ref is zeroed first and then filled with freshly allocated copies of @path
+ * and @tgt plus the TTL, server type and flags stored in @ce.  On failure
+ * nothing allocated here is left behind in @ref.
+ *
+ * Return zero on success, -ENOMEM if a string could not be duplicated.
+ */
+static int setup_ref(const char *path, const struct dfs_cache_entry *ce,
+		     struct dfs_info3_param *ref, const char *tgt)
+{
+	int rc;
+
+	cifs_dbg(FYI, "%s: set up new ref\n", __func__);
+
+	memset(ref, 0, sizeof(*ref));
+
+	ref->path_name = kstrndup(path, strlen(path), GFP_KERNEL);
+	if (!ref->path_name)
+		return -ENOMEM;
+
+	ref->path_consumed = ce->ce_path_consumed;
+
+	ref->node_name = kstrndup(tgt, strlen(tgt), GFP_KERNEL);
+	if (!ref->node_name) {
+		rc = -ENOMEM;
+		goto err_free_path;
+	}
+
+	ref->ttl = ce->ce_ttl;
+	ref->server_type = ce->ce_srvtype;
+	ref->ref_flag = ce->ce_flags;
+
+	return 0;
+
+err_free_path:
+	kfree(ref->path_name);
+	ref->path_name = NULL;
+	return rc;
+}
+
+/*
+ * Return target list of a DFS cache entry.
+ *
+ * Builds in @tl one iterator per target of @ce, each holding its own copy of
+ * the target name.  The iterator matching the entry's target hint is placed
+ * at the head of the list.
+ *
+ * Return zero on success.  On -ENOMEM every iterator created so far is
+ * released and @tl is left as an empty list.
+ */
+static int get_tgt_list(const struct dfs_cache_entry *ce,
+			struct dfs_cache_tgt_list *tl)
+{
+	int rc;
+	struct list_head *head = &tl->tl_list;
+	struct dfs_cache_tgt *t;
+	struct dfs_cache_tgt_iterator *it, *nit;
+
+	memset(tl, 0, sizeof(*tl));
+	INIT_LIST_HEAD(head);
+
+	list_for_each_entry(t, &ce->ce_tlist, t_list) {
+		it = kzalloc(sizeof(*it), GFP_KERNEL);
+		if (!it) {
+			rc = -ENOMEM;
+			goto err_free_it;
+		}
+
+		it->it_name = kstrndup(t->t_name, strlen(t->t_name),
+				       GFP_KERNEL);
+		if (!it->it_name) {
+			kfree(it);
+			rc = -ENOMEM;
+			goto err_free_it;
+		}
+
+		if (ce->ce_tgthint == t)
+			list_add(&it->it_list, head);
+		else
+			list_add_tail(&it->it_list, head);
+	}
+	tl->tl_numtgts = ce->ce_numtgts;
+
+	return 0;
+
+err_free_it:
+	/*
+	 * Unlink each iterator before freeing it so that @tl ends up as a
+	 * valid empty list.  Otherwise the list head keeps pointing at freed
+	 * memory and a later dfs_cache_free_tgts() on @tl would free the
+	 * iterators a second time.
+	 */
+	list_for_each_entry_safe(it, nit, head, it_list) {
+		list_del(&it->it_list);
+		kfree(it->it_name);
+		kfree(it);
+	}
+	return rc;
+}
+
+/**
+ * dfs_cache_find - find a DFS cache entry
+ *
+ * If it doesn't find the cache entry, then it will get a DFS referral
+ * for @path and create a new entry.
+ *
+ * In case the cache entry exists but expired, it will get a DFS referral
+ * for @path and then update the respective cache entry.
+ *
+ * These parameters are passed down to the get_dfs_refer() call if it
+ * needs to be issued:
+ * @xid: syscall xid
+ * @ses: smb session to issue the request on
+ * @nls_codepage: charset conversion
+ * @remap: path character remapping type
+ * @path: path to lookup in DFS referral cache.
+ *
+ * @ref: when non-NULL, store single DFS referral result in it.
+ * @tgt_list: when non-NULL, store complete DFS target list in it.
+ *
+ * Return zero if the target was found, otherwise non-zero.
+ * Return -ENOENT if @ref was requested but the entry has no target hint.
+ */
+int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses,
+		   const struct nls_table *nls_codepage, int remap,
+		   const char *path, struct dfs_info3_param *ref,
+		   struct dfs_cache_tgt_list *tgt_list)
+{
+	int rc;
+	char *npath;
+	struct dfs_cache_entry *ce;
+
+	if (unlikely(!is_path_valid(path)))
+		return -EINVAL;
+
+	rc = get_normalized_path(path, &npath);
+	if (rc)
+		return rc;
+
+	/* The entry is only safe to read while the list lock is held. */
+	mutex_lock(&dfs_cache_list_lock);
+	ce = do_dfs_cache_find(xid, ses, nls_codepage, remap, npath, false);
+	if (IS_ERR(ce)) {
+		rc = PTR_ERR(ce);
+		goto out;
+	}
+
+	rc = 0;
+	if (ref) {
+		char *tgt = get_tgt_name(ce);
+
+		/*
+		 * get_tgt_name() returns an ERR_PTR when the entry has no
+		 * target hint; setup_ref() would call strlen() on it.
+		 */
+		if (IS_ERR(tgt)) {
+			rc = PTR_ERR(tgt);
+			goto out;
+		}
+		rc = setup_ref(path, ce, ref, tgt);
+	}
+	if (!rc && tgt_list)
+		rc = get_tgt_list(ce, tgt_list);
+out:
+	mutex_unlock(&dfs_cache_list_lock);
+	free_normalized_path(path, npath);
+	return rc;
+}
+
+/**
+ * dfs_cache_noreq_find - find a DFS cache entry without sending any requests to
+ * the currently connected server.
+ *
+ * NOTE: This function will neither update a cache entry in case it was
+ * expired, nor create a new cache entry if @path hasn't been found. It heavily
+ * relies on an existing cache entry.
+ *
+ * @path: path to lookup in the DFS referral cache.
+ * @ref: when non-NULL, store single DFS referral result in it.
+ * @tgt_list: when non-NULL, store complete DFS target list in it.
+ *
+ * Return 0 if successful.
+ * Return -ENOENT if the entry was not found.
+ * Return non-zero for other errors.
+ */
+int dfs_cache_noreq_find(const char *path, struct dfs_info3_param *ref,
+			 struct dfs_cache_tgt_list *tgt_list)
+{
+	int rc;
+	char *npath;
+	struct dfs_cache_entry *ce;
+
+	if (unlikely(!is_path_valid(path)))
+		return -EINVAL;
+
+	rc = get_normalized_path(path, &npath);
+	if (rc)
+		return rc;
+
+	mutex_lock(&dfs_cache_list_lock);
+	/* noreq lookup: no session or codepage is needed, nothing is sent. */
+	ce = do_dfs_cache_find(0, NULL, NULL, 0, npath, true);
+	if (IS_ERR(ce)) {
+		rc = PTR_ERR(ce);
+		goto out;
+	}
+
+	rc = 0;
+	if (ref) {
+		char *tgt = get_tgt_name(ce);
+
+		/*
+		 * get_tgt_name() returns an ERR_PTR when the entry has no
+		 * target hint; setup_ref() would call strlen() on it.
+		 */
+		if (IS_ERR(tgt)) {
+			rc = PTR_ERR(tgt);
+			goto out;
+		}
+		rc = setup_ref(path, ce, ref, tgt);
+	}
+	if (!rc && tgt_list)
+		rc = get_tgt_list(ce, tgt_list);
+out:
+	mutex_unlock(&dfs_cache_list_lock);
+	free_normalized_path(path, npath);
+	return rc;
+}
+
+/**
+ * dfs_cache_update_tgthint - update target hint of a DFS cache entry
+ *
+ * If it doesn't find the cache entry, then it will get a DFS referral for @path
+ * and create a new entry.
+ *
+ * In case the cache entry exists but expired, it will get a DFS referral
+ * for @path and then update the respective cache entry.
+ *
+ * @xid: syscall id
+ * @ses: smb session
+ * @nls_codepage: charset conversion
+ * @remap: type of character remapping for paths
+ * @path: path to lookup in DFS referral cache.
+ * @it: DFS target iterator
+ *
+ * Return zero if the target hint was updated successfully, otherwise non-zero.
+ */
+int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses,
+			     const struct nls_table *nls_codepage, int remap,
+			     const char *path,
+			     const struct dfs_cache_tgt_iterator *it)
+{
+	int rc;
+	char *npath;
+	struct dfs_cache_entry *ce;
+	struct dfs_cache_tgt *t;
+
+	/* @it is dereferenced below; reject NULL like the noreq variant. */
+	if (unlikely(!is_path_valid(path)) || !it)
+		return -EINVAL;
+
+	rc = get_normalized_path(path, &npath);
+	if (rc)
+		return rc;
+
+	cifs_dbg(FYI, "%s: path: %s\n", __func__, npath);
+
+	mutex_lock(&dfs_cache_list_lock);
+	ce = do_dfs_cache_find(xid, ses, nls_codepage, remap, npath, false);
+	if (IS_ERR(ce)) {
+		rc = PTR_ERR(ce);
+		goto out;
+	}
+
+	rc = 0;
+
+	t = ce->ce_tgthint;
+
+	/*
+	 * Nothing to do when the current hint already names @it.  The hint
+	 * is NULL for an entry without targets, so test it before use.
+	 */
+	if (likely(t && !strcasecmp(it->it_name, t->t_name)))
+		goto out;
+
+	/* Otherwise promote the first target whose name matches @it. */
+	list_for_each_entry(t, &ce->ce_tlist, t_list) {
+		if (!strcasecmp(t->t_name, it->it_name)) {
+			ce->ce_tgthint = t;
+			cifs_dbg(FYI, "%s: new target hint: %s\n", __func__,
+				 it->it_name);
+			break;
+		}
+	}
+
+out:
+	mutex_unlock(&dfs_cache_list_lock);
+	free_normalized_path(path, npath);
+	return rc;
+}
+
+/**
+ * dfs_cache_noreq_update_tgthint - update target hint of a DFS cache entry
+ * without sending any requests to the currently connected server.
+ *
+ * NOTE: This function will neither update a cache entry in case it was
+ * expired, nor create a new cache entry if @path hasn't been found. It heavily
+ * relies on an existing cache entry.
+ *
+ * @path: path to lookup in DFS referral cache.
+ * @it: target iterator which contains the target hint to update the cache
+ *      entry with.
+ *
+ * Return zero if the target hint was updated successfully, otherwise non-zero.
+ */
+int dfs_cache_noreq_update_tgthint(const char *path,
+				   const struct dfs_cache_tgt_iterator *it)
+{
+	int rc;
+	char *npath;
+	struct dfs_cache_entry *ce;
+	struct dfs_cache_tgt *t;
+
+	if (unlikely(!is_path_valid(path)) || !it)
+		return -EINVAL;
+
+	rc = get_normalized_path(path, &npath);
+	if (rc)
+		return rc;
+
+	cifs_dbg(FYI, "%s: path: %s\n", __func__, npath);
+
+	mutex_lock(&dfs_cache_list_lock);
+
+	/* noreq lookup: only an already cached entry can be updated. */
+	ce = do_dfs_cache_find(0, NULL, NULL, 0, npath, true);
+	if (IS_ERR(ce)) {
+		rc = PTR_ERR(ce);
+		goto out;
+	}
+
+	rc = 0;
+
+	t = ce->ce_tgthint;
+
+	/*
+	 * Nothing to do when the current hint already names @it.  The hint
+	 * is NULL for an entry without targets, so test it before use.
+	 */
+	if (likely(t && !strcasecmp(it->it_name, t->t_name)))
+		goto out;
+
+	/* Otherwise promote the first target whose name matches @it. */
+	list_for_each_entry(t, &ce->ce_tlist, t_list) {
+		if (!strcasecmp(t->t_name, it->it_name)) {
+			ce->ce_tgthint = t;
+			cifs_dbg(FYI, "%s: new target hint: %s\n", __func__,
+				 it->it_name);
+			break;
+		}
+	}
+
+out:
+	mutex_unlock(&dfs_cache_list_lock);
+	free_normalized_path(path, npath);
+	return rc;
+}
+
+/**
+ * dfs_cache_get_tgt_referral - returns a DFS referral (@ref) from a given
+ * target iterator (@it).
+ *
+ * @path: path to lookup in DFS referral cache.
+ * @it: DFS target iterator.
+ * @ref: DFS referral pointer to set up the gathered information.
+ *
+ * Return zero if the DFS referral was set up correctly, otherwise non-zero.
+ */
+int dfs_cache_get_tgt_referral(const char *path,
+			       const struct dfs_cache_tgt_iterator *it,
+			       struct dfs_info3_param *ref)
+{
+	int rc;
+	char *npath;
+	struct dfs_cache_entry *ce;
+	unsigned int h;
+
+	if (!it || !ref)
+		return -EINVAL;
+	if (unlikely(!is_path_valid(path)))
+		return -EINVAL;
+
+	rc = get_normalized_path(path, &npath);
+	if (rc)
+		return rc;
+
+	cifs_dbg(FYI, "%s: path: %s\n", __func__, npath);
+
+	mutex_lock(&dfs_cache_list_lock);
+
+	/* Plain hash lookup: never contacts the server, ignores expiry. */
+	ce = find_cache_entry(npath, &h);
+	if (IS_ERR(ce)) {
+		rc = PTR_ERR(ce);
+		goto out;
+	}
+
+	cifs_dbg(FYI, "%s: target name: %s\n", __func__, it->it_name);
+
+	rc = setup_ref(path, ce, ref, it->it_name);
+
+out:
+	mutex_unlock(&dfs_cache_list_lock);
+	free_normalized_path(path, npath);
+	return rc;
+}
+
+/*
+ * Copy @vol into @new, duplicating the username, password, UNC, domainname,
+ * iocharset and prepath strings so that @new owns its own copies.  All other
+ * members are copied bitwise.
+ *
+ * @new is storage provided by the caller (dfs_cache_add_vol() passes a member
+ * embedded in a larger allocation), so it is never freed here; on failure
+ * only the strings duplicated so far are released.
+ *
+ * Return zero on success, -ENOMEM if a string could not be duplicated.
+ */
+static int dup_vol(struct smb_vol *vol, struct smb_vol *new)
+{
+	memcpy(new, vol, sizeof(*new));
+
+	if (vol->username) {
+		new->username = kstrndup(vol->username, strlen(vol->username),
+					 GFP_KERNEL);
+		if (!new->username)
+			return -ENOMEM;
+	}
+	if (vol->password) {
+		new->password = kstrndup(vol->password, strlen(vol->password),
+					 GFP_KERNEL);
+		if (!new->password)
+			goto err_free_username;
+	}
+	if (vol->UNC) {
+		cifs_dbg(FYI, "%s: vol->UNC: %s\n", __func__, vol->UNC);
+		new->UNC = kstrndup(vol->UNC, strlen(vol->UNC), GFP_KERNEL);
+		if (!new->UNC)
+			goto err_free_password;
+	}
+	if (vol->domainname) {
+		new->domainname = kstrndup(vol->domainname,
+					   strlen(vol->domainname), GFP_KERNEL);
+		if (!new->domainname)
+			goto err_free_unc;
+	}
+	if (vol->iocharset) {
+		new->iocharset = kstrndup(vol->iocharset,
+					  strlen(vol->iocharset), GFP_KERNEL);
+		if (!new->iocharset)
+			goto err_free_domainname;
+	}
+	if (vol->prepath) {
+		cifs_dbg(FYI, "%s: vol->prepath: %s\n", __func__, vol->prepath);
+		new->prepath = kstrndup(vol->prepath, strlen(vol->prepath),
+					GFP_KERNEL);
+		if (!new->prepath)
+			goto err_free_iocharset;
+	}
+
+	return 0;
+
+	/*
+	 * Unwind in reverse order.  A member whose source string was NULL is
+	 * still NULL in @new (copied by memcpy), so freeing it is harmless.
+	 */
+err_free_iocharset:
+	kfree(new->iocharset);
+err_free_domainname:
+	kfree(new->domainname);
+err_free_unc:
+	kfree(new->UNC);
+err_free_password:
+	kzfree(new->password);
+err_free_username:
+	kfree(new->username);
+	/* Do not kfree(@new): the caller owns that storage and releases it. */
+	return -ENOMEM;
+}
+
+/**
+ * dfs_cache_add_vol - add a cifs volume during mount() that will be handled by
+ * DFS cache refresh worker.
+ *
+ * @vol: cifs volume.
+ * @fullpath: origin full path.
+ *
+ * Return zero if volume was set up correctly, otherwise non-zero.
+ */
+int dfs_cache_add_vol(struct smb_vol *vol, const char *fullpath)
+{
+	int rc;
+	struct dfs_cache_vol_info *vi;
+
+	if (!vol || !fullpath)
+		return -EINVAL;
+
+	cifs_dbg(FYI, "%s: fullpath: %s\n", __func__, fullpath);
+
+	vi = kzalloc(sizeof(*vi), GFP_KERNEL);
+	if (!vi)
+		return -ENOMEM;
+
+	vi->vi_fullpath = kstrndup(fullpath, strlen(fullpath), GFP_KERNEL);
+	if (!vi->vi_fullpath) {
+		rc = -ENOMEM;
+		goto err_free_vi;
+	}
+
+	/* vi_vol is embedded in @vi; dup_vol() fills it in place. */
+	rc = dup_vol(vol, &vi->vi_vol);
+	if (rc)
+		goto err_free_fullpath;
+
+	mutex_lock(&dfs_cache.dc_lock);
+	list_add_tail(&vi->vi_list, &dfs_cache.dc_vol_list);
+	mutex_unlock(&dfs_cache.dc_lock);
+	return 0;
+
+err_free_fullpath:
+	kfree(vi->vi_fullpath);
+err_free_vi:
+	kfree(vi);
+	return rc;
+}
+
+/*
+ * Look up a volume by case-insensitive match on its full path.  The callers
+ * in this file hold dfs_cache.dc_lock around the call.
+ *
+ * Return the matching volume info, or ERR_PTR(-ENOENT) if there is none.
+ */
+static inline struct dfs_cache_vol_info *find_vol(const char *fullpath)
+{
+	struct dfs_cache_vol_info *vi;
+
+	list_for_each_entry(vi, &dfs_cache.dc_vol_list, vi_list) {
+		cifs_dbg(FYI, "%s: vi->vi_fullpath: %s\n", __func__,
+			 vi->vi_fullpath);
+		if (!strcasecmp(vi->vi_fullpath, fullpath))
+			return vi;
+	}
+	return ERR_PTR(-ENOENT);
+}
+
+/**
+ * dfs_cache_update_vol - update vol info in DFS cache after failover
+ *
+ * @fullpath: fullpath to look up in volume list.
+ * @server: TCP ses pointer.
+ *
+ * Return zero if volume was updated, otherwise non-zero.
+ */
+int dfs_cache_update_vol(const char *fullpath, struct TCP_Server_Info *server)
+{
+	int rc;
+	struct dfs_cache_vol_info *vi;
+
+	if (!fullpath || !server)
+		return -EINVAL;
+
+	cifs_dbg(FYI, "%s: fullpath: %s\n", __func__, fullpath);
+
+	mutex_lock(&dfs_cache.dc_lock);
+
+	vi = find_vol(fullpath);
+	if (IS_ERR(vi)) {
+		rc = PTR_ERR(vi);
+		goto out;
+	}
+
+	/* Only the destination address of the stored volume is refreshed. */
+	cifs_dbg(FYI, "%s: updating volume info\n", __func__);
+	memcpy(&vi->vi_vol.dstaddr, &server->dstaddr,
+	       sizeof(vi->vi_vol.dstaddr));
+	rc = 0;
+
+out:
+	mutex_unlock(&dfs_cache.dc_lock);
+	return rc;
+}
+
+/**
+ * dfs_cache_del_vol - remove volume info in DFS cache during umount()
+ *
+ * @fullpath: fullpath to look up in volume list.
+ */
+void dfs_cache_del_vol(const char *fullpath)
+{
+	struct dfs_cache_vol_info *vi;
+
+	if (!fullpath || !*fullpath)
+		return;
+
+	cifs_dbg(FYI, "%s: fullpath: %s\n", __func__, fullpath);
+
+	mutex_lock(&dfs_cache.dc_lock);
+	vi = find_vol(fullpath);
+	if (!IS_ERR(vi))
+		free_vol(vi);
+	mutex_unlock(&dfs_cache.dc_lock);
+}
+
+/*
+ * Get all tcons that are within a DFS namespace and can be refreshed.
+ *
+ * @head is (re)initialised and then filled, under cifs_tcp_ses_lock, with
+ * every tcon of @server that has a dfs_path and needs neither a reconnect nor
+ * a file reopen.  Each such tcon gets its tc_count raised.
+ */
+static void get_tcons(struct TCP_Server_Info *server, struct list_head *head)
+{
+	struct cifs_ses *ses;
+	struct cifs_tcon *tcon;
+
+	INIT_LIST_HEAD(head);
+
+	spin_lock(&cifs_tcp_ses_lock);
+	list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+		list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+			if (!tcon->need_reconnect && !tcon->need_reopen_files &&
+			    tcon->dfs_path) {
+				tcon->tc_count++;
+				list_add_tail(&tcon->ulist, head);
+			}
+		}
+		/*
+		 * NOTE(review): the IPC tcon is queued without raising
+		 * tc_count although the worker calls cifs_put_tcon() on every
+		 * queued tcon - presumably cifs_put_tcon() ignores IPC tcons;
+		 * confirm.
+		 */
+		if (ses->tcon_ipc && !ses->tcon_ipc->need_reconnect &&
+		    ses->tcon_ipc->dfs_path) {
+			list_add_tail(&ses->tcon_ipc->ulist, head);
+		}
+	}
+	spin_unlock(&cifs_tcp_ses_lock);
+}
+
+/*
+ * Refresh DFS cache entry from a given tcon.
+ *
+ * Looks up the cache entry for the tcon's DFS path and, only if that entry
+ * has expired, requests a new referral through the tcon's session and stores
+ * it in the entry.  Failures are logged at FYI level; nothing is returned.
+ */
+static void do_refresh_tcon(struct dfs_cache *dc, struct cifs_tcon *tcon)
+{
+	int rc = 0;
+	unsigned int xid;
+	char *path, *npath;
+	unsigned int h;
+	struct dfs_cache_entry *ce;
+	struct dfs_info3_param *refs = NULL;
+	int numrefs = 0;
+	bool expired = false;
+
+	xid = get_xid();
+
+	/* Skip the first character of the stored DFS path. */
+	path = tcon->dfs_path + 1;
+
+	rc = get_normalized_path(path, &npath);
+	if (rc)
+		goto out_free_xid;	/* npath was not set up: nothing to free */
+
+	/*
+	 * Evaluate the expiry while the list lock is still held; the entry
+	 * must not be touched once the lock has been dropped.
+	 */
+	mutex_lock(&dfs_cache_list_lock);
+	ce = find_cache_entry(npath, &h);
+	if (IS_ERR(ce))
+		rc = PTR_ERR(ce);
+	else
+		expired = cache_entry_expired(ce);
+	mutex_unlock(&dfs_cache_list_lock);
+
+	if (rc || !expired)
+		goto out;
+
+	if (unlikely(!tcon->ses->server->ops->get_dfs_refer)) {
+		rc = -EOPNOTSUPP;
+	} else {
+		/* The request is sent without holding the cache lock. */
+		rc = tcon->ses->server->ops->get_dfs_refer(xid, tcon->ses, path,
+							   &refs, &numrefs,
+							   dc->dc_nlsc,
+							   tcon->remap);
+		if (!rc) {
+			mutex_lock(&dfs_cache_list_lock);
+			ce = __update_cache_entry(npath, refs, numrefs);
+			mutex_unlock(&dfs_cache_list_lock);
+			dump_refs(refs, numrefs);
+			free_dfs_info_array(refs, numrefs);
+			if (IS_ERR(ce))
+				rc = PTR_ERR(ce);
+		}
+	}
+	if (rc)
+		cifs_dbg(FYI, "%s: failed to update expired entry\n", __func__);
+out:
+	free_normalized_path(path, npath);
+out_free_xid:
+	free_xid(xid);
+}
+
+/*
+ * Worker that will refresh DFS cache based on lowest TTL value from a DFS
+ * referral.
+ *
+ * FIXME: ensure that all requests are sent to DFS root for refreshing the
+ * cache.
+ */ +static void refresh_cache_worker(struct work_struct *work) +{ + struct dfs_cache *dc = container_of(work, struct dfs_cache, + dc_refresh.work); + struct dfs_cache_vol_info *vi; + struct TCP_Server_Info *server; + LIST_HEAD(list); + struct cifs_tcon *tcon, *ntcon; + + mutex_lock(&dc->dc_lock); + + list_for_each_entry(vi, &dc->dc_vol_list, vi_list) { + server = cifs_find_tcp_session(&vi->vi_vol); + if (IS_ERR_OR_NULL(server)) + continue; + if (server->tcpStatus != CifsGood) + goto next; + get_tcons(server, &list); + list_for_each_entry_safe(tcon, ntcon, &list, ulist) { + do_refresh_tcon(dc, tcon); + list_del_init(&tcon->ulist); + cifs_put_tcon(tcon); + } +next: + cifs_put_tcp_session(server, 0); + } + queue_delayed_work(cifsiod_wq, &dc->dc_refresh, dc->dc_ttl * HZ); + mutex_unlock(&dc->dc_lock); +} diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h new file mode 100644 index 000000000000..22f366514f3a --- /dev/null +++ b/fs/cifs/dfs_cache.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * DFS referral cache routines + * + * Copyright (c) 2018 Paulo Alcantara <palcantara@suse.de> + */ + +#ifndef _CIFS_DFS_CACHE_H +#define _CIFS_DFS_CACHE_H + +#include <linux/nls.h> +#include <linux/list.h> +#include "cifsglob.h" + +struct dfs_cache_tgt_list { + int tl_numtgts; + struct list_head tl_list; +}; + +struct dfs_cache_tgt_iterator { + char *it_name; + struct list_head it_list; +}; + +extern int dfs_cache_init(void); +extern void dfs_cache_destroy(void); +extern const struct file_operations dfscache_proc_fops; + +extern int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses, + const struct nls_table *nls_codepage, int remap, + const char *path, struct dfs_info3_param *ref, + struct dfs_cache_tgt_list *tgt_list); +extern int dfs_cache_noreq_find(const char *path, struct dfs_info3_param *ref, + struct dfs_cache_tgt_list *tgt_list); +extern int dfs_cache_update_tgthint(const unsigned int xid, + struct cifs_ses *ses, + const struct nls_table 
*nls_codepage, + int remap, const char *path, + const struct dfs_cache_tgt_iterator *it); +extern int +dfs_cache_noreq_update_tgthint(const char *path, + const struct dfs_cache_tgt_iterator *it); +extern int dfs_cache_get_tgt_referral(const char *path, + const struct dfs_cache_tgt_iterator *it, + struct dfs_info3_param *ref); +extern int dfs_cache_add_vol(struct smb_vol *vol, const char *fullpath); +extern int dfs_cache_update_vol(const char *fullpath, + struct TCP_Server_Info *server); +extern void dfs_cache_del_vol(const char *fullpath); + +static inline struct dfs_cache_tgt_iterator * +dfs_cache_get_next_tgt(struct dfs_cache_tgt_list *tl, + struct dfs_cache_tgt_iterator *it) +{ + if (!tl || list_empty(&tl->tl_list) || !it || + list_is_last(&it->it_list, &tl->tl_list)) + return NULL; + return list_next_entry(it, it_list); +} + +static inline struct dfs_cache_tgt_iterator * +dfs_cache_get_tgt_iterator(struct dfs_cache_tgt_list *tl) +{ + if (!tl) + return NULL; + return list_first_entry_or_null(&tl->tl_list, + struct dfs_cache_tgt_iterator, + it_list); +} + +static inline void dfs_cache_free_tgts(struct dfs_cache_tgt_list *tl) +{ + struct dfs_cache_tgt_iterator *it, *nit; + + if (!tl || list_empty(&tl->tl_list)) + return; + list_for_each_entry_safe(it, nit, &tl->tl_list, it_list) { + list_del(&it->it_list); + kfree(it->it_name); + kfree(it); + } + tl->tl_numtgts = 0; +} + +static inline const char * +dfs_cache_get_tgt_name(const struct dfs_cache_tgt_iterator *it) +{ + return it ? it->it_name : NULL; +} + +static inline int +dfs_cache_get_nr_tgts(const struct dfs_cache_tgt_list *tl) +{ + return tl ? 
tl->tl_numtgts : 0; +} + +#endif /* _CIFS_DFS_CACHE_H */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 6706328ce03f..2c7689f3998d 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -33,6 +33,7 @@ #include <linux/mount.h> #include <linux/slab.h> #include <linux/swap.h> +#include <linux/mm.h> #include <asm/div64.h> #include "cifsfs.h" #include "cifspdu.h" @@ -732,7 +733,8 @@ reopen_success: if (can_flush) { rc = filemap_write_and_wait(inode->i_mapping); - mapping_set_error(inode->i_mapping, rc); + if (!is_interrupt_error(rc)) + mapping_set_error(inode->i_mapping, rc); if (tcon->unix_ext) rc = cifs_get_inode_info_unix(&inode, full_path, @@ -1131,14 +1133,18 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) /* * Accessing maxBuf is racy with cifs_reconnect - need to store value - * and check it for zero before using. + * and check it before using. */ max_buf = tcon->ses->server->maxBuf; - if (!max_buf) { + if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) { free_xid(xid); return -EINVAL; } + BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > + PAGE_SIZE); + max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), + PAGE_SIZE); max_num = (max_buf - sizeof(struct smb_hdr)) / sizeof(LOCKING_ANDX_RANGE); buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); @@ -1471,12 +1477,16 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, /* * Accessing maxBuf is racy with cifs_reconnect - need to store value - * and check it for zero before using. + * and check it before using. 
*/ max_buf = tcon->ses->server->maxBuf; - if (!max_buf) + if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) return -EINVAL; + BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > + PAGE_SIZE); + max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), + PAGE_SIZE); max_num = (max_buf - sizeof(struct smb_hdr)) / sizeof(LOCKING_ANDX_RANGE); buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); @@ -2109,6 +2119,7 @@ static int cifs_writepages(struct address_space *mapping, pgoff_t end, index; struct cifs_writedata *wdata; int rc = 0; + int saved_rc = 0; unsigned int xid; /* @@ -2137,8 +2148,10 @@ retry: rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize, &wsize, &credits); - if (rc) + if (rc != 0) { + done = true; break; + } tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1; @@ -2146,6 +2159,7 @@ retry: &found_pages); if (!wdata) { rc = -ENOMEM; + done = true; add_credits_and_wake_if(server, credits, 0); break; } @@ -2174,7 +2188,7 @@ retry: if (rc != 0) { add_credits_and_wake_if(server, wdata->credits, 0); for (i = 0; i < nr_pages; ++i) { - if (rc == -EAGAIN) + if (is_retryable_error(rc)) redirty_page_for_writepage(wbc, wdata->pages[i]); else @@ -2182,7 +2196,7 @@ retry: end_page_writeback(wdata->pages[i]); put_page(wdata->pages[i]); } - if (rc != -EAGAIN) + if (!is_retryable_error(rc)) mapping_set_error(mapping, rc); } kref_put(&wdata->refcount, cifs_writedata_release); @@ -2192,6 +2206,15 @@ retry: continue; } + /* Return immediately if we received a signal during writing */ + if (is_interrupt_error(rc)) { + done = true; + break; + } + + if (rc != 0 && saved_rc == 0) + saved_rc = rc; + wbc->nr_to_write -= nr_pages; if (wbc->nr_to_write <= 0) done = true; @@ -2209,6 +2232,9 @@ retry: goto retry; } + if (saved_rc != 0) + rc = saved_rc; + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = index; @@ -2241,8 +2267,8 @@ cifs_writepage_locked(struct page *page, struct 
writeback_control *wbc) set_page_writeback(page); retry_write: rc = cifs_partialpagewrite(page, 0, PAGE_SIZE); - if (rc == -EAGAIN) { - if (wbc->sync_mode == WB_SYNC_ALL) + if (is_retryable_error(rc)) { + if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) goto retry_write; redirty_page_for_writepage(wbc, page); } else if (rc != 0) { @@ -2617,11 +2643,13 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, if (rc) break; + cur_len = min_t(const size_t, len, wsize); + if (ctx->direct_io) { ssize_t result; result = iov_iter_get_pages_alloc( - from, &pagevec, wsize, &start); + from, &pagevec, cur_len, &start); if (result < 0) { cifs_dbg(VFS, "direct_writev couldn't get user pages " @@ -2630,6 +2658,9 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, result, from->type, from->iov_offset, from->count); dump_stack(); + + rc = result; + add_credits_and_wake_if(server, credits, 0); break; } cur_len = (size_t)result; @@ -3313,13 +3344,16 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, cur_len, &start); if (result < 0) { cifs_dbg(VFS, - "couldn't get user pages (cur_len=%zd)" + "couldn't get user pages (rc=%zd)" " iter type %d" " iov_offset %zd count %zd\n", result, direct_iov.type, direct_iov.iov_offset, direct_iov.count); dump_stack(); + + rc = result; + add_credits_and_wake_if(server, credits, 0); break; } cur_len = (size_t)result; @@ -3956,7 +3990,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list, INIT_LIST_HEAD(tmplist); - page = list_entry(page_list->prev, struct page, lru); + page = lru_to_page(page_list); /* * Lock the page and put it in the cache. 
Since no one else diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a81a9df997c1..478003644916 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -333,7 +333,7 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb) fattr->cf_mtime = timespec64_trunc(fattr->cf_mtime, sb->s_time_gran); fattr->cf_atime = fattr->cf_ctime = fattr->cf_mtime; fattr->cf_nlink = 2; - fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL; + fattr->cf_flags = CIFS_FATTR_DFS_REFERRAL; } static int @@ -730,7 +730,6 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, FILE_ALL_INFO *data, struct super_block *sb, int xid, const struct cifs_fid *fid) { - bool validinum = false; __u16 srchflgs; int rc = 0, tmprc = ENOSYS; struct cifs_tcon *tcon; @@ -821,7 +820,6 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, (FILE_DIRECTORY_INFO *)data, cifs_sb); fattr.cf_uniqueid = le64_to_cpu( ((SEARCH_ID_FULL_DIR_INFO *)data)->UniqueId); - validinum = true; cifs_buf_release(srchinf->ntwrk_buf_start); } @@ -840,31 +838,29 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, */ if (*inode == NULL) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { - if (validinum == false) { - if (server->ops->get_srv_inum) - tmprc = server->ops->get_srv_inum(xid, - tcon, cifs_sb, full_path, - &fattr.cf_uniqueid, data); - if (tmprc) { - cifs_dbg(FYI, "GetSrvInodeNum rc %d\n", - tmprc); - fattr.cf_uniqueid = iunique(sb, ROOT_I); - cifs_autodisable_serverino(cifs_sb); - } else if ((fattr.cf_uniqueid == 0) && - strlen(full_path) == 0) { - /* some servers ret bad root ino ie 0 */ - cifs_dbg(FYI, "Invalid (0) inodenum\n"); - fattr.cf_flags |= - CIFS_FATTR_FAKE_ROOT_INO; - fattr.cf_uniqueid = - simple_hashstr(tcon->treeName); - } + if (server->ops->get_srv_inum) + tmprc = server->ops->get_srv_inum(xid, + tcon, cifs_sb, full_path, + &fattr.cf_uniqueid, data); + if (tmprc) { + cifs_dbg(FYI, "GetSrvInodeNum rc %d\n", + tmprc); + fattr.cf_uniqueid = iunique(sb, 
ROOT_I); + cifs_autodisable_serverino(cifs_sb); + } else if ((fattr.cf_uniqueid == 0) && + strlen(full_path) == 0) { + /* some servers ret bad root ino ie 0 */ + cifs_dbg(FYI, "Invalid (0) inodenum\n"); + fattr.cf_flags |= + CIFS_FATTR_FAKE_ROOT_INO; + fattr.cf_uniqueid = + simple_hashstr(tcon->treeName); } } else fattr.cf_uniqueid = iunique(sb, ROOT_I); } else { - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) && - validinum == false && server->ops->get_srv_inum) { + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) + && server->ops->get_srv_inum) { /* * Pass a NULL tcon to ensure we don't make a round * trip to the server. This only works for SMB2+. @@ -2261,6 +2257,11 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) * the flush returns error? */ rc = filemap_write_and_wait(inode->i_mapping); + if (is_interrupt_error(rc)) { + rc = -ERESTARTSYS; + goto out; + } + mapping_set_error(inode->i_mapping, rc); rc = 0; @@ -2404,6 +2405,11 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) * the flush returns error? 
*/ rc = filemap_write_and_wait(inode->i_mapping); + if (is_interrupt_error(rc)) { + rc = -ERESTARTSYS; + goto cifs_setattr_exit; + } + mapping_set_error(inode->i_mapping, rc); rc = 0; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 8a41f4eba726..bee203055b30 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -111,21 +111,27 @@ struct cifs_tcon * tconInfoAlloc(void) { struct cifs_tcon *ret_buf; - ret_buf = kzalloc(sizeof(struct cifs_tcon), GFP_KERNEL); - if (ret_buf) { - atomic_inc(&tconInfoAllocCount); - ret_buf->tidStatus = CifsNew; - ++ret_buf->tc_count; - INIT_LIST_HEAD(&ret_buf->openFileList); - INIT_LIST_HEAD(&ret_buf->tcon_list); - spin_lock_init(&ret_buf->open_file_lock); - mutex_init(&ret_buf->crfid.fid_mutex); - ret_buf->crfid.fid = kzalloc(sizeof(struct cifs_fid), - GFP_KERNEL); - spin_lock_init(&ret_buf->stat_lock); - atomic_set(&ret_buf->num_local_opens, 0); - atomic_set(&ret_buf->num_remote_opens, 0); + + ret_buf = kzalloc(sizeof(*ret_buf), GFP_KERNEL); + if (!ret_buf) + return NULL; + ret_buf->crfid.fid = kzalloc(sizeof(*ret_buf->crfid.fid), GFP_KERNEL); + if (!ret_buf->crfid.fid) { + kfree(ret_buf); + return NULL; } + + atomic_inc(&tconInfoAllocCount); + ret_buf->tidStatus = CifsNew; + ++ret_buf->tc_count; + INIT_LIST_HEAD(&ret_buf->openFileList); + INIT_LIST_HEAD(&ret_buf->tcon_list); + spin_lock_init(&ret_buf->open_file_lock); + mutex_init(&ret_buf->crfid.fid_mutex); + spin_lock_init(&ret_buf->stat_lock); + atomic_set(&ret_buf->num_local_opens, 0); + atomic_set(&ret_buf->num_remote_opens, 0); + return ret_buf; } @@ -140,6 +146,9 @@ tconInfoFree(struct cifs_tcon *buf_to_free) kfree(buf_to_free->nativeFileSystem); kzfree(buf_to_free->password); kfree(buf_to_free->crfid.fid); +#ifdef CONFIG_CIFS_DFS_UPCALL + kfree(buf_to_free->dfs_path); +#endif kfree(buf_to_free); } @@ -525,9 +534,17 @@ void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { + struct cifs_tcon *tcon = NULL; + + if 
(cifs_sb->master_tlink) + tcon = cifs_sb_master_tcon(cifs_sb); + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; - cifs_dbg(VFS, "Autodisabling the use of server inode numbers on %s. This server doesn't seem to support them properly. Hardlinks will not be recognized on this mount. Consider mounting with the \"noserverino\" option to silence this message.\n", - cifs_sb_master_tcon(cifs_sb)->treeName); + cifs_dbg(VFS, "Autodisabling the use of server inode numbers on %s.\n", + tcon ? tcon->treeName : "new server"); + cifs_dbg(VFS, "The server doesn't seem to support them properly or the files might be on different servers (DFS).\n"); + cifs_dbg(VFS, "Hardlinks will not be recognized on this mount. Consider mounting with the \"noserverino\" option to silence this message.\n"); + } } @@ -732,6 +749,8 @@ parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size, goto parse_DFS_referrals_exit; } + node->ttl = le32_to_cpu(ref->TimeToLive); + ref++; } @@ -933,3 +952,20 @@ void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page, else if (page == 0) *len = rqst->rq_pagesz - rqst->rq_offset; } + +void extract_unc_hostname(const char *unc, const char **h, size_t *len) +{ + const char *end; + + /* skip initial slashes */ + while (*unc && (*unc == '\\' || *unc == '/')) + unc++; + + end = unc; + + while (*end && !(*end == '\\' || *end == '/')) + end++; + + *h = unc; + *len = end - unc; +} diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index e169e1a5fd35..3925a7bfc74d 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -655,7 +655,14 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos, /* scan and find it */ int i; char *cur_ent; - char *end_of_smb = cfile->srch_inf.ntwrk_buf_start + + char *end_of_smb; + + if (cfile->srch_inf.ntwrk_buf_start == NULL) { + cifs_dbg(VFS, "ntwrk_buf_start is NULL during readdir\n"); + return -EIO; + } + + end_of_smb = cfile->srch_inf.ntwrk_buf_start + server->ops->calc_smb_size( 
cfile->srch_inf.ntwrk_buf_start, server); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index aa23c00367ec..dcd49ad60c83 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -534,9 +534,9 @@ cifs_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested) if (global_secflags & CIFSSEC_MAY_NTLM) return NTLM; default: - /* Fallthrough to attempt LANMAN authentication next */ break; } + /* Fallthrough - to attempt LANMAN authentication next */ case CIFS_NEGFLAVOR_LANMAN: switch (requested) { case LANMAN: @@ -1154,14 +1154,12 @@ out: static int _sess_auth_rawntlmssp_assemble_req(struct sess_data *sess_data) { - struct smb_hdr *smb_buf; SESSION_SETUP_ANDX *pSMB; struct cifs_ses *ses = sess_data->ses; __u32 capabilities; char *bcc_ptr; pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; - smb_buf = (struct smb_hdr *)pSMB; capabilities = cifs_ssetup_hdr(ses, pSMB); if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 378151e09e91..32a6c020478f 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -929,19 +929,18 @@ cifs_unix_dfs_readlink(const unsigned int xid, struct cifs_tcon *tcon, { #ifdef CONFIG_CIFS_DFS_UPCALL int rc; - unsigned int num_referrals = 0; - struct dfs_info3_param *referrals = NULL; + struct dfs_info3_param referral = {0}; - rc = get_dfs_path(xid, tcon->ses, searchName, nls_codepage, - &num_referrals, &referrals, 0); + rc = get_dfs_path(xid, tcon->ses, searchName, nls_codepage, &referral, + 0); - if (!rc && num_referrals > 0) { - *symlinkinfo = kstrndup(referrals->node_name, - strlen(referrals->node_name), + if (!rc) { + *symlinkinfo = kstrndup(referral.node_name, + strlen(referral.node_name), GFP_KERNEL); + free_dfs_info_param(&referral); if (!*symlinkinfo) rc = -ENOMEM; - free_dfs_info_array(referrals, num_referrals); } return rc; #else /* No DFS support */ diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 4ed10dd086e6..b204e84b87fb 100644 --- a/fs/cifs/smb2file.c 
+++ b/fs/cifs/smb2file.c @@ -122,12 +122,14 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, /* * Accessing maxBuf is racy with cifs_reconnect - need to store value - * and check it for zero before using. + * and check it before using. */ max_buf = tcon->ses->server->maxBuf; - if (!max_buf) + if (max_buf < sizeof(struct smb2_lock_element)) return -EINVAL; + BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE); + max_buf = min_t(unsigned int, max_buf, PAGE_SIZE); max_num = max_buf / sizeof(struct smb2_lock_element); buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL); if (!buf) @@ -264,6 +266,8 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile) return -EINVAL; } + BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE); + max_buf = min_t(unsigned int, max_buf, PAGE_SIZE); max_num = max_buf / sizeof(struct smb2_lock_element); buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL); if (!buf) { diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index a8999f930b22..f14533da3a93 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -49,7 +49,6 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_open_parms oparms; struct cifs_fid fid; struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = ses->server; int num_rqst = 0; struct smb_rqst rqst[3]; int resp_buftype[3]; @@ -97,7 +96,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, if (rc) goto finished; - smb2_set_next_command(server, &rqst[num_rqst++], 0); + smb2_set_next_command(tcon, &rqst[num_rqst++]); /* Operation */ switch (command) { @@ -111,7 +110,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, SMB2_O_INFO_FILE, 0, sizeof(struct smb2_file_all_info) + PATH_MAX * 2, 0, NULL); - smb2_set_next_command(server, &rqst[num_rqst], 0); + smb2_set_next_command(tcon, &rqst[num_rqst]); smb2_set_related(&rqst[num_rqst++]); break; case SMB2_OP_DELETE: @@ -134,7 +133,7 
@@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, COMPOUND_FID, current->tgid, FILE_DISPOSITION_INFORMATION, SMB2_O_INFO_FILE, 0, data, size); - smb2_set_next_command(server, &rqst[num_rqst], 1); + smb2_set_next_command(tcon, &rqst[num_rqst]); smb2_set_related(&rqst[num_rqst++]); break; case SMB2_OP_SET_EOF: @@ -149,7 +148,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, COMPOUND_FID, current->tgid, FILE_END_OF_FILE_INFORMATION, SMB2_O_INFO_FILE, 0, data, size); - smb2_set_next_command(server, &rqst[num_rqst], 0); + smb2_set_next_command(tcon, &rqst[num_rqst]); smb2_set_related(&rqst[num_rqst++]); break; case SMB2_OP_SET_INFO: @@ -165,7 +164,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, COMPOUND_FID, current->tgid, FILE_BASIC_INFORMATION, SMB2_O_INFO_FILE, 0, data, size); - smb2_set_next_command(server, &rqst[num_rqst], 0); + smb2_set_next_command(tcon, &rqst[num_rqst]); smb2_set_related(&rqst[num_rqst++]); break; case SMB2_OP_RENAME: @@ -189,7 +188,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, COMPOUND_FID, current->tgid, FILE_RENAME_INFORMATION, SMB2_O_INFO_FILE, 0, data, size); - smb2_set_next_command(server, &rqst[num_rqst], 0); + smb2_set_next_command(tcon, &rqst[num_rqst]); smb2_set_related(&rqst[num_rqst++]); break; case SMB2_OP_HARDLINK: @@ -213,7 +212,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, COMPOUND_FID, current->tgid, FILE_LINK_INFORMATION, SMB2_O_INFO_FILE, 0, data, size); - smb2_set_next_command(server, &rqst[num_rqst], 0); + smb2_set_next_command(tcon, &rqst[num_rqst]); smb2_set_related(&rqst[num_rqst++]); break; default: @@ -388,7 +387,6 @@ smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon, rc = -ENOMEM; goto smb2_rename_path; } - rc = smb2_compound_op(xid, tcon, cifs_sb, from_name, access, FILE_OPEN, 0, smb2_to_name, command); smb2_rename_path: diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index 
d47b7f5dfa6c..924269cec135 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -379,8 +379,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_NONEXISTENT_EA_ENTRY, -EIO, "STATUS_NONEXISTENT_EA_ENTRY"}, {STATUS_NO_EAS_ON_FILE, -ENODATA, "STATUS_NO_EAS_ON_FILE"}, {STATUS_EA_CORRUPT_ERROR, -EIO, "STATUS_EA_CORRUPT_ERROR"}, - {STATUS_FILE_LOCK_CONFLICT, -EIO, "STATUS_FILE_LOCK_CONFLICT"}, - {STATUS_LOCK_NOT_GRANTED, -EIO, "STATUS_LOCK_NOT_GRANTED"}, + {STATUS_FILE_LOCK_CONFLICT, -EACCES, "STATUS_FILE_LOCK_CONFLICT"}, + {STATUS_LOCK_NOT_GRANTED, -EACCES, "STATUS_LOCK_NOT_GRANTED"}, {STATUS_DELETE_PENDING, -ENOENT, "STATUS_DELETE_PENDING"}, {STATUS_CTL_FILE_NOT_SUPPORTED, -ENOSYS, "STATUS_CTL_FILE_NOT_SUPPORTED"}, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e25c7aade98a..cf7eb891804f 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -831,72 +831,48 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, { int rc; __le16 *utf16_path; - __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; - struct cifs_open_parms oparms; - struct cifs_fid fid; - struct smb2_file_full_ea_info *smb2_data; - int ea_buf_size = SMB2_MIN_EA_BUF; + struct kvec rsp_iov = {NULL, 0}; + int buftype = CIFS_NO_BUFFER; + struct smb2_query_info_rsp *rsp; + struct smb2_file_full_ea_info *info = NULL; utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) return -ENOMEM; - oparms.tcon = tcon; - oparms.desired_access = FILE_READ_EA; - oparms.disposition = FILE_OPEN; - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; - oparms.fid = &fid; - oparms.reconnect = false; - - rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL); - kfree(utf16_path); + rc = smb2_query_info_compound(xid, tcon, utf16_path, + FILE_READ_EA, + FILE_FULL_EA_INFORMATION, + SMB2_O_INFO_FILE, + SMB2_MAX_EA_BUF, + &rsp_iov, &buftype, cifs_sb); if (rc) { - cifs_dbg(FYI, "open failed 
rc=%d\n", rc); - return rc; - } - - while (1) { - smb2_data = kzalloc(ea_buf_size, GFP_KERNEL); - if (smb2_data == NULL) { - SMB2_close(xid, tcon, fid.persistent_fid, - fid.volatile_fid); - return -ENOMEM; - } - - rc = SMB2_query_eas(xid, tcon, fid.persistent_fid, - fid.volatile_fid, - ea_buf_size, smb2_data); - - if (rc != -E2BIG) - break; - - kfree(smb2_data); - ea_buf_size <<= 1; - - if (ea_buf_size > SMB2_MAX_EA_BUF) { - cifs_dbg(VFS, "EA size is too large\n"); - SMB2_close(xid, tcon, fid.persistent_fid, - fid.volatile_fid); - return -ENOMEM; - } + /* + * If ea_name is NULL (listxattr) and there are no EAs, + * return 0 as it's not an error. Otherwise, the specified + * ea_name was not found. + */ + if (!ea_name && rc == -ENODATA) + rc = 0; + goto qeas_exit; } - SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); + rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base; + rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset), + le32_to_cpu(rsp->OutputBufferLength), + &rsp_iov, + sizeof(struct smb2_file_full_ea_info)); + if (rc) + goto qeas_exit; - /* - * If ea_name is NULL (listxattr) and there are no EAs, return 0 as it's - * not an error. Otherwise, the specified ea_name was not found. 
- */ - if (!rc) - rc = move_smb2_ea_to_cifs(ea_data, buf_size, smb2_data, - SMB2_MAX_EA_BUF, ea_name); - else if (!ea_name && rc == -ENODATA) - rc = 0; + info = (struct smb2_file_full_ea_info *)( + le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp); + rc = move_smb2_ea_to_cifs(ea_data, buf_size, info, + le32_to_cpu(rsp->OutputBufferLength), ea_name); - kfree(smb2_data); + qeas_exit: + kfree(utf16_path); + free_rsp_buf(buftype, rsp_iov.iov_base); return rc; } @@ -907,14 +883,27 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, const __u16 ea_value_len, const struct nls_table *nls_codepage, struct cifs_sb_info *cifs_sb) { - int rc; - __le16 *utf16_path; - __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; - struct cifs_open_parms oparms; - struct cifs_fid fid; - struct smb2_file_full_ea_info *ea; + struct cifs_ses *ses = tcon->ses; + __le16 *utf16_path = NULL; int ea_name_len = strlen(ea_name); + int flags = 0; int len; + struct smb_rqst rqst[3]; + int resp_buftype[3]; + struct kvec rsp_iov[3]; + struct kvec open_iov[SMB2_CREATE_IOV_SIZE]; + struct cifs_open_parms oparms; + __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; + struct cifs_fid fid; + struct kvec si_iov[SMB2_SET_INFO_IOV_SIZE]; + unsigned int size[1]; + void *data[1]; + struct smb2_file_full_ea_info *ea = NULL; + struct kvec close_iov[1]; + int rc; + + if (smb3_encryption_required(tcon)) + flags |= CIFS_TRANSFORM_REQ; if (ea_name_len > 255) return -EINVAL; @@ -923,6 +912,16 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, if (!utf16_path) return -ENOMEM; + memset(rqst, 0, sizeof(rqst)); + resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER; + memset(rsp_iov, 0, sizeof(rsp_iov)); + + /* Open */ + memset(&open_iov, 0, sizeof(open_iov)); + rqst[0].rq_iov = open_iov; + rqst[0].rq_nvec = SMB2_CREATE_IOV_SIZE; + + memset(&oparms, 0, sizeof(oparms)); oparms.tcon = tcon; oparms.desired_access = FILE_WRITE_EA; oparms.disposition = FILE_OPEN; @@ -933,18 +932,22 @@ smb2_set_ea(const unsigned int 
xid, struct cifs_tcon *tcon, oparms.fid = &fid; oparms.reconnect = false; - rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL); - kfree(utf16_path); - if (rc) { - cifs_dbg(FYI, "open failed rc=%d\n", rc); - return rc; - } + rc = SMB2_open_init(tcon, &rqst[0], &oplock, &oparms, utf16_path); + if (rc) + goto sea_exit; + smb2_set_next_command(tcon, &rqst[0]); + + + /* Set Info */ + memset(&si_iov, 0, sizeof(si_iov)); + rqst[1].rq_iov = si_iov; + rqst[1].rq_nvec = 1; len = sizeof(ea) + ea_name_len + ea_value_len + 1; ea = kzalloc(len, GFP_KERNEL); if (ea == NULL) { - SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); - return -ENOMEM; + rc = -ENOMEM; + goto sea_exit; } ea->ea_name_length = ea_name_len; @@ -952,12 +955,36 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, memcpy(ea->ea_data, ea_name, ea_name_len + 1); memcpy(ea->ea_data + ea_name_len + 1, ea_value, ea_value_len); - rc = SMB2_set_ea(xid, tcon, fid.persistent_fid, fid.volatile_fid, ea, - len); - kfree(ea); + size[0] = len; + data[0] = ea; + + rc = SMB2_set_info_init(tcon, &rqst[1], COMPOUND_FID, + COMPOUND_FID, current->tgid, + FILE_FULL_EA_INFORMATION, + SMB2_O_INFO_FILE, 0, data, size); + smb2_set_next_command(tcon, &rqst[1]); + smb2_set_related(&rqst[1]); - SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); + /* Close */ + memset(&close_iov, 0, sizeof(close_iov)); + rqst[2].rq_iov = close_iov; + rqst[2].rq_nvec = 1; + rc = SMB2_close_init(tcon, &rqst[2], COMPOUND_FID, COMPOUND_FID); + smb2_set_related(&rqst[2]); + + rc = compound_send_recv(xid, ses, flags, 3, rqst, + resp_buftype, rsp_iov); + + sea_exit: + kfree(ea); + kfree(utf16_path); + SMB2_open_free(&rqst[0]); + SMB2_set_info_free(&rqst[1]); + SMB2_close_free(&rqst[2]); + free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base); + free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base); + free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base); return rc; } #endif @@ -1194,7 +1221,7 @@ smb2_ioctl_query_info(const 
unsigned int xid, rc = SMB2_open_init(tcon, &rqst[0], &oplock, &oparms, path); if (rc) goto iqinf_exit; - smb2_set_next_command(ses->server, &rqst[0], 0); + smb2_set_next_command(tcon, &rqst[0]); /* Query */ memset(&qi_iov, 0, sizeof(qi_iov)); @@ -1208,7 +1235,7 @@ smb2_ioctl_query_info(const unsigned int xid, qi.output_buffer_length, buffer); if (rc) goto iqinf_exit; - smb2_set_next_command(ses->server, &rqst[1], 0); + smb2_set_next_command(tcon, &rqst[1]); smb2_set_related(&rqst[1]); /* Close */ @@ -1761,49 +1788,79 @@ smb2_set_related(struct smb_rqst *rqst) char smb2_padding[7] = {0, 0, 0, 0, 0, 0, 0}; void -smb2_set_next_command(struct TCP_Server_Info *server, struct smb_rqst *rqst, - bool has_space_for_padding) +smb2_set_next_command(struct cifs_tcon *tcon, struct smb_rqst *rqst) { struct smb2_sync_hdr *shdr; + struct cifs_ses *ses = tcon->ses; + struct TCP_Server_Info *server = ses->server; unsigned long len = smb_rqst_len(server, rqst); + int i, num_padding; /* SMB headers in a compound are 8 byte aligned. */ - if (len & 7) { - if (has_space_for_padding) { - len = rqst->rq_iov[rqst->rq_nvec - 1].iov_len; - rqst->rq_iov[rqst->rq_nvec - 1].iov_len = - (len + 7) & ~7; - } else { - rqst->rq_iov[rqst->rq_nvec].iov_base = smb2_padding; - rqst->rq_iov[rqst->rq_nvec].iov_len = 8 - (len & 7); - rqst->rq_nvec++; + + /* No padding needed */ + if (!(len & 7)) + goto finished; + + num_padding = 8 - (len & 7); + if (!smb3_encryption_required(tcon)) { + /* + * If we do not have encryption then we can just add an extra + * iov for the padding. + */ + rqst->rq_iov[rqst->rq_nvec].iov_base = smb2_padding; + rqst->rq_iov[rqst->rq_nvec].iov_len = num_padding; + rqst->rq_nvec++; + len += num_padding; + } else { + /* + * We can not add a small padding iov for the encryption case + * because the encryption framework can not handle the padding + * iovs. + * We have to flatten this into a single buffer and add + * the padding to it. 
+ */ + for (i = 1; i < rqst->rq_nvec; i++) { + memcpy(rqst->rq_iov[0].iov_base + + rqst->rq_iov[0].iov_len, + rqst->rq_iov[i].iov_base, + rqst->rq_iov[i].iov_len); + rqst->rq_iov[0].iov_len += rqst->rq_iov[i].iov_len; } - len = smb_rqst_len(server, rqst); + memset(rqst->rq_iov[0].iov_base + rqst->rq_iov[0].iov_len, + 0, num_padding); + rqst->rq_iov[0].iov_len += num_padding; + len += num_padding; + rqst->rq_nvec = 1; } + finished: shdr = (struct smb2_sync_hdr *)(rqst->rq_iov[0].iov_base); shdr->NextCommand = cpu_to_le32(len); } -static int -smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, - struct kstatfs *buf) +/* + * Passes the query info response back to the caller on success. + * Caller need to free this with free_rsp_buf(). + */ +int +smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, + __le16 *utf16_path, u32 desired_access, + u32 class, u32 type, u32 output_len, + struct kvec *rsp, int *buftype, + struct cifs_sb_info *cifs_sb) { - struct smb2_query_info_rsp *rsp; - struct smb2_fs_full_size_info *info = NULL; + struct cifs_ses *ses = tcon->ses; + int flags = 0; struct smb_rqst rqst[3]; int resp_buftype[3]; struct kvec rsp_iov[3]; struct kvec open_iov[SMB2_CREATE_IOV_SIZE]; struct kvec qi_iov[1]; struct kvec close_iov[1]; - struct cifs_ses *ses = tcon->ses; - struct TCP_Server_Info *server = ses->server; - __le16 srch_path = 0; /* Null - open root of share */ u8 oplock = SMB2_OPLOCK_LEVEL_NONE; struct cifs_open_parms oparms; struct cifs_fid fid; - int flags = 0; int rc; if (smb3_encryption_required(tcon)) @@ -1818,29 +1875,31 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, rqst[0].rq_nvec = SMB2_CREATE_IOV_SIZE; oparms.tcon = tcon; - oparms.desired_access = FILE_READ_ATTRIBUTES; + oparms.desired_access = desired_access; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + if (cifs_sb && backup_cred(cifs_sb)) + oparms.create_options = CREATE_OPEN_BACKUP_INTENT; + else + oparms.create_options = 0; 
oparms.fid = &fid; oparms.reconnect = false; - rc = SMB2_open_init(tcon, &rqst[0], &oplock, &oparms, &srch_path); + rc = SMB2_open_init(tcon, &rqst[0], &oplock, &oparms, utf16_path); if (rc) - goto qfs_exit; - smb2_set_next_command(server, &rqst[0], 0); + goto qic_exit; + smb2_set_next_command(tcon, &rqst[0]); memset(&qi_iov, 0, sizeof(qi_iov)); rqst[1].rq_iov = qi_iov; rqst[1].rq_nvec = 1; rc = SMB2_query_info_init(tcon, &rqst[1], COMPOUND_FID, COMPOUND_FID, - FS_FULL_SIZE_INFORMATION, - SMB2_O_INFO_FILESYSTEM, 0, - sizeof(struct smb2_fs_full_size_info), 0, + class, type, 0, + output_len, 0, NULL); if (rc) - goto qfs_exit; - smb2_set_next_command(server, &rqst[1], 0); + goto qic_exit; + smb2_set_next_command(tcon, &rqst[1]); smb2_set_related(&rqst[1]); memset(&close_iov, 0, sizeof(close_iov)); @@ -1849,32 +1908,61 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_close_init(tcon, &rqst[2], COMPOUND_FID, COMPOUND_FID); if (rc) - goto qfs_exit; + goto qic_exit; smb2_set_related(&rqst[2]); rc = compound_send_recv(xid, ses, flags, 3, rqst, resp_buftype, rsp_iov); + if (rc) { + free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base); + goto qic_exit; + } + *rsp = rsp_iov[1]; + *buftype = resp_buftype[1]; + + qic_exit: + SMB2_open_free(&rqst[0]); + SMB2_query_info_free(&rqst[1]); + SMB2_close_free(&rqst[2]); + free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base); + free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base); + return rc; +} + +static int +smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, + struct kstatfs *buf) +{ + struct smb2_query_info_rsp *rsp; + struct smb2_fs_full_size_info *info = NULL; + __le16 utf16_path = 0; /* Null - open root of share */ + struct kvec rsp_iov = {NULL, 0}; + int buftype = CIFS_NO_BUFFER; + int rc; + + + rc = smb2_query_info_compound(xid, tcon, &utf16_path, + FILE_READ_ATTRIBUTES, + FS_FULL_SIZE_INFORMATION, + SMB2_O_INFO_FILESYSTEM, + sizeof(struct smb2_fs_full_size_info), + &rsp_iov, &buftype, NULL); if 
(rc) goto qfs_exit; - rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base; + rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base; buf->f_type = SMB2_MAGIC_NUMBER; info = (struct smb2_fs_full_size_info *)( le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp); rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset), le32_to_cpu(rsp->OutputBufferLength), - &rsp_iov[1], + &rsp_iov, sizeof(struct smb2_fs_full_size_info)); if (!rc) smb2_copy_fs_info_to_kstatfs(info, buf); qfs_exit: - SMB2_open_free(&rqst[0]); - SMB2_query_info_free(&rqst[1]); - SMB2_close_free(&rqst[2]); - free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base); - free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base); - free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base); + free_rsp_buf(buftype, rsp_iov.iov_base); return rc; } @@ -2743,7 +2831,7 @@ init_sg(int num_rqst, struct smb_rqst *rqst, u8 *sign) smb2_sg_set_buf(&sg[idx++], rqst[i].rq_iov[j].iov_base + skip, rqst[i].rq_iov[j].iov_len - skip); - } + } for (j = 0; j < rqst[i].rq_npages; j++) { unsigned int len, offset; @@ -3384,8 +3472,10 @@ smb3_receive_transform(struct TCP_Server_Info *server, } /* TODO: add support for compounds containing READ. 
*/ - if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server)) + if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server)) { + *num_mids = 1; return receive_encrypted_read(server, &mids[0]); + } return receive_encrypted_standard(server, mids, bufs, num_mids); } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 27f86537a5d1..50811a7dc0e0 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -50,6 +50,9 @@ #include "cifs_spnego.h" #include "smbdirect.h" #include "trace.h" +#ifdef CONFIG_CIFS_DFS_UPCALL +#include "dfs_cache.h" +#endif /* * The following table defines the expected "StructureSize" of SMB2 requests @@ -152,6 +155,86 @@ out: return; } +#ifdef CONFIG_CIFS_DFS_UPCALL +static int __smb2_reconnect(const struct nls_table *nlsc, + struct cifs_tcon *tcon) +{ + int rc; + struct dfs_cache_tgt_list tl; + struct dfs_cache_tgt_iterator *it = NULL; + char *tree; + const char *tcp_host; + size_t tcp_host_len; + const char *dfs_host; + size_t dfs_host_len; + + tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL); + if (!tree) + return -ENOMEM; + + if (tcon->ipc) { + snprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", + tcon->ses->server->hostname); + rc = SMB2_tcon(0, tcon->ses, tree, tcon, nlsc); + goto out; + } + + if (!tcon->dfs_path) { + rc = SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nlsc); + goto out; + } + + rc = dfs_cache_noreq_find(tcon->dfs_path + 1, NULL, &tl); + if (rc) + goto out; + + extract_unc_hostname(tcon->ses->server->hostname, &tcp_host, + &tcp_host_len); + + for (it = dfs_cache_get_tgt_iterator(&tl); it; + it = dfs_cache_get_next_tgt(&tl, it)) { + const char *tgt = dfs_cache_get_tgt_name(it); + + extract_unc_hostname(tgt, &dfs_host, &dfs_host_len); + + if (dfs_host_len != tcp_host_len + || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) { + cifs_dbg(FYI, "%s: skipping %.*s, doesn't match %.*s", + __func__, + (int)dfs_host_len, dfs_host, + (int)tcp_host_len, tcp_host); + continue; + } + + snprintf(tree, MAX_TREE_SIZE, "\\%s", tgt); + + rc = 
SMB2_tcon(0, tcon->ses, tree, tcon, nlsc); + if (!rc) + break; + if (rc == -EREMOTE) + break; + } + + if (!rc) { + if (it) + rc = dfs_cache_noreq_update_tgthint(tcon->dfs_path + 1, + it); + else + rc = -ENOENT; + } + dfs_cache_free_tgts(&tl); +out: + kfree(tree); + return rc; +} +#else +static inline int __smb2_reconnect(const struct nls_table *nlsc, + struct cifs_tcon *tcon) +{ + return SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nlsc); +} +#endif + static int smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) { @@ -159,6 +242,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) struct nls_table *nls_codepage; struct cifs_ses *ses; struct TCP_Server_Info *server; + int retries; /* * SMB2s NegProt, SessSetup, Logoff do not have tcon yet so @@ -192,9 +276,12 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) ses = tcon->ses; server = ses->server; + retries = server->nr_targets; + /* - * Give demultiplex thread up to 10 seconds to reconnect, should be - * greater than cifs socket timeout which is 7 seconds + * Give demultiplex thread up to 10 seconds to each target available for + * reconnect -- should be greater than cifs socket timeout which is 7 + * seconds. */ while (server->tcpStatus == CifsNeedReconnect) { /* @@ -225,6 +312,9 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) if (server->tcpStatus != CifsNeedReconnect) break; + if (--retries) + continue; + /* * on "soft" mounts we wait once. 
Hard mounts keep * retrying until process is killed or server comes @@ -234,6 +324,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) cifs_dbg(FYI, "gave up waiting on reconnect in smb_init\n"); return -EHOSTDOWN; } + retries = server->nr_targets; } if (!tcon->ses->need_reconnect && !tcon->need_reconnect) @@ -271,7 +362,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) if (tcon->use_persistent) tcon->need_reopen_files = true; - rc = SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nls_codepage); + rc = __smb2_reconnect(nls_codepage, tcon); mutex_unlock(&tcon->ses->session_mutex); cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); @@ -369,10 +460,6 @@ smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon, } -/* offset is sizeof smb2_negotiate_req but rounded up to 8 bytes */ -#define OFFSET_OF_NEG_CONTEXT 0x68 /* sizeof(struct smb2_negotiate_req) */ - - #define SMB2_PREAUTH_INTEGRITY_CAPABILITIES cpu_to_le16(1) #define SMB2_ENCRYPTION_CAPABILITIES cpu_to_le16(2) #define SMB2_POSIX_EXTENSIONS_AVAILABLE cpu_to_le16(0x100) @@ -409,10 +496,24 @@ static void assemble_neg_contexts(struct smb2_negotiate_req *req, unsigned int *total_len) { - char *pneg_ctxt = (char *)req + OFFSET_OF_NEG_CONTEXT; + char *pneg_ctxt = (char *)req; unsigned int ctxt_len; - *total_len += 2; /* Add 2 due to round to 8 byte boundary for 1st ctxt */ + if (*total_len > 200) { + /* In case length corrupted don't want to overrun smb buffer */ + cifs_dbg(VFS, "Bad frame length assembling neg contexts\n"); + return; + } + + /* + * round up total_len of fixed part of SMB3 negotiate request to 8 + * byte boundary before adding negotiate contexts + */ + *total_len = roundup(*total_len, 8); + + pneg_ctxt = (*total_len) + (char *)req; + req->NegotiateContextOffset = cpu_to_le32(*total_len); + build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt); ctxt_len = DIV_ROUND_UP(sizeof(struct smb2_preauth_neg_context), 8) * 8; *total_len += ctxt_len; @@ -426,7 +527,6 @@ 
assemble_neg_contexts(struct smb2_negotiate_req *req, build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt); *total_len += sizeof(struct smb2_posix_neg_context); - req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT); req->NegotiateContextCount = cpu_to_le16(3); } @@ -642,8 +742,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) req->Dialects[0] = cpu_to_le16(SMB21_PROT_ID); req->Dialects[1] = cpu_to_le16(SMB30_PROT_ID); req->Dialects[2] = cpu_to_le16(SMB302_PROT_ID); - req->DialectCount = cpu_to_le16(3); - total_len += 6; + req->Dialects[3] = cpu_to_le16(SMB311_PROT_ID); + req->DialectCount = cpu_to_le16(4); + total_len += 8; } else { /* otherwise send specific dialect */ req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id); @@ -667,7 +768,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) else { memcpy(req->ClientGUID, server->client_guid, SMB2_CLIENT_GUID_SIZE); - if (ses->server->vals->protocol_id == SMB311_PROT_ID) + if ((ses->server->vals->protocol_id == SMB311_PROT_ID) || + (strcmp(ses->server->vals->version_string, + SMBDEFAULT_VERSION_STRING) == 0)) assemble_neg_contexts(req, &total_len); } iov[0].iov_base = (char *)req; @@ -712,7 +815,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) { /* ops set to 3.0 by default for default so update */ ses->server->ops = &smb21_operations; - } + } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) + ses->server->ops = &smb311_operations; } else if (le16_to_cpu(rsp->DialectRevision) != ses->server->vals->protocol_id) { /* if requested single dialect ensure returned dialect matched */ @@ -859,13 +963,14 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) pneg_inbuf->DialectCount = cpu_to_le16(2); /* structure is big enough for 3 dialects, sending only 2 */ inbuflen = sizeof(*pneg_inbuf) - - sizeof(pneg_inbuf->Dialects[0]); + (2 * 
sizeof(pneg_inbuf->Dialects[0])); } else if (strcmp(tcon->ses->server->vals->version_string, SMBDEFAULT_VERSION_STRING) == 0) { pneg_inbuf->Dialects[0] = cpu_to_le16(SMB21_PROT_ID); pneg_inbuf->Dialects[1] = cpu_to_le16(SMB30_PROT_ID); pneg_inbuf->Dialects[2] = cpu_to_le16(SMB302_PROT_ID); - pneg_inbuf->DialectCount = cpu_to_le16(3); + pneg_inbuf->Dialects[3] = cpu_to_le16(SMB311_PROT_ID); + pneg_inbuf->DialectCount = cpu_to_le16(4); /* structure is big enough for 3 dialects */ inbuflen = sizeof(*pneg_inbuf); } else { @@ -1955,7 +2060,6 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, struct smb_rqst rqst; struct smb2_create_req *req; struct smb2_create_rsp *rsp = NULL; - struct TCP_Server_Info *server; struct cifs_ses *ses = tcon->ses; struct kvec iov[3]; /* make sure at least one for each open context */ struct kvec rsp_iov = {NULL, 0}; @@ -1978,9 +2082,7 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, if (!utf16_path) return -ENOMEM; - if (ses && (ses->server)) - server = ses->server; - else { + if (!ses || !(ses->server)) { rc = -EIO; goto err_free_path; } @@ -2768,18 +2870,6 @@ qinf_exit: return rc; } -int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_fid, u64 volatile_fid, - int ea_buf_size, struct smb2_file_full_ea_info *data) -{ - return query_info(xid, tcon, persistent_fid, volatile_fid, - FILE_FULL_EA_INFORMATION, SMB2_O_INFO_FILE, 0, - ea_buf_size, - sizeof(struct smb2_file_full_ea_info), - (void **)&data, - NULL); -} - int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, struct smb2_file_all_info *data) { @@ -3197,12 +3287,14 @@ smb2_async_readv(struct cifs_readdata *rdata) if (rdata->credits) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes, SMB2_MAX_BUFFER_SIZE)); - shdr->CreditRequest = shdr->CreditCharge; + shdr->CreditRequest = + cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1); 
spin_lock(&server->req_lock); server->credits += rdata->credits - le16_to_cpu(shdr->CreditCharge); spin_unlock(&server->req_lock); wake_up(&server->request_q); + rdata->credits = le16_to_cpu(shdr->CreditCharge); flags |= CIFS_HAS_CREDITS; } @@ -3474,12 +3566,14 @@ smb2_async_writev(struct cifs_writedata *wdata, if (wdata->credits) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes, SMB2_MAX_BUFFER_SIZE)); - shdr->CreditRequest = shdr->CreditCharge; + shdr->CreditRequest = + cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1); spin_lock(&server->req_lock); server->credits += wdata->credits - le16_to_cpu(shdr->CreditCharge); spin_unlock(&server->req_lock); wake_up(&server->request_q); + wdata->credits = le16_to_cpu(shdr->CreditCharge); flags |= CIFS_HAS_CREDITS; } @@ -3994,7 +4088,6 @@ static int build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level, int outbuf_len, u64 persistent_fid, u64 volatile_fid) { - struct TCP_Server_Info *server; int rc; struct smb2_query_info_req *req; unsigned int total_len; @@ -4004,8 +4097,6 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level, if ((tcon->ses == NULL) || (tcon->ses->server == NULL)) return -EIO; - server = tcon->ses->server; - rc = smb2_plain_req_init(SMB2_QUERY_INFO, tcon, (void **) &req, &total_len); if (rc) diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 5671d5ee7f58..7a2d0a2255e6 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -898,7 +898,7 @@ struct validate_negotiate_info_req { __u8 Guid[SMB2_CLIENT_GUID_SIZE]; __le16 SecurityMode; __le16 DialectCount; - __le16 Dialects[3]; /* BB expand this if autonegotiate > 3 dialects */ + __le16 Dialects[4]; /* BB expand this if autonegotiate > 4 dialects */ } __packed; struct validate_negotiate_info_rsp { @@ -1398,7 +1398,6 @@ struct smb2_file_link_info { /* encoding of request for level 11 */ char FileName[0]; /* Name to be assigned to new link */ } __packed; /* level 11 Set */ -#define SMB2_MIN_EA_BUF 2048 
#define SMB2_MAX_EA_BUF 65536 struct smb2_file_full_ea_info { /* encoding of response for level 15 */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 2fe78acd7d0c..87733b27a65f 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -116,9 +116,8 @@ extern void smb2_reconnect_server(struct work_struct *work); extern int smb3_crypto_aead_allocate(struct TCP_Server_Info *server); extern unsigned long smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst); -extern void smb2_set_next_command(struct TCP_Server_Info *server, - struct smb_rqst *rqst, - bool has_space_for_padding); +extern void smb2_set_next_command(struct cifs_tcon *tcon, + struct smb_rqst *rqst); extern void smb2_set_related(struct smb_rqst *rqst); /* @@ -154,10 +153,6 @@ extern int SMB2_close_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, extern void SMB2_close_free(struct smb_rqst *rqst); extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id); -extern int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_file_id, u64 volatile_file_id, - int ea_buf_size, - struct smb2_file_full_ea_info *data); extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, struct smb2_file_all_info *data); @@ -241,4 +236,10 @@ extern void smb2_copy_fs_info_to_kstatfs( extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server); extern int smb311_update_preauth_hash(struct cifs_ses *ses, struct kvec *iov, int nvec); +extern int smb2_query_info_compound(const unsigned int xid, + struct cifs_tcon *tcon, + __le16 *utf16_path, u32 desired_access, + u32 class, u32 type, u32 output_len, + struct kvec *rsp, int *buftype, + struct cifs_sb_info *cifs_sb); #endif /* _SMB2PROTO_H */ diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 83ff0c25710d..202e0e84efdd 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -126,9 
+126,11 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) if ((slow_rsp_threshold != 0) && time_after(now, midEntry->when_alloc + (slow_rsp_threshold * HZ)) && (midEntry->command != command)) { - /* smb2slowcmd[NUMBER_OF_SMB2_COMMANDS] counts by command */ - if ((le16_to_cpu(midEntry->command) < NUMBER_OF_SMB2_COMMANDS) && - (le16_to_cpu(midEntry->command) >= 0)) + /* + * smb2slowcmd[NUMBER_OF_SMB2_COMMANDS] counts by command + * NB: le16_to_cpu returns unsigned so can not be negative below + */ + if (le16_to_cpu(midEntry->command) < NUMBER_OF_SMB2_COMMANDS) cifs_stats_inc(&midEntry->server->smb2slowcmd[le16_to_cpu(midEntry->command)]); trace_smb3_slow_rsp(le16_to_cpu(midEntry->command), @@ -385,7 +387,7 @@ smbd_done: if (rc < 0 && rc != -EINTR) cifs_dbg(VFS, "Error %d sending data on socket to server\n", rc); - else + else if (rc > 0) rc = 0; return rc; @@ -781,8 +783,34 @@ cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst) } static void -cifs_noop_callback(struct mid_q_entry *mid) +cifs_compound_callback(struct mid_q_entry *mid) +{ + struct TCP_Server_Info *server = mid->server; + unsigned int optype = mid->optype; + unsigned int credits_received = 0; + + if (mid->mid_state == MID_RESPONSE_RECEIVED) { + if (mid->resp_buf) + credits_received = server->ops->get_credits(mid); + else + cifs_dbg(FYI, "Bad state for cancelled MID\n"); + } + + add_credits(server, credits_received, optype); +} + +static void +cifs_compound_last_callback(struct mid_q_entry *mid) +{ + cifs_compound_callback(mid); + cifs_wake_up_task(mid); +} + +static void +cifs_cancelled_callback(struct mid_q_entry *mid) { + cifs_compound_callback(mid); + DeleteMidQEntry(mid); } int @@ -793,7 +821,8 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, int i, j, rc = 0; int timeout, optype; struct mid_q_entry *midQ[MAX_COMPOUND]; - unsigned int credits = 0; + bool cancelled_mid[MAX_COMPOUND] = {false}; + unsigned int credits[MAX_COMPOUND] = {0}; char *buf; timeout = flags & 
CIFS_TIMEOUT_MASK; @@ -811,13 +840,31 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, return -ENOENT; /* - * Ensure that we do not send more than 50 overlapping requests - * to the same server. We may make this configurable later or - * use ses->maxReq. + * Ensure we obtain 1 credit per request in the compound chain. + * It can be optimized further by waiting for all the credits + * at once but this can wait long enough if we don't have enough + * credits due to some heavy operations in progress or the server + * not granting us much, so a fallback to the current approach is + * needed anyway. */ - rc = wait_for_free_request(ses->server, timeout, optype); - if (rc) - return rc; + for (i = 0; i < num_rqst; i++) { + rc = wait_for_free_request(ses->server, timeout, optype); + if (rc) { + /* + * We haven't sent an SMB packet to the server yet but + * we already obtained credits for i requests in the + * compound chain - need to return those credits back + * for future use. Note that we need to call add_credits + * multiple times to match the way we obtained credits + * in the first place and to account for in flight + * requests correctly. + */ + for (j = 0; j < i; j++) + add_credits(ses->server, 1, optype); + return rc; + } + credits[i] = 1; + } /* * Make sure that we sign in the same order that we send on this socket @@ -833,18 +880,24 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, for (j = 0; j < i; j++) cifs_delete_mid(midQ[j]); mutex_unlock(&ses->server->srv_mutex); + /* Update # of requests on wire to server */ - add_credits(ses->server, 1, optype); + for (j = 0; j < num_rqst; j++) + add_credits(ses->server, credits[j], optype); return PTR_ERR(midQ[i]); } midQ[i]->mid_state = MID_REQUEST_SUBMITTED; + midQ[i]->optype = optype; /* - * We don't invoke the callback compounds unless it is the last - * request. + * Invoke callback for every part of the compound chain + * to calculate credits properly. 
Wake up this thread only when + * the last element is received. */ if (i < num_rqst - 1) - midQ[i]->callback = cifs_noop_callback; + midQ[i]->callback = cifs_compound_callback; + else + midQ[i]->callback = cifs_compound_last_callback; } cifs_in_send_inc(ses->server); rc = smb_send_rqst(ses->server, num_rqst, rqst, flags); @@ -858,8 +911,20 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, mutex_unlock(&ses->server->srv_mutex); - if (rc < 0) + if (rc < 0) { + /* Sending failed for some reason - return credits back */ + for (i = 0; i < num_rqst; i++) + add_credits(ses->server, credits[i], optype); goto out; + } + + /* + * At this point the request is passed to the network stack - we assume + * that any credits taken from the server structure on the client have + * been spent and we can't return them back. Once we receive responses + * we will collect credits granted by the server in the mid callbacks + * and add those credits to the server structure. + */ /* * Compounding is never used during session establish. 
@@ -873,36 +938,34 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, for (i = 0; i < num_rqst; i++) { rc = wait_for_response(ses->server, midQ[i]); - if (rc != 0) { + if (rc != 0) + break; + } + if (rc != 0) { + for (; i < num_rqst; i++) { cifs_dbg(VFS, "Cancelling wait for mid %llu cmd: %d\n", midQ[i]->mid, le16_to_cpu(midQ[i]->command)); send_cancel(ses->server, &rqst[i], midQ[i]); spin_lock(&GlobalMid_Lock); if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED) { midQ[i]->mid_flags |= MID_WAIT_CANCELLED; - midQ[i]->callback = DeleteMidQEntry; - spin_unlock(&GlobalMid_Lock); - add_credits(ses->server, 1, optype); - return rc; + midQ[i]->callback = cifs_cancelled_callback; + cancelled_mid[i] = true; + credits[i] = 0; } spin_unlock(&GlobalMid_Lock); } } - for (i = 0; i < num_rqst; i++) - if (midQ[i]->resp_buf) - credits += ses->server->ops->get_credits(midQ[i]); - if (!credits) - credits = 1; - for (i = 0; i < num_rqst; i++) { if (rc < 0) goto out; rc = cifs_sync_mid_result(midQ[i], ses->server); if (rc != 0) { - add_credits(ses->server, credits, optype); - return rc; + /* mark this mid as cancelled to not free it below */ + cancelled_mid[i] = true; + goto out; } if (!midQ[i]->resp_buf || @@ -949,9 +1012,10 @@ out: * This is prevented above by using a noop callback that will not * wake this thread except for the very last PDU. 
*/ - for (i = 0; i < num_rqst; i++) - cifs_delete_mid(midQ[i]); - add_credits(ses->server, credits, optype); + for (i = 0; i < num_rqst; i++) { + if (!cancelled_mid[i]) + cifs_delete_mid(midQ[i]); + } return rc; } diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 0f46cf550907..4dc788e3bc96 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -133,15 +133,25 @@ struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags) } EXPORT_SYMBOL(fscrypt_get_ctx); +void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, + const struct fscrypt_info *ci) +{ + memset(iv, 0, ci->ci_mode->ivsize); + iv->lblk_num = cpu_to_le64(lblk_num); + + if (ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY) + memcpy(iv->nonce, ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE); + + if (ci->ci_essiv_tfm != NULL) + crypto_cipher_encrypt_one(ci->ci_essiv_tfm, iv->raw, iv->raw); +} + int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, u64 lblk_num, struct page *src_page, struct page *dest_page, unsigned int len, unsigned int offs, gfp_t gfp_flags) { - struct { - __le64 index; - u8 padding[FS_IV_SIZE - sizeof(__le64)]; - } iv; + union fscrypt_iv iv; struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); struct scatterlist dst, src; @@ -151,15 +161,7 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, BUG_ON(len == 0); - BUILD_BUG_ON(sizeof(iv) != FS_IV_SIZE); - BUILD_BUG_ON(AES_BLOCK_SIZE != FS_IV_SIZE); - iv.index = cpu_to_le64(lblk_num); - memset(iv.padding, 0, sizeof(iv.padding)); - - if (ci->ci_essiv_tfm != NULL) { - crypto_cipher_encrypt_one(ci->ci_essiv_tfm, (u8 *)&iv, - (u8 *)&iv); - } + fscrypt_generate_iv(&iv, lblk_num, ci); req = skcipher_request_alloc(tfm, gfp_flags); if (!req) diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index d7a0f682ca12..7ff40a73dbec 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -40,10 +40,11 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname, { 
struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); - struct crypto_skcipher *tfm = inode->i_crypt_info->ci_ctfm; - int res = 0; - char iv[FS_CRYPTO_BLOCK_SIZE]; + struct fscrypt_info *ci = inode->i_crypt_info; + struct crypto_skcipher *tfm = ci->ci_ctfm; + union fscrypt_iv iv; struct scatterlist sg; + int res; /* * Copy the filename to the output buffer for encrypting in-place and @@ -55,7 +56,7 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname, memset(out + iname->len, 0, olen - iname->len); /* Initialize the IV */ - memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); + fscrypt_generate_iv(&iv, 0, ci); /* Set up the encryption request */ req = skcipher_request_alloc(tfm, GFP_NOFS); @@ -65,7 +66,7 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, &wait); sg_init_one(&sg, out, olen); - skcipher_request_set_crypt(req, &sg, &sg, olen, iv); + skcipher_request_set_crypt(req, &sg, &sg, olen, &iv); /* Do the encryption */ res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); @@ -94,9 +95,10 @@ static int fname_decrypt(struct inode *inode, struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); struct scatterlist src_sg, dst_sg; - struct crypto_skcipher *tfm = inode->i_crypt_info->ci_ctfm; - int res = 0; - char iv[FS_CRYPTO_BLOCK_SIZE]; + struct fscrypt_info *ci = inode->i_crypt_info; + struct crypto_skcipher *tfm = ci->ci_ctfm; + union fscrypt_iv iv; + int res; /* Allocate request */ req = skcipher_request_alloc(tfm, GFP_NOFS); @@ -107,12 +109,12 @@ static int fname_decrypt(struct inode *inode, crypto_req_done, &wait); /* Initialize IV */ - memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); + fscrypt_generate_iv(&iv, 0, ci); /* Create decryption request */ sg_init_one(&src_sg, iname->name, iname->len); sg_init_one(&dst_sg, oname->name, oname->len); - skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv); + skcipher_request_set_crypt(req, &src_sg, &dst_sg, 
iname->len, &iv); res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait); skcipher_request_free(req); if (res < 0) { diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 79debfc9cef9..7424f851eb5c 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -17,7 +17,6 @@ #include <crypto/hash.h> /* Encryption parameters */ -#define FS_IV_SIZE 16 #define FS_KEY_DERIVATION_NONCE_SIZE 16 /** @@ -52,16 +51,42 @@ struct fscrypt_symlink_data { } __packed; /* - * A pointer to this structure is stored in the file system's in-core - * representation of an inode. + * fscrypt_info - the "encryption key" for an inode + * + * When an encrypted file's key is made available, an instance of this struct is + * allocated and stored in ->i_crypt_info. Once created, it remains until the + * inode is evicted. */ struct fscrypt_info { + + /* The actual crypto transform used for encryption and decryption */ + struct crypto_skcipher *ci_ctfm; + + /* + * Cipher for ESSIV IV generation. Only set for CBC contents + * encryption, otherwise is NULL. + */ + struct crypto_cipher *ci_essiv_tfm; + + /* + * Encryption mode used for this inode. It corresponds to either + * ci_data_mode or ci_filename_mode, depending on the inode type. + */ + struct fscrypt_mode *ci_mode; + + /* + * If non-NULL, then this inode uses a master key directly rather than a + * derived key, and ci_ctfm will equal ci_master_key->mk_ctfm. + * Otherwise, this inode uses a derived key. 
+ */ + struct fscrypt_master_key *ci_master_key; + + /* fields from the fscrypt_context */ u8 ci_data_mode; u8 ci_filename_mode; u8 ci_flags; - struct crypto_skcipher *ci_ctfm; - struct crypto_cipher *ci_essiv_tfm; - u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE]; + u8 ci_master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; + u8 ci_nonce[FS_KEY_DERIVATION_NONCE_SIZE]; }; typedef enum { @@ -83,6 +108,10 @@ static inline bool fscrypt_valid_enc_modes(u32 contents_mode, filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) return true; + if (contents_mode == FS_ENCRYPTION_MODE_ADIANTUM && + filenames_mode == FS_ENCRYPTION_MODE_ADIANTUM) + return true; + return false; } @@ -107,6 +136,22 @@ fscrypt_msg(struct super_block *sb, const char *level, const char *fmt, ...); #define fscrypt_err(sb, fmt, ...) \ fscrypt_msg(sb, KERN_ERR, fmt, ##__VA_ARGS__) +#define FSCRYPT_MAX_IV_SIZE 32 + +union fscrypt_iv { + struct { + /* logical block number within the file */ + __le64 lblk_num; + + /* per-file nonce; only set in DIRECT_KEY mode */ + u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE]; + }; + u8 raw[FSCRYPT_MAX_IV_SIZE]; +}; + +void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, + const struct fscrypt_info *ci); + /* fname.c */ extern int fname_encrypt(struct inode *inode, const struct qstr *iname, u8 *out, unsigned int olen); @@ -115,6 +160,16 @@ extern bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 *encrypted_len_ret); /* keyinfo.c */ + +struct fscrypt_mode { + const char *friendly_name; + const char *cipher_str; + int keysize; + int ivsize; + bool logged_impl_name; + bool needs_essiv; +}; + extern void __exit fscrypt_essiv_cleanup(void); #endif /* _FSCRYPT_PRIVATE_H */ diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 7874c9bb2fc5..1e11a683f63d 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -10,15 +10,21 @@ */ #include <keys/user-type.h> +#include <linux/hashtable.h> #include <linux/scatterlist.h> #include <linux/ratelimit.h> #include 
<crypto/aes.h> +#include <crypto/algapi.h> #include <crypto/sha.h> #include <crypto/skcipher.h> #include "fscrypt_private.h" static struct crypto_shash *essiv_hash_tfm; +/* Table of keys referenced by FS_POLICY_FLAG_DIRECT_KEY policies */ +static DEFINE_HASHTABLE(fscrypt_master_keys, 6); /* 6 bits = 64 buckets */ +static DEFINE_SPINLOCK(fscrypt_master_keys_lock); + /* * Key derivation function. This generates the derived key by encrypting the * master key with AES-128-ECB using the inode's nonce as the AES key. @@ -123,56 +129,37 @@ invalid: return ERR_PTR(-ENOKEY); } -/* Find the master key, then derive the inode's actual encryption key */ -static int find_and_derive_key(const struct inode *inode, - const struct fscrypt_context *ctx, - u8 *derived_key, unsigned int derived_keysize) -{ - struct key *key; - const struct fscrypt_key *payload; - int err; - - key = find_and_lock_process_key(FS_KEY_DESC_PREFIX, - ctx->master_key_descriptor, - derived_keysize, &payload); - if (key == ERR_PTR(-ENOKEY) && inode->i_sb->s_cop->key_prefix) { - key = find_and_lock_process_key(inode->i_sb->s_cop->key_prefix, - ctx->master_key_descriptor, - derived_keysize, &payload); - } - if (IS_ERR(key)) - return PTR_ERR(key); - err = derive_key_aes(payload->raw, ctx, derived_key, derived_keysize); - up_read(&key->sem); - key_put(key); - return err; -} - -static struct fscrypt_mode { - const char *friendly_name; - const char *cipher_str; - int keysize; - bool logged_impl_name; -} available_modes[] = { +static struct fscrypt_mode available_modes[] = { [FS_ENCRYPTION_MODE_AES_256_XTS] = { .friendly_name = "AES-256-XTS", .cipher_str = "xts(aes)", .keysize = 64, + .ivsize = 16, }, [FS_ENCRYPTION_MODE_AES_256_CTS] = { .friendly_name = "AES-256-CTS-CBC", .cipher_str = "cts(cbc(aes))", .keysize = 32, + .ivsize = 16, }, [FS_ENCRYPTION_MODE_AES_128_CBC] = { .friendly_name = "AES-128-CBC", .cipher_str = "cbc(aes)", .keysize = 16, + .ivsize = 16, + .needs_essiv = true, }, 
[FS_ENCRYPTION_MODE_AES_128_CTS] = { .friendly_name = "AES-128-CTS-CBC", .cipher_str = "cts(cbc(aes))", .keysize = 16, + .ivsize = 16, + }, + [FS_ENCRYPTION_MODE_ADIANTUM] = { + .friendly_name = "Adiantum", + .cipher_str = "adiantum(xchacha12,aes)", + .keysize = 32, + .ivsize = 32, }, }; @@ -198,14 +185,196 @@ select_encryption_mode(const struct fscrypt_info *ci, const struct inode *inode) return ERR_PTR(-EINVAL); } -static void put_crypt_info(struct fscrypt_info *ci) +/* Find the master key, then derive the inode's actual encryption key */ +static int find_and_derive_key(const struct inode *inode, + const struct fscrypt_context *ctx, + u8 *derived_key, const struct fscrypt_mode *mode) { - if (!ci) + struct key *key; + const struct fscrypt_key *payload; + int err; + + key = find_and_lock_process_key(FS_KEY_DESC_PREFIX, + ctx->master_key_descriptor, + mode->keysize, &payload); + if (key == ERR_PTR(-ENOKEY) && inode->i_sb->s_cop->key_prefix) { + key = find_and_lock_process_key(inode->i_sb->s_cop->key_prefix, + ctx->master_key_descriptor, + mode->keysize, &payload); + } + if (IS_ERR(key)) + return PTR_ERR(key); + + if (ctx->flags & FS_POLICY_FLAG_DIRECT_KEY) { + if (mode->ivsize < offsetofend(union fscrypt_iv, nonce)) { + fscrypt_warn(inode->i_sb, + "direct key mode not allowed with %s", + mode->friendly_name); + err = -EINVAL; + } else if (ctx->contents_encryption_mode != + ctx->filenames_encryption_mode) { + fscrypt_warn(inode->i_sb, + "direct key mode not allowed with different contents and filenames modes"); + err = -EINVAL; + } else { + memcpy(derived_key, payload->raw, mode->keysize); + err = 0; + } + } else { + err = derive_key_aes(payload->raw, ctx, derived_key, + mode->keysize); + } + up_read(&key->sem); + key_put(key); + return err; +} + +/* Allocate and key a symmetric cipher object for the given encryption mode */ +static struct crypto_skcipher * +allocate_skcipher_for_mode(struct fscrypt_mode *mode, const u8 *raw_key, + const struct inode *inode) +{ + 
struct crypto_skcipher *tfm; + int err; + + tfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0); + if (IS_ERR(tfm)) { + fscrypt_warn(inode->i_sb, + "error allocating '%s' transform for inode %lu: %ld", + mode->cipher_str, inode->i_ino, PTR_ERR(tfm)); + return tfm; + } + if (unlikely(!mode->logged_impl_name)) { + /* + * fscrypt performance can vary greatly depending on which + * crypto algorithm implementation is used. Help people debug + * performance problems by logging the ->cra_driver_name the + * first time a mode is used. Note that multiple threads can + * race here, but it doesn't really matter. + */ + mode->logged_impl_name = true; + pr_info("fscrypt: %s using implementation \"%s\"\n", + mode->friendly_name, + crypto_skcipher_alg(tfm)->base.cra_driver_name); + } + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); + err = crypto_skcipher_setkey(tfm, raw_key, mode->keysize); + if (err) + goto err_free_tfm; + + return tfm; + +err_free_tfm: + crypto_free_skcipher(tfm); + return ERR_PTR(err); +} + +/* Master key referenced by FS_POLICY_FLAG_DIRECT_KEY policy */ +struct fscrypt_master_key { + struct hlist_node mk_node; + refcount_t mk_refcount; + const struct fscrypt_mode *mk_mode; + struct crypto_skcipher *mk_ctfm; + u8 mk_descriptor[FS_KEY_DESCRIPTOR_SIZE]; + u8 mk_raw[FS_MAX_KEY_SIZE]; +}; + +static void free_master_key(struct fscrypt_master_key *mk) +{ + if (mk) { + crypto_free_skcipher(mk->mk_ctfm); + kzfree(mk); + } +} + +static void put_master_key(struct fscrypt_master_key *mk) +{ + if (!refcount_dec_and_lock(&mk->mk_refcount, &fscrypt_master_keys_lock)) return; + hash_del(&mk->mk_node); + spin_unlock(&fscrypt_master_keys_lock); - crypto_free_skcipher(ci->ci_ctfm); - crypto_free_cipher(ci->ci_essiv_tfm); - kmem_cache_free(fscrypt_info_cachep, ci); + free_master_key(mk); +} + +/* + * Find/insert the given master key into the fscrypt_master_keys table. If + * found, it is returned with elevated refcount, and 'to_insert' is freed if + * non-NULL. 
If not found, 'to_insert' is inserted and returned if it's + * non-NULL; otherwise NULL is returned. + */ +static struct fscrypt_master_key * +find_or_insert_master_key(struct fscrypt_master_key *to_insert, + const u8 *raw_key, const struct fscrypt_mode *mode, + const struct fscrypt_info *ci) +{ + unsigned long hash_key; + struct fscrypt_master_key *mk; + + /* + * Careful: to avoid potentially leaking secret key bytes via timing + * information, we must key the hash table by descriptor rather than by + * raw key, and use crypto_memneq() when comparing raw keys. + */ + + BUILD_BUG_ON(sizeof(hash_key) > FS_KEY_DESCRIPTOR_SIZE); + memcpy(&hash_key, ci->ci_master_key_descriptor, sizeof(hash_key)); + + spin_lock(&fscrypt_master_keys_lock); + hash_for_each_possible(fscrypt_master_keys, mk, mk_node, hash_key) { + if (memcmp(ci->ci_master_key_descriptor, mk->mk_descriptor, + FS_KEY_DESCRIPTOR_SIZE) != 0) + continue; + if (mode != mk->mk_mode) + continue; + if (crypto_memneq(raw_key, mk->mk_raw, mode->keysize)) + continue; + /* using existing tfm with same (descriptor, mode, raw_key) */ + refcount_inc(&mk->mk_refcount); + spin_unlock(&fscrypt_master_keys_lock); + free_master_key(to_insert); + return mk; + } + if (to_insert) + hash_add(fscrypt_master_keys, &to_insert->mk_node, hash_key); + spin_unlock(&fscrypt_master_keys_lock); + return to_insert; +} + +/* Prepare to encrypt directly using the master key in the given mode */ +static struct fscrypt_master_key * +fscrypt_get_master_key(const struct fscrypt_info *ci, struct fscrypt_mode *mode, + const u8 *raw_key, const struct inode *inode) +{ + struct fscrypt_master_key *mk; + int err; + + /* Is there already a tfm for this key? */ + mk = find_or_insert_master_key(NULL, raw_key, mode, ci); + if (mk) + return mk; + + /* Nope, allocate one. 
*/ + mk = kzalloc(sizeof(*mk), GFP_NOFS); + if (!mk) + return ERR_PTR(-ENOMEM); + refcount_set(&mk->mk_refcount, 1); + mk->mk_mode = mode; + mk->mk_ctfm = allocate_skcipher_for_mode(mode, raw_key, inode); + if (IS_ERR(mk->mk_ctfm)) { + err = PTR_ERR(mk->mk_ctfm); + mk->mk_ctfm = NULL; + goto err_free_mk; + } + memcpy(mk->mk_descriptor, ci->ci_master_key_descriptor, + FS_KEY_DESCRIPTOR_SIZE); + memcpy(mk->mk_raw, raw_key, mode->keysize); + + return find_or_insert_master_key(mk, raw_key, mode, ci); + +err_free_mk: + free_master_key(mk); + return ERR_PTR(err); } static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt) @@ -275,11 +444,67 @@ void __exit fscrypt_essiv_cleanup(void) crypto_free_shash(essiv_hash_tfm); } +/* + * Given the encryption mode and key (normally the derived key, but for + * FS_POLICY_FLAG_DIRECT_KEY mode it's the master key), set up the inode's + * symmetric cipher transform object(s). + */ +static int setup_crypto_transform(struct fscrypt_info *ci, + struct fscrypt_mode *mode, + const u8 *raw_key, const struct inode *inode) +{ + struct fscrypt_master_key *mk; + struct crypto_skcipher *ctfm; + int err; + + if (ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY) { + mk = fscrypt_get_master_key(ci, mode, raw_key, inode); + if (IS_ERR(mk)) + return PTR_ERR(mk); + ctfm = mk->mk_ctfm; + } else { + mk = NULL; + ctfm = allocate_skcipher_for_mode(mode, raw_key, inode); + if (IS_ERR(ctfm)) + return PTR_ERR(ctfm); + } + ci->ci_master_key = mk; + ci->ci_ctfm = ctfm; + + if (mode->needs_essiv) { + /* ESSIV implies 16-byte IVs which implies !DIRECT_KEY */ + WARN_ON(mode->ivsize != AES_BLOCK_SIZE); + WARN_ON(ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY); + + err = init_essiv_generator(ci, raw_key, mode->keysize); + if (err) { + fscrypt_warn(inode->i_sb, + "error initializing ESSIV generator for inode %lu: %d", + inode->i_ino, err); + return err; + } + } + return 0; +} + +static void put_crypt_info(struct fscrypt_info *ci) +{ + if (!ci) + return; + + if 
(ci->ci_master_key) { + put_master_key(ci->ci_master_key); + } else { + crypto_free_skcipher(ci->ci_ctfm); + crypto_free_cipher(ci->ci_essiv_tfm); + } + kmem_cache_free(fscrypt_info_cachep, ci); +} + int fscrypt_get_encryption_info(struct inode *inode) { struct fscrypt_info *crypt_info; struct fscrypt_context ctx; - struct crypto_skcipher *ctfm; struct fscrypt_mode *mode; u8 *raw_key = NULL; int res; @@ -312,74 +537,42 @@ int fscrypt_get_encryption_info(struct inode *inode) if (ctx.flags & ~FS_POLICY_FLAGS_VALID) return -EINVAL; - crypt_info = kmem_cache_alloc(fscrypt_info_cachep, GFP_NOFS); + crypt_info = kmem_cache_zalloc(fscrypt_info_cachep, GFP_NOFS); if (!crypt_info) return -ENOMEM; crypt_info->ci_flags = ctx.flags; crypt_info->ci_data_mode = ctx.contents_encryption_mode; crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; - crypt_info->ci_ctfm = NULL; - crypt_info->ci_essiv_tfm = NULL; - memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, - sizeof(crypt_info->ci_master_key)); + memcpy(crypt_info->ci_master_key_descriptor, ctx.master_key_descriptor, + FS_KEY_DESCRIPTOR_SIZE); + memcpy(crypt_info->ci_nonce, ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE); mode = select_encryption_mode(crypt_info, inode); if (IS_ERR(mode)) { res = PTR_ERR(mode); goto out; } + WARN_ON(mode->ivsize > FSCRYPT_MAX_IV_SIZE); + crypt_info->ci_mode = mode; /* - * This cannot be a stack buffer because it is passed to the scatterlist - * crypto API as part of key derivation. + * This cannot be a stack buffer because it may be passed to the + * scatterlist crypto API as part of key derivation. 
*/ res = -ENOMEM; raw_key = kmalloc(mode->keysize, GFP_NOFS); if (!raw_key) goto out; - res = find_and_derive_key(inode, &ctx, raw_key, mode->keysize); + res = find_and_derive_key(inode, &ctx, raw_key, mode); if (res) goto out; - ctfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0); - if (IS_ERR(ctfm)) { - res = PTR_ERR(ctfm); - fscrypt_warn(inode->i_sb, - "error allocating '%s' transform for inode %lu: %d", - mode->cipher_str, inode->i_ino, res); - goto out; - } - if (unlikely(!mode->logged_impl_name)) { - /* - * fscrypt performance can vary greatly depending on which - * crypto algorithm implementation is used. Help people debug - * performance problems by logging the ->cra_driver_name the - * first time a mode is used. Note that multiple threads can - * race here, but it doesn't really matter. - */ - mode->logged_impl_name = true; - pr_info("fscrypt: %s using implementation \"%s\"\n", - mode->friendly_name, - crypto_skcipher_alg(ctfm)->base.cra_driver_name); - } - crypt_info->ci_ctfm = ctfm; - crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY); - res = crypto_skcipher_setkey(ctfm, raw_key, mode->keysize); + res = setup_crypto_transform(crypt_info, mode, raw_key, inode); if (res) goto out; - if (S_ISREG(inode->i_mode) && - crypt_info->ci_data_mode == FS_ENCRYPTION_MODE_AES_128_CBC) { - res = init_essiv_generator(crypt_info, raw_key, mode->keysize); - if (res) { - fscrypt_warn(inode->i_sb, - "error initializing ESSIV generator for inode %lu: %d", - inode->i_ino, res); - goto out; - } - } if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) == NULL) crypt_info = NULL; out: diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index c6d431a5cce9..f490de921ce8 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -199,7 +199,8 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) child_ci = child->i_crypt_info; if (parent_ci && child_ci) { - return memcmp(parent_ci->ci_master_key, child_ci->ci_master_key, + return 
memcmp(parent_ci->ci_master_key_descriptor, + child_ci->ci_master_key_descriptor, FS_KEY_DESCRIPTOR_SIZE) == 0 && (parent_ci->ci_data_mode == child_ci->ci_data_mode) && (parent_ci->ci_filename_mode == @@ -254,7 +255,7 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child, ctx.contents_encryption_mode = ci->ci_data_mode; ctx.filenames_encryption_mode = ci->ci_filename_mode; ctx.flags = ci->ci_flags; - memcpy(ctx.master_key_descriptor, ci->ci_master_key, + memcpy(ctx.master_key_descriptor, ci->ci_master_key_descriptor, FS_KEY_DESCRIPTOR_SIZE); get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE); BUILD_BUG_ON(sizeof(ctx) != FSCRYPT_SET_CONTEXT_MAX_SIZE); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 8a5a1010886b..a5d219d920e7 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -381,7 +381,8 @@ static void ep_nested_calls_init(struct nested_calls *ncalls) */ static inline int ep_events_available(struct eventpoll *ep) { - return !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR; + return !list_empty_careful(&ep->rdllist) || + READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR; } #ifdef CONFIG_NET_RX_BUSY_POLL @@ -471,7 +472,6 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi) * no re-entered. * * @ncalls: Pointer to the nested_calls structure to be used for this call. - * @max_nests: Maximum number of allowed nesting calls. * @nproc: Nested call core function pointer. * @priv: Opaque data to be passed to the @nproc callback. * @cookie: Cookie to be used to identify this nested call. @@ -480,7 +480,7 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi) * Returns: Returns the code returned by the @nproc callback, or -1 if * the maximum recursion limit has been exceeded. 
*/ -static int ep_call_nested(struct nested_calls *ncalls, int max_nests, +static int ep_call_nested(struct nested_calls *ncalls, int (*nproc)(void *, void *, int), void *priv, void *cookie, void *ctx) { @@ -499,7 +499,7 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests, */ list_for_each_entry(tncur, lsthead, llink) { if (tncur->ctx == ctx && - (tncur->cookie == cookie || ++call_nests > max_nests)) { + (tncur->cookie == cookie || ++call_nests > EP_MAX_NESTS)) { /* * Ops ... loop detected or maximum nest level reached. * We abort this wake by breaking the cycle itself. @@ -573,7 +573,7 @@ static void ep_poll_safewake(wait_queue_head_t *wq) { int this_cpu = get_cpu(); - ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS, + ep_call_nested(&poll_safewake_ncalls, ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu); put_cpu(); @@ -699,7 +699,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep, */ spin_lock_irq(&ep->wq.lock); list_splice_init(&ep->rdllist, &txlist); - ep->ovflist = NULL; + WRITE_ONCE(ep->ovflist, NULL); spin_unlock_irq(&ep->wq.lock); /* @@ -713,7 +713,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep, * other events might have been queued by the poll callback. * We re-insert them inside the main ready-list here. */ - for (nepi = ep->ovflist; (epi = nepi) != NULL; + for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL; nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { /* * We need to check if the item is already in the list. @@ -731,7 +731,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep, * releasing the lock, events will be queued in the normal way inside * ep->rdllist. */ - ep->ovflist = EP_UNACTIVE_PTR; + WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR); /* * Quickly re-inject items left on "txlist". @@ -1154,10 +1154,10 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v * semantics). 
All the events that happen during that period of time are * chained in ep->ovflist and requeued later on. */ - if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) { + if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) { if (epi->next == EP_UNACTIVE_PTR) { - epi->next = ep->ovflist; - ep->ovflist = epi; + epi->next = READ_ONCE(ep->ovflist); + WRITE_ONCE(ep->ovflist, epi); if (epi->ws) { /* * Activate ep->ws since epi->ws may get @@ -1333,7 +1333,6 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests) } } else { error = ep_call_nested(&poll_loop_ncalls, - EP_MAX_NESTS, reverse_path_check_proc, child_file, child_file, current); @@ -1367,7 +1366,7 @@ static int reverse_path_check(void) /* let's call this for all tfiles */ list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) { path_count_init(); - error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, + error = ep_call_nested(&poll_loop_ncalls, reverse_path_check_proc, current_file, current_file, current); if (error) @@ -1626,21 +1625,24 @@ static __poll_t ep_send_events_proc(struct eventpoll *ep, struct list_head *head { struct ep_send_events_data *esed = priv; __poll_t revents; - struct epitem *epi; - struct epoll_event __user *uevent; + struct epitem *epi, *tmp; + struct epoll_event __user *uevent = esed->events; struct wakeup_source *ws; poll_table pt; init_poll_funcptr(&pt, NULL); + esed->res = 0; /* * We can loop without lock because we are passed a task private list. * Items cannot vanish during the loop because ep_scan_ready_list() is * holding "mtx" during this call. 
*/ - for (esed->res = 0, uevent = esed->events; - !list_empty(head) && esed->res < esed->maxevents;) { - epi = list_first_entry(head, struct epitem, rdllink); + lockdep_assert_held(&ep->mtx); + + list_for_each_entry_safe(epi, tmp, head, rdllink) { + if (esed->res >= esed->maxevents) + break; /* * Activate ep->ws before deactivating epi->ws to prevent @@ -1660,42 +1662,42 @@ static __poll_t ep_send_events_proc(struct eventpoll *ep, struct list_head *head list_del_init(&epi->rdllink); - revents = ep_item_poll(epi, &pt, 1); - /* * If the event mask intersect the caller-requested one, * deliver the event to userspace. Again, ep_scan_ready_list() - * is holding "mtx", so no operations coming from userspace + * is holding ep->mtx, so no operations coming from userspace * can change the item. */ - if (revents) { - if (__put_user(revents, &uevent->events) || - __put_user(epi->event.data, &uevent->data)) { - list_add(&epi->rdllink, head); - ep_pm_stay_awake(epi); - if (!esed->res) - esed->res = -EFAULT; - return 0; - } - esed->res++; - uevent++; - if (epi->event.events & EPOLLONESHOT) - epi->event.events &= EP_PRIVATE_BITS; - else if (!(epi->event.events & EPOLLET)) { - /* - * If this file has been added with Level - * Trigger mode, we need to insert back inside - * the ready list, so that the next call to - * epoll_wait() will check again the events - * availability. At this point, no one can insert - * into ep->rdllist besides us. The epoll_ctl() - * callers are locked out by - * ep_scan_ready_list() holding "mtx" and the - * poll callback will queue them in ep->ovflist. 
- */ - list_add_tail(&epi->rdllink, &ep->rdllist); - ep_pm_stay_awake(epi); - } + revents = ep_item_poll(epi, &pt, 1); + if (!revents) + continue; + + if (__put_user(revents, &uevent->events) || + __put_user(epi->event.data, &uevent->data)) { + list_add(&epi->rdllink, head); + ep_pm_stay_awake(epi); + if (!esed->res) + esed->res = -EFAULT; + return 0; + } + esed->res++; + uevent++; + if (epi->event.events & EPOLLONESHOT) + epi->event.events &= EP_PRIVATE_BITS; + else if (!(epi->event.events & EPOLLET)) { + /* + * If this file has been added with Level + * Trigger mode, we need to insert back inside + * the ready list, so that the next call to + * epoll_wait() will check again the events + * availability. At this point, no one can insert + * into ep->rdllist besides us. The epoll_ctl() + * callers are locked out by + * ep_scan_ready_list() holding "mtx" and the + * poll callback will queue them in ep->ovflist. + */ + list_add_tail(&epi->rdllink, &ep->rdllist); + ep_pm_stay_awake(epi); } } @@ -1747,6 +1749,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, { int res = 0, eavail, timed_out = 0; u64 slack = 0; + bool waiter = false; wait_queue_entry_t wait; ktime_t expires, *to = NULL; @@ -1761,11 +1764,18 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, } else if (timeout == 0) { /* * Avoid the unnecessary trip to the wait queue loop, if the - * caller specified a non blocking operation. + * caller specified a non blocking operation. We still need + * lock because we could race and not see an epi being added + * to the ready list while in irq callback. Thus incorrectly + * returning 0 back to userspace. 
*/ timed_out = 1; + spin_lock_irq(&ep->wq.lock); - goto check_events; + eavail = ep_events_available(ep); + spin_unlock_irq(&ep->wq.lock); + + goto send_events; } fetch_events: @@ -1773,64 +1783,66 @@ fetch_events: if (!ep_events_available(ep)) ep_busy_loop(ep, timed_out); - spin_lock_irq(&ep->wq.lock); + eavail = ep_events_available(ep); + if (eavail) + goto send_events; - if (!ep_events_available(ep)) { - /* - * Busy poll timed out. Drop NAPI ID for now, we can add - * it back in when we have moved a socket with a valid NAPI - * ID onto the ready list. - */ - ep_reset_busy_poll_napi_id(ep); + /* + * Busy poll timed out. Drop NAPI ID for now, we can add + * it back in when we have moved a socket with a valid NAPI + * ID onto the ready list. + */ + ep_reset_busy_poll_napi_id(ep); - /* - * We don't have any available event to return to the caller. - * We need to sleep here, and we will be wake up by - * ep_poll_callback() when events will become available. - */ + /* + * We don't have any available event to return to the caller. We need + * to sleep here, and we will be woken by ep_poll_callback() when events + * become available. + */ + if (!waiter) { + waiter = true; init_waitqueue_entry(&wait, current); - __add_wait_queue_exclusive(&ep->wq, &wait); - for (;;) { - /* - * We don't want to sleep if the ep_poll_callback() sends us - * a wakeup in between. That's why we set the task state - * to TASK_INTERRUPTIBLE before doing the checks. - */ - set_current_state(TASK_INTERRUPTIBLE); - /* - * Always short-circuit for fatal signals to allow - * threads to make a timely exit without the chance of - * finding more events available and fetching - * repeatedly. 
- */ - if (fatal_signal_pending(current)) { - res = -EINTR; - break; - } - if (ep_events_available(ep) || timed_out) - break; - if (signal_pending(current)) { - res = -EINTR; - break; - } + spin_lock_irq(&ep->wq.lock); + __add_wait_queue_exclusive(&ep->wq, &wait); + spin_unlock_irq(&ep->wq.lock); + } - spin_unlock_irq(&ep->wq.lock); - if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) - timed_out = 1; + for (;;) { + /* + * We don't want to sleep if the ep_poll_callback() sends us + * a wakeup in between. That's why we set the task state + * to TASK_INTERRUPTIBLE before doing the checks. + */ + set_current_state(TASK_INTERRUPTIBLE); + /* + * Always short-circuit for fatal signals to allow + * threads to make a timely exit without the chance of + * finding more events available and fetching + * repeatedly. + */ + if (fatal_signal_pending(current)) { + res = -EINTR; + break; + } - spin_lock_irq(&ep->wq.lock); + eavail = ep_events_available(ep); + if (eavail) + break; + if (signal_pending(current)) { + res = -EINTR; + break; } - __remove_wait_queue(&ep->wq, &wait); - __set_current_state(TASK_RUNNING); + if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) { + timed_out = 1; + break; + } } -check_events: - /* Is it worth to try to dig for events ? */ - eavail = ep_events_available(ep); - spin_unlock_irq(&ep->wq.lock); + __set_current_state(TASK_RUNNING); +send_events: /* * Try to transfer events to user space. 
In case we get 0 events and * there's still timeout left over, we go trying again in search of @@ -1840,6 +1852,12 @@ check_events: !(res = ep_send_events(ep, events, maxevents)) && !timed_out) goto fetch_events; + if (waiter) { + spin_lock_irq(&ep->wq.lock); + __remove_wait_queue(&ep->wq, &wait); + spin_unlock_irq(&ep->wq.lock); + } + return res; } @@ -1876,7 +1894,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) ep_tovisit = epi->ffd.file->private_data; if (ep_tovisit->visited) continue; - error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, + error = ep_call_nested(&poll_loop_ncalls, ep_loop_check_proc, epi->ffd.file, ep_tovisit, current); if (error != 0) @@ -1916,7 +1934,7 @@ static int ep_loop_check(struct eventpoll *ep, struct file *file) int ret; struct eventpoll *ep_cur, *ep_next; - ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, + ret = ep_call_nested(&poll_loop_ncalls, ep_loop_check_proc, file, ep, current); /* clear visited list */ list_for_each_entry_safe(ep_cur, ep_next, &visited_list, @@ -2172,7 +2190,7 @@ static int do_epoll_wait(int epfd, struct epoll_event __user *events, return -EINVAL; /* Verify that the area passed by the user is writeable */ - if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) + if (!access_ok(events, maxevents * sizeof(struct epoll_event))) return -EFAULT; /* Get the "struct file *" for the eventpoll file */ diff --git a/fs/exec.c b/fs/exec.c index fc281b738a98..fb72d36f7823 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -218,55 +218,10 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, if (ret <= 0) return NULL; - if (write) { - unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; - unsigned long ptr_size, limit; - - /* - * Since the stack will hold pointers to the strings, we - * must account for them as well. 
- * - * The size calculation is the entire vma while each arg page is - * built, so each time we get here it's calculating how far it - * is currently (rather than each call being just the newly - * added size from the arg page). As a result, we need to - * always add the entire size of the pointers, so that on the - * last call to get_arg_page() we'll actually have the entire - * correct size. - */ - ptr_size = (bprm->argc + bprm->envc) * sizeof(void *); - if (ptr_size > ULONG_MAX - size) - goto fail; - size += ptr_size; - - acct_arg_size(bprm, size / PAGE_SIZE); - - /* - * We've historically supported up to 32 pages (ARG_MAX) - * of argument strings even with small stacks - */ - if (size <= ARG_MAX) - return page; - - /* - * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM - * (whichever is smaller) for the argv+env strings. - * This ensures that: - * - the remaining binfmt code will not run out of stack space, - * - the program will have a reasonable amount of stack left - * to work from. - */ - limit = _STK_LIM / 4 * 3; - limit = min(limit, bprm->rlim_stack.rlim_cur / 4); - if (size > limit) - goto fail; - } + if (write) + acct_arg_size(bprm, vma_pages(bprm->vma)); return page; - -fail: - put_page(page); - return NULL; } static void put_arg_page(struct page *page) @@ -492,6 +447,50 @@ static int count(struct user_arg_ptr argv, int max) return i; } +static int prepare_arg_pages(struct linux_binprm *bprm, + struct user_arg_ptr argv, struct user_arg_ptr envp) +{ + unsigned long limit, ptr_size; + + bprm->argc = count(argv, MAX_ARG_STRINGS); + if (bprm->argc < 0) + return bprm->argc; + + bprm->envc = count(envp, MAX_ARG_STRINGS); + if (bprm->envc < 0) + return bprm->envc; + + /* + * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM + * (whichever is smaller) for the argv+env strings. + * This ensures that: + * - the remaining binfmt code will not run out of stack space, + * - the program will have a reasonable amount of stack left + * to work from. 
+ */ + limit = _STK_LIM / 4 * 3; + limit = min(limit, bprm->rlim_stack.rlim_cur / 4); + /* + * We've historically supported up to 32 pages (ARG_MAX) + * of argument strings even with small stacks + */ + limit = max_t(unsigned long, limit, ARG_MAX); + /* + * We must account for the size of all the argv and envp pointers to + * the argv and envp strings, since they will also take up space in + * the stack. They aren't stored until much later when we can't + * signal to the parent that the child has run out of stack space. + * Instead, calculate it here so it's possible to fail gracefully. + */ + ptr_size = (bprm->argc + bprm->envc) * sizeof(void *); + if (limit <= ptr_size) + return -E2BIG; + limit -= ptr_size; + + bprm->argmin = bprm->p - limit; + return 0; +} + /* * 'copy_strings()' copies argument/environment strings from the old * processes's memory to the new process's stack. The call to get_user_pages() @@ -527,6 +526,10 @@ static int copy_strings(int argc, struct user_arg_ptr argv, pos = bprm->p; str += len; bprm->p -= len; +#ifdef CONFIG_MMU + if (bprm->p < bprm->argmin) + goto out; +#endif while (len > 0) { int offset, bytes_to_copy; @@ -1084,7 +1087,7 @@ static int de_thread(struct task_struct *tsk) __set_current_state(TASK_KILLABLE); spin_unlock_irq(lock); schedule(); - if (unlikely(__fatal_signal_pending(tsk))) + if (__fatal_signal_pending(tsk)) goto killed; spin_lock_irq(lock); } @@ -1112,7 +1115,7 @@ static int de_thread(struct task_struct *tsk) write_unlock_irq(&tasklist_lock); cgroup_threadgroup_change_end(tsk); schedule(); - if (unlikely(__fatal_signal_pending(tsk))) + if (__fatal_signal_pending(tsk)) goto killed; } @@ -1399,7 +1402,7 @@ EXPORT_SYMBOL(finalize_exec); * Or, if exec fails before, free_bprm() should release ->cred and * and unlock. 
*/ -int prepare_bprm_creds(struct linux_binprm *bprm) +static int prepare_bprm_creds(struct linux_binprm *bprm) { if (mutex_lock_interruptible(&current->signal->cred_guard_mutex)) return -ERESTARTNOINTR; @@ -1789,12 +1792,8 @@ static int __do_execve_file(int fd, struct filename *filename, if (retval) goto out_unmark; - bprm->argc = count(argv, MAX_ARG_STRINGS); - if ((retval = bprm->argc) < 0) - goto out; - - bprm->envc = count(envp, MAX_ARG_STRINGS); - if ((retval = bprm->envc) < 0) + retval = prepare_arg_pages(bprm, argv, envp); + if (retval < 0) goto out; retval = prepare_binprm(bprm); diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 906839a4da8f..fc80c7233fa5 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -705,21 +705,18 @@ out: /* * Read the superblock from the OSD and fill in the fields */ -static int exofs_fill_super(struct super_block *sb, void *data, int silent) +static int exofs_fill_super(struct super_block *sb, + struct exofs_mountopt *opts, + struct exofs_sb_info *sbi, + int silent) { struct inode *root; - struct exofs_mountopt *opts = data; - struct exofs_sb_info *sbi; /*extended info */ struct osd_dev *od; /* Master device */ struct exofs_fscb fscb; /*on-disk superblock info */ struct ore_comp comp; unsigned table_count; int ret; - sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); - if (!sbi) - return -ENOMEM; - /* use mount options to fill superblock */ if (opts->is_osdname) { struct osd_dev_info odi = {.systemid_len = 0}; @@ -863,7 +860,9 @@ static struct dentry *exofs_mount(struct file_system_type *type, int flags, const char *dev_name, void *data) { + struct super_block *s; struct exofs_mountopt opts; + struct exofs_sb_info *sbi; int ret; ret = parse_options(data, &opts); @@ -872,9 +871,31 @@ static struct dentry *exofs_mount(struct file_system_type *type, return ERR_PTR(ret); } + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) { + kfree(opts.dev_name); + return ERR_PTR(-ENOMEM); + } + + s = sget(type, NULL, set_anon_super, flags, NULL); + + 
if (IS_ERR(s)) { + kfree(opts.dev_name); + kfree(sbi); + return ERR_CAST(s); + } + if (!opts.dev_name) opts.dev_name = dev_name; - return mount_nodev(type, flags, &opts, exofs_fill_super); + + + ret = exofs_fill_super(s, &opts, sbi, flags & SB_SILENT ? 1 : 0); + if (ret) { + deactivate_locked_super(s); + return ERR_PTR(ret); + } + s->s_flags |= SB_ACTIVE; + return dget(s->s_root); } /* diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 26a7fe5c4fd3..712f00995390 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -116,8 +116,16 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; } + ret = file_write_and_wait_range(file, start, end); + if (ret) + return ret; + if (!journal) { - ret = __generic_file_fsync(file, start, end, datasync); + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL + }; + + ret = ext4_write_inode(inode, &wbc); if (!ret) ret = ext4_sync_parent(inode); if (test_opt(inode->i_sb, BARRIER)) @@ -125,9 +133,6 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; } - ret = file_write_and_wait_range(file, start, end); - if (ret) - return ret; /* * data=writeback,ordered: * The caller's filemap_fdatawrite()/wait will sync the data. 
@@ -159,6 +164,9 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) ret = err; } out: + err = file_check_and_advance_wb_err(file); + if (ret == 0) + ret = err; trace_ext4_sync_file_exit(inode, ret); return ret; } diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 27373d88b5f0..56f6e1782d5f 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1890,12 +1890,12 @@ int ext4_inline_data_fiemap(struct inode *inode, physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; physical += offsetof(struct ext4_inode, i_block); - if (physical) - error = fiemap_fill_next_extent(fieinfo, start, physical, - inline_len, flags); brelse(iloc.bh); out: up_read(&EXT4_I(inode)->xattr_sem); + if (physical) + error = fiemap_fill_next_extent(fieinfo, start, physical, + inline_len, flags); return (error < 0 ? error : 0); } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9affabd07682..34d7e0703cc6 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2778,7 +2778,8 @@ static int ext4_writepages(struct address_space *mapping, * We may need to convert up to one extent per block in * the page and we may dirty the inode. 
*/ - rsv_blocks = 1 + (PAGE_SIZE >> inode->i_blkbits); + rsv_blocks = 1 + ext4_chunk_trans_blocks(inode, + PAGE_SIZE >> inode->i_blkbits); } /* @@ -4833,7 +4834,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, gid_t i_gid; projid_t i_projid; - if (((flags & EXT4_IGET_NORMAL) && + if ((!(flags & EXT4_IGET_SPECIAL) && (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)) || (ino < EXT4_ROOT_INO) || (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) { diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index f461d75ac049..6aa282ee455a 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -128,7 +128,7 @@ int ext4_mpage_readpages(struct address_space *mapping, prefetchw(&page->flags); if (pages) { - page = list_entry(pages->prev, struct page, lru); + page = lru_to_page(pages); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, page->index, readahead_gfp_mask(mapping))) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d6c142d73d99..fb12d3c17c1b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4902,7 +4902,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) ext4_superblock_csum_set(sb); if (sync) lock_buffer(sbh); - if (buffer_write_io_error(sbh)) { + if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) { /* * Oh, dear. A previous attempt to write the * superblock failed. 
This could happen because the diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 78d501c1fb65..738e427e2d21 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -363,7 +363,7 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, *phys = 0; *mapped_blocks = 0; - if ((sbi->fat_bits != 32) && (inode->i_ino == MSDOS_ROOT_INO)) { + if (!is_fat32(sbi) && (inode->i_ino == MSDOS_ROOT_INO)) { if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits)) { *phys = sector + sbi->dir_start; *mapped_blocks = 1; diff --git a/fs/fat/dir.c b/fs/fat/dir.c index c8366cb8eccd..9d01db37183f 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -57,7 +57,7 @@ static inline void fat_dir_readahead(struct inode *dir, sector_t iblock, if ((iblock & (sbi->sec_per_clus - 1)) || sbi->sec_per_clus == 1) return; /* root dir of FAT12/FAT16 */ - if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO)) + if (!is_fat32(sbi) && (dir->i_ino == MSDOS_ROOT_INO)) return; bh = sb_find_get_block(sb, phys); @@ -805,7 +805,7 @@ static long fat_dir_ioctl(struct file *filp, unsigned int cmd, return fat_generic_ioctl(filp, cmd, arg); } - if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2]))) + if (!access_ok(d1, sizeof(struct __fat_dirent[2]))) return -EFAULT; /* * Yes, we don't need this put_user() absolutely. However old @@ -845,7 +845,7 @@ static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd, return fat_generic_ioctl(filp, cmd, (unsigned long)arg); } - if (!access_ok(VERIFY_WRITE, d1, sizeof(struct compat_dirent[2]))) + if (!access_ok(d1, sizeof(struct compat_dirent[2]))) return -EFAULT; /* * Yes, we don't need this put_user() absolutely. 
However old @@ -1313,7 +1313,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots, } } if (dir->i_ino == MSDOS_ROOT_INO) { - if (sbi->fat_bits != 32) + if (!is_fat32(sbi)) goto error; } else if (MSDOS_I(dir)->i_start == 0) { fat_msg(sb, KERN_ERR, "Corrupted directory (i_pos %lld)", diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 4e1b2f6df5e6..922a0c6ba46c 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -142,6 +142,34 @@ static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb) return sb->s_fs_info; } +/* + * Functions that determine the variant of the FAT file system (i.e., + * whether this is FAT12, FAT16 or FAT32. + */ +static inline bool is_fat12(const struct msdos_sb_info *sbi) +{ + return sbi->fat_bits == 12; +} + +static inline bool is_fat16(const struct msdos_sb_info *sbi) +{ + return sbi->fat_bits == 16; +} + +static inline bool is_fat32(const struct msdos_sb_info *sbi) +{ + return sbi->fat_bits == 32; +} + +/* Maximum number of clusters */ +static inline u32 max_fat(struct super_block *sb) +{ + struct msdos_sb_info *sbi = MSDOS_SB(sb); + + return is_fat32(sbi) ? MAX_FAT32 : + is_fat16(sbi) ? 
MAX_FAT16 : MAX_FAT12; +} + static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) { return container_of(inode, struct msdos_inode_info, vfs_inode); @@ -257,7 +285,7 @@ static inline int fat_get_start(const struct msdos_sb_info *sbi, const struct msdos_dir_entry *de) { int cluster = le16_to_cpu(de->start); - if (sbi->fat_bits == 32) + if (is_fat32(sbi)) cluster |= (le16_to_cpu(de->starthi) << 16); return cluster; } diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index f58c0cacc531..495edeafd60a 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -290,19 +290,17 @@ void fat_ent_access_init(struct super_block *sb) mutex_init(&sbi->fat_lock); - switch (sbi->fat_bits) { - case 32: + if (is_fat32(sbi)) { sbi->fatent_shift = 2; sbi->fatent_ops = &fat32_ops; - break; - case 16: + } else if (is_fat16(sbi)) { sbi->fatent_shift = 1; sbi->fatent_ops = &fat16_ops; - break; - case 12: + } else if (is_fat12(sbi)) { sbi->fatent_shift = -1; sbi->fatent_ops = &fat12_ops; - break; + } else { + fat_fs_error(sb, "invalid FAT variant, %u bits", sbi->fat_bits); } } @@ -310,7 +308,7 @@ static void mark_fsinfo_dirty(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); - if (sb_rdonly(sb) || sbi->fat_bits != 32) + if (sb_rdonly(sb) || !is_fat32(sbi)) return; __mark_inode_dirty(sbi->fsinfo_inode, I_DIRTY_SYNC); @@ -327,7 +325,7 @@ static inline int fat_ent_update_ptr(struct super_block *sb, /* Is this fatent's blocks including this entry? */ if (!fatent->nr_bhs || bhs[0]->b_blocknr != blocknr) return 0; - if (sbi->fat_bits == 12) { + if (is_fat12(sbi)) { if ((offset + 1) < sb->s_blocksize) { /* This entry is on bhs[0]. 
*/ if (fatent->nr_bhs == 2) { diff --git a/fs/fat/inode.c b/fs/fat/inode.c index c0b5b5c3373b..79bb0e73a65f 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -686,7 +686,7 @@ static void fat_set_state(struct super_block *sb, b = (struct fat_boot_sector *) bh->b_data; - if (sbi->fat_bits == 32) { + if (is_fat32(sbi)) { if (set) b->fat32.state |= FAT_STATE_DIRTY; else @@ -1396,7 +1396,7 @@ static int fat_read_root(struct inode *inode) inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO); inode->i_op = sbi->dir_ops; inode->i_fop = &fat_dir_operations; - if (sbi->fat_bits == 32) { + if (is_fat32(sbi)) { MSDOS_I(inode)->i_start = sbi->root_cluster; error = fat_calc_dir_size(inode); if (error < 0) @@ -1423,7 +1423,7 @@ static unsigned long calc_fat_clusters(struct super_block *sb) struct msdos_sb_info *sbi = MSDOS_SB(sb); /* Divide first to avoid overflow */ - if (sbi->fat_bits != 12) { + if (!is_fat12(sbi)) { unsigned long ent_per_sec = sb->s_blocksize * 8 / sbi->fat_bits; return ent_per_sec * sbi->fat_length; } @@ -1743,7 +1743,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, } /* interpret volume ID as a little endian 32 bit integer */ - if (sbi->fat_bits == 32) + if (is_fat32(sbi)) sbi->vol_id = bpb.fat32_vol_id; else /* fat 16 or 12 */ sbi->vol_id = bpb.fat16_vol_id; @@ -1769,11 +1769,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, total_clusters = (total_sectors - sbi->data_start) / sbi->sec_per_clus; - if (sbi->fat_bits != 32) + if (!is_fat32(sbi)) sbi->fat_bits = (total_clusters > MAX_FAT12) ? 16 : 12; /* some OSes set FAT_STATE_DIRTY and clean it on unmount. 
*/ - if (sbi->fat_bits == 32) + if (is_fat32(sbi)) sbi->dirty = bpb.fat32_state & FAT_STATE_DIRTY; else /* fat 16 or 12 */ sbi->dirty = bpb.fat16_state & FAT_STATE_DIRTY; @@ -1781,7 +1781,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, /* check that FAT table does not overflow */ fat_clusters = calc_fat_clusters(sb); total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT); - if (total_clusters > MAX_FAT(sb)) { + if (total_clusters > max_fat(sb)) { if (!silent) fat_msg(sb, KERN_ERR, "count of clusters too big (%u)", total_clusters); @@ -1803,11 +1803,15 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, fat_ent_access_init(sb); /* - * The low byte of FAT's first entry must have same value with - * media-field. But in real world, too many devices is - * writing wrong value. So, removed that validity check. + * The low byte of the first FAT entry must have the same value as + * the media field of the boot sector. But in real world, too many + * devices are writing wrong values. So, removed that validity check. 
* - * if (FAT_FIRST_ENT(sb, media) != first) + * The removed check compared the first FAT entry to a value dependent + * on the media field like this: + * == (0x0F00 | media), for FAT12 + * == (0XFF00 | media), for FAT16 + * == (0x0FFFFF | media), for FAT32 */ error = -EINVAL; diff --git a/fs/fat/misc.c b/fs/fat/misc.c index fce0a76f3f1e..4fc950bb6433 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -64,7 +64,7 @@ int fat_clusters_flush(struct super_block *sb) struct buffer_head *bh; struct fat_boot_fsinfo *fsinfo; - if (sbi->fat_bits != 32) + if (!is_fat32(sbi)) return 0; bh = sb_bread(sb, sbi->fsinfo_sector); diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index f37662675c3a..29a9dcfbe81f 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -565,6 +565,7 @@ const struct inode_operations hfsplus_dir_inode_operations = { .symlink = hfsplus_symlink, .mknod = hfsplus_mknod, .rename = hfsplus_rename, + .getattr = hfsplus_getattr, .listxattr = hfsplus_listxattr, }; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index dd7ad9f13e3a..b8471bf05def 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -488,6 +488,8 @@ void hfsplus_inode_write_fork(struct inode *inode, struct hfsplus_fork_raw *fork); int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd); int hfsplus_cat_write_inode(struct inode *inode); +int hfsplus_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags); int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index d7ab9d8c4b67..d131c8ea7eb6 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -270,6 +270,26 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr) return 0; } +int hfsplus_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) +{ + struct inode *inode = d_inode(path->dentry); + struct 
hfsplus_inode_info *hip = HFSPLUS_I(inode); + + if (inode->i_flags & S_APPEND) + stat->attributes |= STATX_ATTR_APPEND; + if (inode->i_flags & S_IMMUTABLE) + stat->attributes |= STATX_ATTR_IMMUTABLE; + if (hip->userflags & HFSPLUS_FLG_NODUMP) + stat->attributes |= STATX_ATTR_NODUMP; + + stat->attributes_mask |= STATX_ATTR_APPEND | STATX_ATTR_IMMUTABLE | + STATX_ATTR_NODUMP; + + generic_fillattr(inode, stat); + return 0; +} + int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { @@ -329,6 +349,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, static const struct inode_operations hfsplus_file_inode_operations = { .setattr = hfsplus_setattr, + .getattr = hfsplus_getattr, .listxattr = hfsplus_listxattr, }; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a2fcea5f8225..32920a10100e 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -383,16 +383,17 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end) * truncation is indicated by end of range being LLONG_MAX * In this case, we first scan the range and release found pages. * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv - * maps and global counts. + * maps and global counts. Page faults can not race with truncation + * in this routine. hugetlb_no_page() prevents page faults in the + * truncated range. It checks i_size before allocation, and again after + * with the page table lock for the page held. The same lock must be + * acquired to unmap a page. * hole punch is indicated if end is not LLONG_MAX * In the hole punch case we scan the range and release found pages. * Only when releasing a page is the associated region/reserv map * deleted. The region/reserv map for ranges without associated - * pages are not modified. - * - * Callers of this routine must hold the i_mmap_rwsem in write mode to prevent - * races with page faults. - * + * pages are not modified. 
Page faults can race with hole punch. + * This is indicated if we find a mapped page. * Note: If the passed end of range value is beyond the end of file, but * not LLONG_MAX this routine still performs a hole punch operation. */ @@ -422,14 +423,32 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, for (i = 0; i < pagevec_count(&pvec); ++i) { struct page *page = pvec.pages[i]; + u32 hash; index = page->index; + hash = hugetlb_fault_mutex_hash(h, current->mm, + &pseudo_vma, + mapping, index, 0); + mutex_lock(&hugetlb_fault_mutex_table[hash]); + /* - * A mapped page is impossible as callers should unmap - * all references before calling. And, i_mmap_rwsem - * prevents the creation of additional mappings. + * If page is mapped, it was faulted in after being + * unmapped in caller. Unmap (again) now after taking + * the fault mutex. The mutex will prevent faults + * until we finish removing the page. + * + * This race can only happen in the hole punch case. + * Getting here in a truncate operation is a bug. */ - VM_BUG_ON(page_mapped(page)); + if (unlikely(page_mapped(page))) { + BUG_ON(truncate_op); + + i_mmap_lock_write(mapping); + hugetlb_vmdelete_list(&mapping->i_mmap, + index * pages_per_huge_page(h), + (index + 1) * pages_per_huge_page(h)); + i_mmap_unlock_write(mapping); + } lock_page(page); /* @@ -451,6 +470,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, } unlock_page(page); + mutex_unlock(&hugetlb_fault_mutex_table[hash]); } huge_pagevec_release(&pvec); cond_resched(); @@ -462,20 +482,9 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, static void hugetlbfs_evict_inode(struct inode *inode) { - struct address_space *mapping = inode->i_mapping; struct resv_map *resv_map; - /* - * The vfs layer guarantees that there are no other users of this - * inode. Therefore, it would be safe to call remove_inode_hugepages - * without holding i_mmap_rwsem. 
We acquire and hold here to be - * consistent with other callers. Since there will be no contention - * on the semaphore, overhead is negligible. - */ - i_mmap_lock_write(mapping); remove_inode_hugepages(inode, 0, LLONG_MAX); - i_mmap_unlock_write(mapping); - resv_map = (struct resv_map *)inode->i_mapping->private_data; /* root inode doesn't have the resv_map, so we should check it */ if (resv_map) @@ -496,8 +505,8 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) i_mmap_lock_write(mapping); if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)) hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0); - remove_inode_hugepages(inode, offset, LLONG_MAX); i_mmap_unlock_write(mapping); + remove_inode_hugepages(inode, offset, LLONG_MAX); return 0; } @@ -531,8 +540,8 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) hugetlb_vmdelete_list(&mapping->i_mmap, hole_start >> PAGE_SHIFT, hole_end >> PAGE_SHIFT); - remove_inode_hugepages(inode, hole_start, hole_end); i_mmap_unlock_write(mapping); + remove_inode_hugepages(inode, hole_start, hole_end); inode_unlock(inode); } @@ -615,11 +624,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, /* addr is the offset within the file (zero based) */ addr = index * hpage_size; - /* - * fault mutex taken here, protects against fault path - * and hole punch. inode_lock previously taken protects - * against truncation. 
- */ + /* mutex taken here, fault path and hole punch */ hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping, index, addr); mutex_lock(&hugetlb_fault_mutex_table[hash]); diff --git a/fs/ioctl.c b/fs/ioctl.c index d64f622cac8b..fef3a6bf7c78 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -203,7 +203,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) fieinfo.fi_extents_start = ufiemap->fm_extents; if (fiemap.fm_extent_count != 0 && - !access_ok(VERIFY_WRITE, fieinfo.fi_extents_start, + !access_ok(fieinfo.fi_extents_start, fieinfo.fi_extents_max * sizeof(struct fiemap_extent))) return -EFAULT; diff --git a/fs/iomap.c b/fs/iomap.c index 3a0cd557b4cf..a3088fae567b 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1921,8 +1921,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, return -EIOCBQUEUED; for (;;) { - __set_current_state(TASK_UNINTERRUPTIBLE); - + set_current_state(TASK_UNINTERRUPTIBLE); if (!READ_ONCE(dio->submit.waiter)) break; diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c index 00d5ef5f99f7..214a2fa1f1e3 100644 --- a/fs/lockd/clnt4xdr.c +++ b/fs/lockd/clnt4xdr.c @@ -128,24 +128,14 @@ static void encode_netobj(struct xdr_stream *xdr, static int decode_netobj(struct xdr_stream *xdr, struct xdr_netobj *obj) { - u32 length; - __be32 *p; + ssize_t ret; - p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; - length = be32_to_cpup(p++); - if (unlikely(length > XDR_MAX_NETOBJ)) - goto out_size; - obj->len = length; - obj->data = (u8 *)p; + ret = xdr_stream_decode_opaque_inline(xdr, (void *)&obj->data, + XDR_MAX_NETOBJ); + if (unlikely(ret < 0)) + return -EIO; + obj->len = ret; return 0; -out_size: - dprintk("NFS: returned netobj was too long: %u\n", length); - return -EIO; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index d20b92f271c2..e8a004097d18 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -256,7 +256,7 @@ 
static int nlm_wait_on_grace(wait_queue_head_t *queue) * Generic NLM call */ static int -nlmclnt_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc) +nlmclnt_call(const struct cred *cred, struct nlm_rqst *req, u32 proc) { struct nlm_host *host = req->a_host; struct rpc_clnt *clnt; @@ -401,7 +401,7 @@ int nlm_async_reply(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *t * completion in order to be able to correctly track the lock * state. */ -static int nlmclnt_async_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) +static int nlmclnt_async_call(const struct cred *cred, struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) { struct rpc_message msg = { .rpc_argp = &req->a_args, @@ -442,7 +442,7 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl) fl->fl_start = req->a_res.lock.fl.fl_start; fl->fl_end = req->a_res.lock.fl.fl_end; fl->fl_type = req->a_res.lock.fl.fl_type; - fl->fl_pid = 0; + fl->fl_pid = -req->a_res.lock.fl.fl_pid; break; default: status = nlm_stat_to_errno(req->a_res.status); @@ -510,7 +510,7 @@ static int do_vfs_lock(struct file_lock *fl) static int nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) { - struct rpc_cred *cred = nfs_file_cred(fl->fl_file); + const struct cred *cred = nfs_file_cred(fl->fl_file); struct nlm_host *host = req->a_host; struct nlm_res *resp = &req->a_res; struct nlm_wait *block = NULL; diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c index 2c6176387143..747b9c8c940a 100644 --- a/fs/lockd/clntxdr.c +++ b/fs/lockd/clntxdr.c @@ -125,24 +125,14 @@ static void encode_netobj(struct xdr_stream *xdr, static int decode_netobj(struct xdr_stream *xdr, struct xdr_netobj *obj) { - u32 length; - __be32 *p; + ssize_t ret; - p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; - length = be32_to_cpup(p++); - if (unlikely(length > XDR_MAX_NETOBJ)) - goto out_size; - obj->len = length; - obj->data = (u8 *)p; + ret = 
xdr_stream_decode_opaque_inline(xdr, (void *)&obj->data, + XDR_MAX_NETOBJ); + if (unlikely(ret < 0)) + return -EIO; + obj->len = ret; return 0; -out_size: - dprintk("NFS: returned netobj was too long: %u\n", length); - return -EIO; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 7147e4aebecc..9846f7e95282 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -127,7 +127,7 @@ nlm_decode_lock(__be32 *p, struct nlm_lock *lock) locks_init_lock(fl); fl->fl_owner = current->files; - fl->fl_pid = (pid_t)lock->svid; + fl->fl_pid = current->tgid; fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; /* as good as anything else */ start = ntohl(*p++); @@ -269,7 +269,7 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) memset(lock, 0, sizeof(*lock)); locks_init_lock(&lock->fl); lock->svid = ~(u32) 0; - lock->fl.fl_pid = (pid_t)lock->svid; + lock->fl.fl_pid = current->tgid; if (!(p = nlm_decode_cookie(p, &argp->cookie)) || !(p = xdr_decode_string_inplace(p, &lock->caller, diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 7ed9edf9aed4..70154f376695 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -119,7 +119,7 @@ nlm4_decode_lock(__be32 *p, struct nlm_lock *lock) locks_init_lock(fl); fl->fl_owner = current->files; - fl->fl_pid = (pid_t)lock->svid; + fl->fl_pid = current->tgid; fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; /* as good as anything else */ p = xdr_decode_hyper(p, &start); @@ -266,7 +266,7 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) memset(lock, 0, sizeof(*lock)); locks_init_lock(&lock->fl); lock->svid = ~(u32) 0; - lock->fl.fl_pid = (pid_t)lock->svid; + lock->fl.fl_pid = current->tgid; if (!(p = nlm4_decode_cookie(p, &argp->cookie)) || !(p = xdr_decode_string_inplace(p, &lock->caller, diff --git a/fs/locks.c b/fs/locks.c index f0b24d98f36b..ff6af2c32601 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -453,7 +453,7 @@ static void locks_move_blocks(struct file_lock 
*new, struct file_lock *fl) return; spin_lock(&blocked_lock_lock); list_splice_init(&fl->fl_blocked_requests, &new->fl_blocked_requests); - list_for_each_entry(f, &fl->fl_blocked_requests, fl_blocked_member) + list_for_each_entry(f, &new->fl_blocked_requests, fl_blocked_member) f->fl_blocker = new; spin_unlock(&blocked_lock_lock); } diff --git a/fs/namespace.c b/fs/namespace.c index a7f91265ea67..a677b59efd74 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -26,6 +26,7 @@ #include <linux/memblock.h> #include <linux/task_work.h> #include <linux/sched/task.h> +#include <uapi/linux/mount.h> #include "pnode.h" #include "internal.h" @@ -245,13 +246,9 @@ out_free_cache: * mnt_want/drop_write() will _keep_ the filesystem * r/w. */ -int __mnt_is_readonly(struct vfsmount *mnt) +bool __mnt_is_readonly(struct vfsmount *mnt) { - if (mnt->mnt_flags & MNT_READONLY) - return 1; - if (sb_rdonly(mnt->mnt_sb)) - return 1; - return 0; + return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb); } EXPORT_SYMBOL_GPL(__mnt_is_readonly); @@ -507,11 +504,12 @@ static int mnt_make_readonly(struct mount *mnt) return ret; } -static void __mnt_unmake_readonly(struct mount *mnt) +static int __mnt_unmake_readonly(struct mount *mnt) { lock_mount_hash(); mnt->mnt.mnt_flags &= ~MNT_READONLY; unlock_mount_hash(); + return 0; } int sb_prepare_remount_readonly(struct super_block *sb) @@ -1360,7 +1358,7 @@ static void namespace_unlock(void) if (likely(hlist_empty(&head))) return; - synchronize_rcu(); + synchronize_rcu_expedited(); group_pin_kill(&head); } @@ -2215,21 +2213,91 @@ out: return err; } -static int change_mount_flags(struct vfsmount *mnt, int ms_flags) +/* + * Don't allow locked mount flags to be cleared. + * + * No locks need to be held here while testing the various MNT_LOCK + * flags because those flags can never be cleared once they are set. 
+ */ +static bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags) { - int error = 0; - int readonly_request = 0; + unsigned int fl = mnt->mnt.mnt_flags; + + if ((fl & MNT_LOCK_READONLY) && + !(mnt_flags & MNT_READONLY)) + return false; - if (ms_flags & MS_RDONLY) - readonly_request = 1; - if (readonly_request == __mnt_is_readonly(mnt)) + if ((fl & MNT_LOCK_NODEV) && + !(mnt_flags & MNT_NODEV)) + return false; + + if ((fl & MNT_LOCK_NOSUID) && + !(mnt_flags & MNT_NOSUID)) + return false; + + if ((fl & MNT_LOCK_NOEXEC) && + !(mnt_flags & MNT_NOEXEC)) + return false; + + if ((fl & MNT_LOCK_ATIME) && + ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) + return false; + + return true; +} + +static int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags) +{ + bool readonly_request = (mnt_flags & MNT_READONLY); + + if (readonly_request == __mnt_is_readonly(&mnt->mnt)) return 0; if (readonly_request) - error = mnt_make_readonly(real_mount(mnt)); - else - __mnt_unmake_readonly(real_mount(mnt)); - return error; + return mnt_make_readonly(mnt); + + return __mnt_unmake_readonly(mnt); +} + +/* + * Update the user-settable attributes on a mount. The caller must hold + * sb->s_umount for writing. + */ +static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags) +{ + lock_mount_hash(); + mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; + mnt->mnt.mnt_flags = mnt_flags; + touch_mnt_namespace(mnt->mnt_ns); + unlock_mount_hash(); +} + +/* + * Handle reconfiguration of the mountpoint only without alteration of the + * superblock it refers to. This is triggered by specifying MS_REMOUNT|MS_BIND + * to mount(2). 
+ */ +static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags) +{ + struct super_block *sb = path->mnt->mnt_sb; + struct mount *mnt = real_mount(path->mnt); + int ret; + + if (!check_mnt(mnt)) + return -EINVAL; + + if (path->dentry != mnt->mnt.mnt_root) + return -EINVAL; + + if (!can_change_locked_flags(mnt, mnt_flags)) + return -EPERM; + + down_write(&sb->s_umount); + ret = change_mount_ro_state(mnt, mnt_flags); + if (ret == 0) + set_mount_attributes(mnt, mnt_flags); + up_write(&sb->s_umount); + return ret; } /* @@ -2243,6 +2311,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags, int err; struct super_block *sb = path->mnt->mnt_sb; struct mount *mnt = real_mount(path->mnt); + void *sec_opts = NULL; if (!check_mnt(mnt)) return -EINVAL; @@ -2250,50 +2319,25 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags, if (path->dentry != path->mnt->mnt_root) return -EINVAL; - /* Don't allow changing of locked mnt flags. - * - * No locks need to be held here while testing the various - * MNT_LOCK flags because those flags can never be cleared - * once they are set. 
- */ - if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) && - !(mnt_flags & MNT_READONLY)) { - return -EPERM; - } - if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && - !(mnt_flags & MNT_NODEV)) { - return -EPERM; - } - if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) && - !(mnt_flags & MNT_NOSUID)) { - return -EPERM; - } - if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) && - !(mnt_flags & MNT_NOEXEC)) { - return -EPERM; - } - if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) && - ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) { + if (!can_change_locked_flags(mnt, mnt_flags)) return -EPERM; - } - err = security_sb_remount(sb, data); + if (data && !(sb->s_type->fs_flags & FS_BINARY_MOUNTDATA)) { + err = security_sb_eat_lsm_opts(data, &sec_opts); + if (err) + return err; + } + err = security_sb_remount(sb, sec_opts); + security_free_mnt_opts(&sec_opts); if (err) return err; down_write(&sb->s_umount); - if (ms_flags & MS_BIND) - err = change_mount_flags(path->mnt, ms_flags); - else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) - err = -EPERM; - else + err = -EPERM; + if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { err = do_remount_sb(sb, sb_flags, data, 0); - if (!err) { - lock_mount_hash(); - mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; - mnt->mnt.mnt_flags = mnt_flags; - touch_mnt_namespace(mnt->mnt_ns); - unlock_mount_hash(); + if (!err) + set_mount_attributes(mnt, mnt_flags); } up_write(&sb->s_umount); return err; @@ -2651,7 +2695,7 @@ static long exact_copy_from_user(void *to, const void __user * from, const char __user *f = from; char c; - if (!access_ok(VERIFY_READ, from, n)) + if (!access_ok(from, n)) return n; current->kernel_uaccess_faults_ok++; @@ -2788,7 +2832,9 @@ long do_mount(const char *dev_name, const char __user *dir_name, SB_LAZYTIME | SB_I_VERSION); - if (flags & MS_REMOUNT) + if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND)) + retval = do_reconfigure_mnt(&path, mnt_flags); + else if (flags & MS_REMOUNT) retval = 
do_remount(&path, flags, sb_flags, mnt_flags, data_page); else if (flags & MS_BIND) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index d3781cd983f6..690221747b47 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -584,7 +584,7 @@ static int decode_sector_number(__be32 **rp, sector_t *sp) static struct nfs4_deviceid_node * bl_find_get_deviceid(struct nfs_server *server, - const struct nfs4_deviceid *id, struct rpc_cred *cred, + const struct nfs4_deviceid *id, const struct cred *cred, gfp_t gfp_mask) { struct nfs4_deviceid_node *node; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 509dc5adeb8f..0b602a39dd71 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -56,7 +56,7 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net) nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret > 0) { nn->nfs_callback_tcpport6 = ret; - dprintk("NFS: Callback listener port = %u (af %u, net %x\n", + dprintk("NFS: Callback listener port = %u (af %u, net %x)\n", nn->nfs_callback_tcpport6, PF_INET6, net->ns.inum); } else if (ret != -EAFNOSUPPORT) goto out_err; @@ -206,11 +206,13 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, goto err_bind; } - ret = -EPROTONOSUPPORT; + ret = 0; if (!IS_ENABLED(CONFIG_NFS_V4_1) || minorversion == 0) ret = nfs4_callback_up_net(serv, net); - else if (xprt->ops->bc_up) - ret = xprt->ops->bc_up(serv, net); + else if (xprt->ops->bc_setup) + set_bc_enabled(serv); + else + ret = -EPROTONOSUPPORT; if (ret < 0) { printk(KERN_ERR "NFS: callback service start failed\n"); diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 96d5f8135eb9..fb1cf1a4bda2 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -151,7 +151,6 @@ EXPORT_SYMBOL_GPL(unregister_nfs_version); struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) { struct nfs_client *clp; - struct rpc_cred *cred; int err = -ENOMEM; if ((clp = 
kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) @@ -182,9 +181,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) clp->cl_proto = cl_init->proto; clp->cl_net = get_net(cl_init->net); - cred = rpc_lookup_machine_cred("*"); - if (!IS_ERR(cred)) - clp->cl_machine_cred = cred; + clp->cl_principal = "*"; nfs_fscache_get_client_cookie(clp); return clp; @@ -246,9 +243,6 @@ void nfs_free_client(struct nfs_client *clp) if (!IS_ERR(clp->cl_rpcclient)) rpc_shutdown_client(clp->cl_rpcclient); - if (clp->cl_machine_cred != NULL) - put_rpccred(clp->cl_machine_cred); - put_net(clp->cl_net); put_nfs_version(clp->cl_nfs_mod); kfree(clp->cl_hostname); @@ -527,6 +521,7 @@ int nfs_create_rpc_client(struct nfs_client *clp, return PTR_ERR(clnt); } + clnt->cl_principal = clp->cl_principal; clp->cl_rpcclient = clnt; return 0; } diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 6ec2f78c1e19..885363ca8569 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -26,10 +26,8 @@ static void nfs_free_delegation(struct nfs_delegation *delegation) { - if (delegation->cred) { - put_rpccred(delegation->cred); - delegation->cred = NULL; - } + put_cred(delegation->cred); + delegation->cred = NULL; kfree_rcu(delegation, rcu); } @@ -178,13 +176,13 @@ again: * @pagemod_limit: write delegation "space_limit" * */ -void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, +void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit) { struct nfs_delegation *delegation; - struct rpc_cred *oldcred = NULL; + const struct cred *oldcred = NULL; rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); @@ -195,12 +193,12 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, delegation->type = type; delegation->pagemod_limit = pagemod_limit; oldcred = delegation->cred; - delegation->cred = get_rpccred(cred); + 
delegation->cred = get_cred(cred); clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); spin_unlock(&delegation->lock); rcu_read_unlock(); - put_rpccred(oldcred); + put_cred(oldcred); trace_nfs4_reclaim_delegation(inode, type); return; } @@ -341,7 +339,7 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation, * * Returns zero on success, or a negative errno value. */ -int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, +int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit) @@ -360,7 +358,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, delegation->type = type; delegation->pagemod_limit = pagemod_limit; delegation->change_attr = inode_peek_iversion_raw(inode); - delegation->cred = get_rpccred(cred); + delegation->cred = get_cred(cred); delegation->inode = inode; delegation->flags = 1<<NFS_DELEGATION_REFERENCED; spin_lock_init(&delegation->lock); @@ -1047,7 +1045,7 @@ void nfs_reap_expired_delegations(struct nfs_client *clp) struct nfs_delegation *delegation; struct nfs_server *server; struct inode *inode; - struct rpc_cred *cred; + const struct cred *cred; nfs4_stateid stateid; restart: @@ -1069,7 +1067,7 @@ restart: nfs_sb_deactive(server->super); goto restart; } - cred = get_rpccred_rcu(delegation->cred); + cred = get_cred_rcu(delegation->cred); nfs4_stateid_copy(&stateid, &delegation->stateid); clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags); rcu_read_unlock(); @@ -1078,7 +1076,7 @@ restart: nfs_revoke_delegation(inode, &stateid); nfs_inode_find_state_and_recover(inode, &stateid); } - put_rpccred(cred); + put_cred(cred); if (nfs4_server_rebooted(clp)) { nfs_inode_mark_test_expired_delegation(server,inode); iput(inode); @@ -1173,7 +1171,7 @@ out: * otherwise "false" is returned. 
*/ bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, - nfs4_stateid *dst, struct rpc_cred **cred) + nfs4_stateid *dst, const struct cred **cred) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; @@ -1187,7 +1185,7 @@ bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid_copy(dst, &delegation->stateid); nfs_mark_delegation_referenced(delegation); if (cred) - *cred = get_rpccred(delegation->cred); + *cred = get_cred(delegation->cred); } rcu_read_unlock(); return ret; diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index bb1ef8c37af4..dcbf3394ba0e 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -15,7 +15,7 @@ */ struct nfs_delegation { struct list_head super_list; - struct rpc_cred *cred; + const struct cred *cred; struct inode *inode; nfs4_stateid stateid; fmode_t type; @@ -36,9 +36,9 @@ enum { NFS_DELEGATION_TEST_EXPIRED, }; -int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, +int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit); -void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, +void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit); int nfs4_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); @@ -60,10 +60,10 @@ void nfs_mark_test_expired_all_delegations(struct nfs_client *clp); void nfs_reap_expired_delegations(struct nfs_client *clp); /* NFSv4 delegation-related procedures */ -int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); +int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync); int nfs4_open_delegation_recall(struct 
nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type); int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid); -bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, struct rpc_cred **cred); +bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, const struct cred **cred); bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 71b2e390becf..6bf4471850c8 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -67,7 +67,7 @@ const struct address_space_operations nfs_dir_aops = { .freepage = nfs_readdir_clear_array, }; -static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred) +static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, const struct cred *cred) { struct nfs_inode *nfsi = NFS_I(dir); struct nfs_open_dir_context *ctx; @@ -77,7 +77,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir ctx->attr_gencount = nfsi->attr_gencount; ctx->dir_cookie = 0; ctx->dup_cookie = 0; - ctx->cred = get_rpccred(cred); + ctx->cred = get_cred(cred); spin_lock(&dir->i_lock); list_add(&ctx->list, &nfsi->open_files); spin_unlock(&dir->i_lock); @@ -91,7 +91,7 @@ static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_cont spin_lock(&dir->i_lock); list_del(&ctx->list); spin_unlock(&dir->i_lock); - put_rpccred(ctx->cred); + put_cred(ctx->cred); kfree(ctx); } @@ -103,23 +103,18 @@ nfs_opendir(struct inode *inode, struct file *filp) { int res = 0; struct nfs_open_dir_context *ctx; - struct rpc_cred *cred; dfprintk(FILE, "NFS: open dir(%pD2)\n", filp); nfs_inc_stats(inode, NFSIOS_VFSOPEN); - cred = rpc_lookup_cred(); - if (IS_ERR(cred)) - return PTR_ERR(cred); - ctx = 
alloc_nfs_open_dir_context(inode, cred); + ctx = alloc_nfs_open_dir_context(inode, current_cred()); if (IS_ERR(ctx)) { res = PTR_ERR(ctx); goto out; } filp->private_data = ctx; out: - put_rpccred(cred); return res; } @@ -334,7 +329,7 @@ int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct file *file, struct inode *inode) { struct nfs_open_dir_context *ctx = file->private_data; - struct rpc_cred *cred = ctx->cred; + const struct cred *cred = ctx->cred; unsigned long timestamp, gencount; int error; @@ -2139,7 +2134,7 @@ MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache lengt static void nfs_access_free_entry(struct nfs_access_entry *entry) { - put_rpccred(entry->cred); + put_cred(entry->cred); kfree_rcu(entry, rcu_head); smp_mb__before_atomic(); atomic_long_dec(&nfs_access_nr_entries); @@ -2265,17 +2260,18 @@ void nfs_access_zap_cache(struct inode *inode) } EXPORT_SYMBOL_GPL(nfs_access_zap_cache); -static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred) +static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, const struct cred *cred) { struct rb_node *n = NFS_I(inode)->access_cache.rb_node; - struct nfs_access_entry *entry; while (n != NULL) { - entry = rb_entry(n, struct nfs_access_entry, rb_node); + struct nfs_access_entry *entry = + rb_entry(n, struct nfs_access_entry, rb_node); + int cmp = cred_fscmp(cred, entry->cred); - if (cred < entry->cred) + if (cmp < 0) n = n->rb_left; - else if (cred > entry->cred) + else if (cmp > 0) n = n->rb_right; else return entry; @@ -2283,7 +2279,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, st return NULL; } -static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res, bool may_block) +static int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool 
may_block) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_access_entry *cache; @@ -2326,7 +2322,7 @@ out_zap: return -ENOENT; } -static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) +static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res) { /* Only check the most recently returned cache entry, * but do it without locking. @@ -2363,15 +2359,17 @@ static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry * struct rb_node **p = &root_node->rb_node; struct rb_node *parent = NULL; struct nfs_access_entry *entry; + int cmp; spin_lock(&inode->i_lock); while (*p != NULL) { parent = *p; entry = rb_entry(parent, struct nfs_access_entry, rb_node); + cmp = cred_fscmp(set->cred, entry->cred); - if (set->cred < entry->cred) + if (cmp < 0) p = &parent->rb_left; - else if (set->cred > entry->cred) + else if (cmp > 0) p = &parent->rb_right; else goto found; @@ -2395,7 +2393,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) if (cache == NULL) return; RB_CLEAR_NODE(&cache->rb_node); - cache->cred = get_rpccred(set->cred); + cache->cred = get_cred(set->cred); cache->mask = set->mask; /* The above field assignments must be visible @@ -2459,7 +2457,7 @@ void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result) } EXPORT_SYMBOL_GPL(nfs_access_set_mask); -static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) +static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask) { struct nfs_access_entry cache; bool may_block = (mask & MAY_NOT_BLOCK) == 0; @@ -2523,7 +2521,7 @@ static int nfs_open_permission_mask(int openflags) return mask; } -int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) +int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags) { return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); 
} @@ -2548,7 +2546,7 @@ static int nfs_execute_ok(struct inode *inode, int mask) int nfs_permission(struct inode *inode, int mask) { - struct rpc_cred *cred; + const struct cred *cred = current_cred(); int res = 0; nfs_inc_stats(inode, NFSIOS_VFSACCESS); @@ -2582,20 +2580,11 @@ force_lookup: /* Always try fast lookups first */ rcu_read_lock(); - cred = rpc_lookup_cred_nonblock(); - if (!IS_ERR(cred)) - res = nfs_do_access(inode, cred, mask|MAY_NOT_BLOCK); - else - res = PTR_ERR(cred); + res = nfs_do_access(inode, cred, mask|MAY_NOT_BLOCK); rcu_read_unlock(); if (res == -ECHILD && !(mask & MAY_NOT_BLOCK)) { /* Fast lookup failed, try the slow way */ - cred = rpc_lookup_cred(); - if (!IS_ERR(cred)) { - res = nfs_do_access(inode, cred, mask); - put_rpccred(cred); - } else - res = PTR_ERR(cred); + res = nfs_do_access(inode, cred, mask); } out: if (!res && (mask & MAY_EXEC)) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 310d7500f665..63abe705f4ca 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -9,6 +9,7 @@ #include <linux/nfs_fs.h> #include <linux/nfs_page.h> #include <linux/module.h> +#include <linux/sched/mm.h> #include <linux/sunrpc/metrics.h> @@ -27,9 +28,6 @@ #define FF_LAYOUT_POLL_RETRY_MAX (15*HZ) #define FF_LAYOUTRETURN_MAXERR 20 - -static struct group_info *ff_zero_group; - static void ff_layout_read_record_layoutstats_done(struct rpc_task *task, struct nfs_pgio_header *hdr); static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, @@ -226,16 +224,14 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags) static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror) { - struct rpc_cred *cred; + const struct cred *cred; ff_layout_remove_mirror(mirror); kfree(mirror->fh_versions); cred = rcu_access_pointer(mirror->ro_cred); - if (cred) - put_rpccred(cred); + put_cred(cred); cred = 
rcu_access_pointer(mirror->rw_cred); - if (cred) - put_rpccred(cred); + put_cred(cred); nfs4_ff_layout_put_deviceid(mirror->mirror_ds); kfree(mirror); } @@ -413,8 +409,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, for (i = 0; i < fls->mirror_array_cnt; i++) { struct nfs4_ff_layout_mirror *mirror; - struct auth_cred acred = { .group_info = ff_zero_group }; - struct rpc_cred __rcu *cred; + struct cred *kcred; + const struct cred *cred; + kuid_t uid; + kgid_t gid; u32 ds_count, fh_count, id; int j; @@ -482,21 +480,28 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, if (rc) goto out_err_free; - acred.uid = make_kuid(&init_user_ns, id); + uid = make_kuid(&init_user_ns, id); /* group */ rc = decode_name(&stream, &id); if (rc) goto out_err_free; - acred.gid = make_kgid(&init_user_ns, id); + gid = make_kgid(&init_user_ns, id); - /* find the cred for it */ - rcu_assign_pointer(cred, rpc_lookup_generic_cred(&acred, 0, gfp_flags)); - if (IS_ERR(cred)) { - rc = PTR_ERR(cred); - goto out_err_free; + if (gfp_flags & __GFP_FS) + kcred = prepare_kernel_cred(NULL); + else { + unsigned int nofs_flags = memalloc_nofs_save(); + kcred = prepare_kernel_cred(NULL); + memalloc_nofs_restore(nofs_flags); } + rc = -ENOMEM; + if (!kcred) + goto out_err_free; + kcred->fsuid = uid; + kcred->fsgid = gid; + cred = kcred; if (lgr->range.iomode == IOMODE_READ) rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred); @@ -519,8 +524,8 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, dprintk("%s: iomode %s uid %u gid %u\n", __func__, lgr->range.iomode == IOMODE_READ ? 
"READ" : "RW", - from_kuid(&init_user_ns, acred.uid), - from_kgid(&init_user_ns, acred.gid)); + from_kuid(&init_user_ns, uid), + from_kgid(&init_user_ns, gid)); } p = xdr_inline_decode(&stream, 4); @@ -1698,7 +1703,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; - struct rpc_cred *ds_cred; + const struct cred *ds_cred; loff_t offset = hdr->args.offset; u32 idx = hdr->pgio_mirror_idx; int vers; @@ -1749,7 +1754,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) vers == 3 ? &ff_layout_read_call_ops_v3 : &ff_layout_read_call_ops_v4, 0, RPC_TASK_SOFTCONN); - put_rpccred(ds_cred); + put_cred(ds_cred); return PNFS_ATTEMPTED; out_failed: @@ -1765,7 +1770,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; - struct rpc_cred *ds_cred; + const struct cred *ds_cred; loff_t offset = hdr->args.offset; int vers; struct nfs_fh *fh; @@ -1814,7 +1819,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) vers == 3 ? &ff_layout_write_call_ops_v3 : &ff_layout_write_call_ops_v4, sync, RPC_TASK_SOFTCONN); - put_rpccred(ds_cred); + put_cred(ds_cred); return PNFS_ATTEMPTED; out_failed: @@ -1844,7 +1849,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) struct pnfs_layout_segment *lseg = data->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; - struct rpc_cred *ds_cred; + const struct cred *ds_cred; u32 idx; int vers, ret; struct nfs_fh *fh; @@ -1884,7 +1889,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) vers == 3 ? 
&ff_layout_commit_call_ops_v3 : &ff_layout_commit_call_ops_v4, how, RPC_TASK_SOFTCONN); - put_rpccred(ds_cred); + put_cred(ds_cred); return ret; out_err: pnfs_generic_prepare_to_resend_writes(data); @@ -2383,11 +2388,6 @@ static int __init nfs4flexfilelayout_init(void) { printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Registering...\n", __func__); - if (!ff_zero_group) { - ff_zero_group = groups_alloc(0); - if (!ff_zero_group) - return -ENOMEM; - } return pnfs_register_layoutdriver(&flexfilelayout_type); } @@ -2396,10 +2396,6 @@ static void __exit nfs4flexfilelayout_exit(void) printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Unregistering...\n", __func__); pnfs_unregister_layoutdriver(&flexfilelayout_type); - if (ff_zero_group) { - put_group_info(ff_zero_group); - ff_zero_group = NULL; - } } MODULE_ALIAS("nfs-layouttype4-4"); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index de50a342d5a5..c2626bad466b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -81,8 +81,8 @@ struct nfs4_ff_layout_mirror { u32 fh_versions_cnt; struct nfs_fh *fh_versions; nfs4_stateid stateid; - struct rpc_cred __rcu *ro_cred; - struct rpc_cred __rcu *rw_cred; + const struct cred __rcu *ro_cred; + const struct cred __rcu *rw_cred; refcount_t ref; spinlock_t lock; unsigned long flags; @@ -229,8 +229,8 @@ nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, u32 ds_idx, struct nfs_client *ds_clp, struct inode *inode); -struct rpc_cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, - u32 ds_idx, struct rpc_cred *mdscred); +const struct cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, + u32 ds_idx, const struct cred *mdscred); bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg); bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg); diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c 
b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index d23347389626..11766a74216d 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -330,10 +330,10 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, return 0; } -static struct rpc_cred * +static const struct cred * ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode) { - struct rpc_cred *cred, __rcu **pcred; + const struct cred *cred, __rcu **pcred; if (iomode == IOMODE_READ) pcred = &mirror->ro_cred; @@ -346,7 +346,7 @@ ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode) if (!cred) break; - cred = get_rpccred_rcu(cred); + cred = get_cred_rcu(cred); } while(!cred); rcu_read_unlock(); return cred; @@ -465,19 +465,19 @@ out: return ds; } -struct rpc_cred * +const struct cred * ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx, - struct rpc_cred *mdscred) + const struct cred *mdscred) { struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); - struct rpc_cred *cred; + const struct cred *cred; if (mirror && !mirror->mirror_ds->ds_versions[0].tightly_coupled) { cred = ff_layout_get_mirror_cred(mirror, lseg->pls_range.iomode); if (!cred) - cred = get_rpccred(mdscred); + cred = get_cred(mdscred); } else { - cred = get_rpccred(mdscred); + cred = get_cred(mdscred); } return cred; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5b1eee4952b7..094775ea0781 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -950,18 +950,17 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct file *filp) { struct nfs_open_context *ctx; - struct rpc_cred *cred = rpc_lookup_cred(); - if (IS_ERR(cred)) - return ERR_CAST(cred); + const struct cred *cred = get_current_cred(); ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) { - put_rpccred(cred); + put_cred(cred); return ERR_PTR(-ENOMEM); } nfs_sb_active(dentry->d_sb); ctx->dentry = dget(dentry); ctx->cred = cred; + 
ctx->ll_cred = NULL; ctx->state = NULL; ctx->mode = f_mode; ctx->flags = 0; @@ -997,10 +996,10 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) } if (inode != NULL) NFS_PROTO(inode)->close_context(ctx, is_sync); - if (ctx->cred != NULL) - put_rpccred(ctx->cred); + put_cred(ctx->cred); dput(ctx->dentry); nfs_sb_deactive(sb); + put_rpccred(ctx->ll_cred); kfree(ctx->mdsthreshold); kfree_rcu(ctx, rcu_head); } @@ -1042,7 +1041,7 @@ EXPORT_SYMBOL_GPL(nfs_file_set_open_context); /* * Given an inode, search for an open context with the desired characteristics */ -struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode) +struct nfs_open_context *nfs_find_open_context(struct inode *inode, const struct cred *cred, fmode_t mode) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_context *pos, *ctx = NULL; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8357ff69962f..b1e577302518 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -123,7 +123,7 @@ struct nfs_parsed_mount_data { unsigned short protocol; } nfs_server; - struct security_mnt_opts lsm_opts; + void *lsm_opts; struct net *net; }; @@ -254,7 +254,7 @@ struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *); void nfs_pgio_header_free(struct nfs_pgio_header *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, - struct rpc_cred *cred, const struct nfs_rpc_ops *rpc_ops, + const struct cred *cred, const struct nfs_rpc_ops *rpc_ops, const struct rpc_call_ops *call_ops, int how, int flags); void nfs_free_request(struct nfs_page *req); struct nfs_pgio_mirror * @@ -269,7 +269,7 @@ static inline bool nfs_pgio_has_mirroring(struct nfs_pageio_descriptor *desc) static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1, const struct nfs_open_context *ctx2) { - return ctx1->cred == ctx2->cred && 
ctx1->state == ctx2->state; + return cred_fscmp(ctx1->cred, ctx2->cred) == 0 && ctx1->state == ctx2->state; } /* nfs2xdr.c */ @@ -395,7 +395,6 @@ extern const struct super_operations nfs_sops; extern struct file_system_type nfs_fs_type; extern struct file_system_type nfs_xdev_fs_type; #if IS_ENABLED(CONFIG_NFS_V4) -extern struct file_system_type nfs4_xdev_fs_type; extern struct file_system_type nfs4_referral_fs_type; #endif bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t); @@ -565,13 +564,13 @@ extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct nfs_client_initdata *); extern int nfs40_walk_client_list(struct nfs_client *clp, struct nfs_client **result, - struct rpc_cred *cred); + const struct cred *cred); extern int nfs41_walk_client_list(struct nfs_client *clp, struct nfs_client **result, - struct rpc_cred *cred); -extern int nfs4_test_session_trunk(struct rpc_clnt *, - struct rpc_xprt *, - void *); + const struct cred *cred); +extern void nfs4_test_session_trunk(struct rpc_clnt *clnt, + struct rpc_xprt *xprt, + void *data); static inline struct inode *nfs_igrab_and_active(struct inode *inode) { diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 71bc16225b98..a3ad2d46fd42 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -607,7 +607,7 @@ out: * readdirplus. 
*/ static int -nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, +nfs3_proc_readdir(struct dentry *dentry, const struct cred *cred, u64 cookie, struct page **pages, unsigned int count, bool plus) { struct inode *dir = d_inode(dentry); @@ -628,7 +628,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, .rpc_proc = &nfs3_procedures[NFS3PROC_READDIR], .rpc_argp = &arg, .rpc_resp = &res, - .rpc_cred = cred + .rpc_cred = cred, }; int status = -ENOMEM; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 1b994b527518..06ac3d9ac7c6 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -62,10 +62,11 @@ struct nfs4_minor_version_ops { void (*free_lock_state)(struct nfs_server *, struct nfs4_lock_state *); int (*test_and_free_expired)(struct nfs_server *, - nfs4_stateid *, struct rpc_cred *); + nfs4_stateid *, const struct cred *); struct nfs_seqid * (*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); - int (*session_trunk)(struct rpc_clnt *, struct rpc_xprt *, void *); + void (*session_trunk)(struct rpc_clnt *clnt, + struct rpc_xprt *xprt, void *data); const struct rpc_call_ops *call_sync_ops; const struct nfs4_state_recovery_ops *reboot_recovery_ops; const struct nfs4_state_recovery_ops *nograce_recovery_ops; @@ -107,7 +108,7 @@ struct nfs4_state_owner { unsigned long so_expires; struct rb_node so_server_node; - struct rpc_cred *so_cred; /* Associated cred */ + const struct cred *so_cred; /* Associated cred */ spinlock_t so_lock; atomic_t so_count; @@ -212,10 +213,10 @@ struct nfs4_state_recovery_ops { int state_flag_bit; int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); int (*recover_lock)(struct nfs4_state *, struct file_lock *); - int (*establish_clid)(struct nfs_client *, struct rpc_cred *); - int (*reclaim_complete)(struct nfs_client *, struct rpc_cred *); + int (*establish_clid)(struct nfs_client *, const struct cred *); + int (*reclaim_complete)(struct nfs_client *, const struct cred *); int 
(*detect_trunking)(struct nfs_client *, struct nfs_client **, - struct rpc_cred *); + const struct cred *); }; struct nfs4_opendata { @@ -245,19 +246,19 @@ struct nfs4_opendata { struct nfs4_add_xprt_data { struct nfs_client *clp; - struct rpc_cred *cred; + const struct cred *cred; }; struct nfs4_state_maintenance_ops { - int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *, unsigned); - struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *); - int (*renew_lease)(struct nfs_client *, struct rpc_cred *); + int (*sched_state_renewal)(struct nfs_client *, const struct cred *, unsigned); + const struct cred * (*get_state_renewal_cred)(struct nfs_client *); + int (*renew_lease)(struct nfs_client *, const struct cred *); }; struct nfs4_mig_recovery_ops { int (*get_locations)(struct inode *, struct nfs4_fs_locations *, - struct page *, struct rpc_cred *); - int (*fsid_present)(struct inode *, struct rpc_cred *); + struct page *, const struct cred *); + int (*fsid_present)(struct inode *, const struct cred *); }; extern const struct dentry_operations nfs4_dentry_operations; @@ -286,21 +287,21 @@ extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, struct rpc_message *, struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int, int); -extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); -extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); +extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, const struct cred *, struct nfs4_setclientid_res *); +extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, const struct cred *); extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool); -extern int 
nfs4_proc_bind_conn_to_session(struct nfs_client *, struct rpc_cred *cred); -extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); +extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, const struct cred *cred); +extern int nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cred); extern int nfs4_destroy_clientid(struct nfs_client *clp); -extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); -extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); +extern int nfs4_init_clientid(struct nfs_client *, const struct cred *); +extern int nfs41_init_clientid(struct nfs_client *, const struct cred *); extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *, struct nfs4_fs_locations *, struct page *); extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *, - struct page *page, struct rpc_cred *); -extern int nfs4_proc_fsid_present(struct inode *, struct rpc_cred *); + struct page *page, const struct cred *); +extern int nfs4_proc_fsid_present(struct inode *, const struct cred *); extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, const struct qstr *, struct nfs_fh *, struct nfs_fattr *); extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); @@ -312,8 +313,8 @@ extern int nfs4_set_rw_stateid(nfs4_stateid *stateid, #if defined(CONFIG_NFS_V4_1) extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *); -extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *); -extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *); +extern int nfs4_proc_create_session(struct nfs_client *, const struct cred *); +extern int nfs4_proc_destroy_session(struct nfs4_session *, 
const struct cred *); extern int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo); extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, @@ -338,7 +339,6 @@ static inline bool _nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, struct rpc_clnt **clntp, struct rpc_message *msg) { - struct rpc_cred *newcred = NULL; rpc_authflavor_t flavor; if (sp4_mode == NFS_SP4_MACH_CRED_CLEANUP || @@ -353,13 +353,7 @@ _nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, return false; } if (test_bit(sp4_mode, &clp->cl_sp4_flags)) { - spin_lock(&clp->cl_lock); - if (clp->cl_machine_cred != NULL) - /* don't call get_rpccred on the machine cred - - * a reference will be held for life of clp */ - newcred = clp->cl_machine_cred; - spin_unlock(&clp->cl_lock); - msg->rpc_cred = newcred; + msg->rpc_cred = rpc_machine_cred(); flavor = clp->cl_rpcclient->cl_auth->au_flavor; WARN_ON_ONCE(flavor != RPC_AUTH_GSS_KRB5I && @@ -450,16 +444,16 @@ extern void nfs4_set_lease_period(struct nfs_client *clp, /* nfs4state.c */ -struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp); -struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); -struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp); +const struct cred *nfs4_get_clid_cred(struct nfs_client *clp); +const struct cred *nfs4_get_machine_cred(struct nfs_client *clp); +const struct cred *nfs4_get_renew_cred(struct nfs_client *clp); int nfs4_discover_server_trunking(struct nfs_client *clp, struct nfs_client **); int nfs40_discover_server_trunking(struct nfs_client *clp, - struct nfs_client **, struct rpc_cred *); + struct nfs_client **, const struct cred *); #if defined(CONFIG_NFS_V4_1) int nfs41_discover_server_trunking(struct nfs_client *clp, - struct nfs_client **, struct rpc_cred *); + struct nfs_client **, const struct cred *); extern void nfs4_schedule_session_recovery(struct nfs4_session *, int); extern void nfs41_notify_server(struct 
nfs_client *); #else @@ -468,7 +462,7 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, } #endif /* CONFIG_NFS_V4_1 */ -extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *, gfp_t); +extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, const struct cred *, gfp_t); extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern void nfs4_purge_state_owners(struct nfs_server *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); @@ -494,7 +488,7 @@ extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern int nfs4_select_rw_stateid(struct nfs4_state *, fmode_t, const struct nfs_lock_context *, nfs4_stateid *, - struct rpc_cred **); + const struct cred **); extern bool nfs4_refresh_open_stateid(nfs4_stateid *dst, struct nfs4_state *state); extern bool nfs4_copy_open_stateid(nfs4_stateid *dst, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 8f53455c4765..2548405da1f7 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -545,7 +545,7 @@ static int nfs4_match_client(struct nfs_client *pos, struct nfs_client *new, */ int nfs40_walk_client_list(struct nfs_client *new, struct nfs_client **result, - struct rpc_cred *cred) + const struct cred *cred) { struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); struct nfs_client *pos, *prev = NULL; @@ -711,7 +711,7 @@ out_err: */ int nfs41_walk_client_list(struct nfs_client *new, struct nfs_client **result, - struct rpc_cred *cred) + const struct cred *cred) { struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); struct nfs_client *pos, *prev = NULL; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 46d691ba04bc..45b2322e092d 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -133,15 +133,9 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t 
pos_in, struct file *file_out, loff_t pos_out, size_t count, unsigned int flags) { - ssize_t ret; - if (file_inode(file_in) == file_inode(file_out)) return -EINVAL; -retry: - ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); - if (ret == -EAGAIN) - goto retry; - return ret; + return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); } static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0ba2b0fb8ff3..557a5d636183 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -93,19 +93,19 @@ static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinf static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label, struct inode *inode); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label, struct inode *inode); -static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, +static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, struct nfs_open_context *ctx, struct nfs4_label *ilabel, struct nfs4_label *olabel); #ifdef CONFIG_NFS_V4_1 static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, - struct rpc_cred *cred, + const struct cred *cred, struct nfs4_slot *slot, bool is_privileged); static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, - struct rpc_cred *); + const struct cred *); static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *, - struct rpc_cred *, bool); + const struct cred *, bool); #endif #ifdef CONFIG_NFS_V4_SECURITY_LABEL @@ -361,7 +361,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent static void nfs4_test_and_free_stateid(struct nfs_server *server, nfs4_stateid *stateid, - struct rpc_cred 
*cred) + const struct cred *cred) { const struct nfs4_minor_version_ops *ops = server->nfs_client->cl_mvops; @@ -370,7 +370,7 @@ static void nfs4_test_and_free_stateid(struct nfs_server *server, static void __nfs4_free_revoked_stateid(struct nfs_server *server, nfs4_stateid *stateid, - struct rpc_cred *cred) + const struct cred *cred) { stateid->type = NFS4_REVOKED_STATEID_TYPE; nfs4_test_and_free_stateid(server, stateid, cred); @@ -378,7 +378,7 @@ static void __nfs4_free_revoked_stateid(struct nfs_server *server, static void nfs4_free_revoked_stateid(struct nfs_server *server, const nfs4_stateid *stateid, - struct rpc_cred *cred) + const struct cred *cred) { nfs4_stateid tmp; @@ -908,7 +908,7 @@ static const struct rpc_call_ops nfs41_call_sync_ops = { static void nfs4_sequence_process_interrupted(struct nfs_client *client, - struct nfs4_slot *slot, struct rpc_cred *cred) + struct nfs4_slot *slot, const struct cred *cred) { struct rpc_task *task; @@ -939,7 +939,7 @@ EXPORT_SYMBOL_GPL(nfs4_sequence_done); static void nfs4_sequence_process_interrupted(struct nfs_client *client, - struct nfs4_slot *slot, struct rpc_cred *cred) + struct nfs4_slot *slot, const struct cred *cred) { WARN_ON_ONCE(1); slot->interrupted = 0; @@ -2484,7 +2484,7 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) * Note that in the non-execute case, we want to turn off permission * checking if we just created a new file (POSIX open() semantics). 
*/ -static int nfs4_opendata_access(struct rpc_cred *cred, +static int nfs4_opendata_access(const struct cred *cred, struct nfs4_opendata *opendata, struct nfs4_state *state, fmode_t fmode, int openflags) @@ -2651,7 +2651,7 @@ static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st static int nfs40_test_and_free_expired_stateid(struct nfs_server *server, nfs4_stateid *stateid, - struct rpc_cred *cred) + const struct cred *cred) { return -NFS4ERR_BAD_STATEID; } @@ -2659,7 +2659,7 @@ static int nfs40_test_and_free_expired_stateid(struct nfs_server *server, #if defined(CONFIG_NFS_V4_1) static int nfs41_test_and_free_expired_stateid(struct nfs_server *server, nfs4_stateid *stateid, - struct rpc_cred *cred) + const struct cred *cred) { int status; @@ -2693,7 +2693,7 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) struct nfs_server *server = NFS_SERVER(state->inode); nfs4_stateid stateid; struct nfs_delegation *delegation; - struct rpc_cred *cred; + const struct cred *cred = NULL; int status; /* Get the delegation credential for use by test/free_stateid */ @@ -2718,14 +2718,16 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) return; } - cred = get_rpccred(delegation->cred); + if (delegation->cred) + cred = get_cred(delegation->cred); rcu_read_unlock(); status = nfs41_test_and_free_expired_stateid(server, &stateid, cred); trace_nfs4_test_delegation_stateid(state, NULL, status); if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID) nfs_finish_clear_delegation_stateid(state, &stateid); - put_rpccred(cred); + if (delegation->cred) + put_cred(cred); } /** @@ -2748,7 +2750,7 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) spin_lock(&state->state_lock); list_for_each_entry(lsp, &state->lock_states, ls_locks) { if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { - struct rpc_cred *cred = lsp->ls_state->owner->so_cred; + const struct cred *cred = lsp->ls_state->owner->so_cred; 
refcount_inc(&lsp->ls_count); spin_unlock(&state->state_lock); @@ -2792,7 +2794,7 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); nfs4_stateid *stateid = &state->open_stateid; - struct rpc_cred *cred = state->owner->so_cred; + const struct cred *cred = state->owner->so_cred; int status; if (test_bit(NFS_OPEN_STATE, &state->flags) == 0) { @@ -2950,7 +2952,7 @@ static int _nfs4_do_open(struct inode *dir, struct nfs_server *server = NFS_SERVER(dir); struct nfs4_opendata *opendata; struct dentry *dentry = ctx->dentry; - struct rpc_cred *cred = ctx->cred; + const struct cred *cred = ctx->cred; struct nfs4_threshold **ctx_th = &ctx->mdsthreshold; fmode_t fmode = ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC); enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL; @@ -3120,7 +3122,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, static int _nfs4_do_setattr(struct inode *inode, struct nfs_setattrargs *arg, struct nfs_setattrres *res, - struct rpc_cred *cred, + const struct cred *cred, struct nfs_open_context *ctx) { struct nfs_server *server = NFS_SERVER(inode); @@ -3130,7 +3132,7 @@ static int _nfs4_do_setattr(struct inode *inode, .rpc_resp = res, .rpc_cred = cred, }; - struct rpc_cred *delegation_cred = NULL; + const struct cred *delegation_cred = NULL; unsigned long timestamp = jiffies; bool truncate; int status; @@ -3165,14 +3167,14 @@ zero_stateid: status = nfs4_call_sync(server->client, server, &msg, &arg->seq_args, &res->seq_res, 1); - put_rpccred(delegation_cred); + put_cred(delegation_cred); if (status == 0 && ctx != NULL) renew_lease(server, timestamp); trace_nfs4_setattr(inode, &arg->stateid, status); return status; } -static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, +static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, struct nfs_open_context *ctx, struct nfs4_label *ilabel, struct 
nfs4_label *olabel) @@ -3973,7 +3975,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, struct iattr *sattr) { struct inode *inode = d_inode(dentry); - struct rpc_cred *cred = NULL; + const struct cred *cred = NULL; struct nfs_open_context *ctx = NULL; struct nfs4_label *label = NULL; int status; @@ -4202,7 +4204,6 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry return -ENOMEM; args.bitmask = server->cache_consistency_bitmask; } - status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (!status) { nfs_access_set_mask(entry, res.access); @@ -4691,7 +4692,7 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, return err; } -static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, +static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred, u64 cookie, struct page **pages, unsigned int count, bool plus) { struct inode *dir = d_inode(dentry); @@ -4729,7 +4730,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, return status; } -static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, +static int nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred, u64 cookie, struct page **pages, unsigned int count, bool plus) { struct nfs4_exception exception = { }; @@ -5257,7 +5258,7 @@ static const struct rpc_call_ops nfs4_renew_ops = { .rpc_release = nfs4_renew_release, }; -static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) +static int nfs4_proc_async_renew(struct nfs_client *clp, const struct cred *cred, unsigned renew_flags) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], @@ -5281,7 +5282,7 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, &nfs4_renew_ops, data); } -static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) +static int 
nfs4_proc_renew(struct nfs_client *clp, const struct cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], @@ -5696,7 +5697,6 @@ nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen) { struct nfs4_label ilabel, *olabel = NULL; struct nfs_fattr fattr; - struct rpc_cred *cred; int status; if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) @@ -5709,10 +5709,6 @@ nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen) ilabel.label = (char *)buf; ilabel.len = buflen; - cred = rpc_lookup_cred(); - if (IS_ERR(cred)) - return PTR_ERR(cred); - olabel = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); if (IS_ERR(olabel)) { status = -PTR_ERR(olabel); @@ -5725,7 +5721,6 @@ nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen) nfs4_label_free(olabel); out: - put_rpccred(cred); return status; } #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ @@ -5894,13 +5889,13 @@ static const struct rpc_call_ops nfs4_setclientid_ops = { * @clp: state data structure * @program: RPC program for NFSv4 callback service * @port: IP port number for NFS4 callback service - * @cred: RPC credential to use for this call + * @cred: credential to use for this call * @res: where to place the result * * Returns zero, a negative errno, or a negative NFS4ERR status code. */ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, - unsigned short port, struct rpc_cred *cred, + unsigned short port, const struct cred *cred, struct nfs4_setclientid_res *res) { nfs4_verifier sc_verifier; @@ -5969,13 +5964,13 @@ out: * nfs4_proc_setclientid_confirm - Confirm client ID * @clp: state data structure * @res: result of a previous SETCLIENTID - * @cred: RPC credential to use for this call + * @cred: credential to use for this call * * Returns zero, a negative errno, or a negative NFS4ERR status code. 
*/ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct nfs4_setclientid_res *arg, - struct rpc_cred *cred) + const struct cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM], @@ -6138,7 +6133,7 @@ static const struct rpc_call_ops nfs4_delegreturn_ops = { .rpc_release = nfs4_delegreturn_release, }; -static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync) +static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync) { struct nfs4_delegreturndata *data; struct nfs_server *server = NFS_SERVER(inode); @@ -6205,7 +6200,7 @@ out: return status; } -int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync) +int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync) { struct nfs_server *server = NFS_SERVER(inode); struct nfs4_exception exception = { }; @@ -7268,7 +7263,7 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, */ static int _nfs40_proc_get_locations(struct inode *inode, struct nfs4_fs_locations *locations, - struct page *page, struct rpc_cred *cred) + struct page *page, const struct cred *cred) { struct nfs_server *server = NFS_SERVER(inode); struct rpc_clnt *clnt = server->client; @@ -7325,7 +7320,7 @@ static int _nfs40_proc_get_locations(struct inode *inode, */ static int _nfs41_proc_get_locations(struct inode *inode, struct nfs4_fs_locations *locations, - struct page *page, struct rpc_cred *cred) + struct page *page, const struct cred *cred) { struct nfs_server *server = NFS_SERVER(inode); struct rpc_clnt *clnt = server->client; @@ -7384,7 +7379,7 @@ static int _nfs41_proc_get_locations(struct inode *inode, */ int nfs4_proc_get_locations(struct inode *inode, struct nfs4_fs_locations *locations, - struct page *page, struct rpc_cred *cred) + struct 
page *page, const struct cred *cred) { struct nfs_server *server = NFS_SERVER(inode); struct nfs_client *clp = server->nfs_client; @@ -7415,7 +7410,7 @@ int nfs4_proc_get_locations(struct inode *inode, * is appended to this compound to identify the client ID which is * performing recovery. */ -static int _nfs40_proc_fsid_present(struct inode *inode, struct rpc_cred *cred) +static int _nfs40_proc_fsid_present(struct inode *inode, const struct cred *cred) { struct nfs_server *server = NFS_SERVER(inode); struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; @@ -7461,7 +7456,7 @@ static int _nfs40_proc_fsid_present(struct inode *inode, struct rpc_cred *cred) * this operation is identified in the SEQUENCE operation in this * compound. */ -static int _nfs41_proc_fsid_present(struct inode *inode, struct rpc_cred *cred) +static int _nfs41_proc_fsid_present(struct inode *inode, const struct cred *cred) { struct nfs_server *server = NFS_SERVER(inode); struct rpc_clnt *clnt = server->client; @@ -7508,7 +7503,7 @@ static int _nfs41_proc_fsid_present(struct inode *inode, struct rpc_cred *cred) * NFS4ERR code if some error occurred on the server, or a * negative errno if a local failure occurred. 
*/ -int nfs4_proc_fsid_present(struct inode *inode, struct rpc_cred *cred) +int nfs4_proc_fsid_present(struct inode *inode, const struct cred *cred) { struct nfs_server *server = NFS_SERVER(inode); struct nfs_client *clp = server->nfs_client; @@ -7555,7 +7550,7 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct .rpc_resp = &res, }; struct rpc_clnt *clnt = NFS_SERVER(dir)->client; - struct rpc_cred *cred = NULL; + const struct cred *cred = NULL; if (use_integrity) { clnt = NFS_SERVER(dir)->nfs_client->cl_rpcclient; @@ -7572,8 +7567,7 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct &res.seq_res, 0); dprintk("NFS reply secinfo: %d\n", status); - if (cred) - put_rpccred(cred); + put_cred(cred); return status; } @@ -7654,7 +7648,7 @@ static int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt, struct rpc_xprt *xprt, struct nfs_client *clp, - struct rpc_cred *cred) + const struct cred *cred) { int status; struct nfs41_bind_conn_to_session_args args = { @@ -7716,7 +7710,7 @@ int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt, struct rpc_bind_conn_calldata { struct nfs_client *clp; - struct rpc_cred *cred; + const struct cred *cred; }; static int @@ -7729,7 +7723,7 @@ nfs4_proc_bind_conn_to_session_callback(struct rpc_clnt *clnt, return nfs4_proc_bind_one_conn_to_session(clnt, xprt, p->clp, p->cred); } -int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred) +int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, const struct cred *cred) { struct rpc_bind_conn_calldata data = { .clp = clp, @@ -7895,7 +7889,7 @@ static const struct rpc_call_ops nfs4_exchange_id_call_ops = { * Wrapper for EXCHANGE_ID operation. 
*/ static struct rpc_task * -nfs4_run_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, +nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred, u32 sp4_how, struct rpc_xprt *xprt) { struct rpc_message msg = { @@ -7991,7 +7985,7 @@ out: * * Wrapper for EXCHANGE_ID operation. */ -static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, +static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cred, u32 sp4_how) { struct rpc_task *task; @@ -8058,7 +8052,7 @@ out: * * Will attempt to negotiate SP4_MACH_CRED if krb5i / krb5p auth is used. */ -int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) +int nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cred) { rpc_authflavor_t authflavor = clp->cl_rpcclient->cl_auth->au_flavor; int status; @@ -8090,7 +8084,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) * @xprt: the rpc_xprt to test * @data: call data for _nfs4_proc_exchange_id. */ -int nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt, +void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *data) { struct nfs4_add_xprt_data *adata = (struct nfs4_add_xprt_data *)data; @@ -8107,20 +8101,22 @@ int nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt, /* Test connection for session trunking. 
Async exchange_id call */ task = nfs4_run_exchange_id(adata->clp, adata->cred, sp4_how, xprt); if (IS_ERR(task)) - return PTR_ERR(task); + return; status = task->tk_status; if (status == 0) status = nfs4_detect_session_trunking(adata->clp, task->tk_msg.rpc_resp, xprt); + if (status == 0) + rpc_clnt_xprt_switch_add_xprt(clnt, xprt); + rpc_put_task(task); - return status; } EXPORT_SYMBOL_GPL(nfs4_test_session_trunk); static int _nfs4_proc_destroy_clientid(struct nfs_client *clp, - struct rpc_cred *cred) + const struct cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_CLIENTID], @@ -8138,7 +8134,7 @@ static int _nfs4_proc_destroy_clientid(struct nfs_client *clp, } static int nfs4_proc_destroy_clientid(struct nfs_client *clp, - struct rpc_cred *cred) + const struct cred *cred) { unsigned int loop; int ret; @@ -8159,7 +8155,7 @@ static int nfs4_proc_destroy_clientid(struct nfs_client *clp, int nfs4_destroy_clientid(struct nfs_client *clp) { - struct rpc_cred *cred; + const struct cred *cred; int ret = 0; if (clp->cl_mvops->minor_version < 1) @@ -8170,8 +8166,7 @@ int nfs4_destroy_clientid(struct nfs_client *clp) goto out; cred = nfs4_get_clid_cred(clp); ret = nfs4_proc_destroy_clientid(clp, cred); - if (cred) - put_rpccred(cred); + put_cred(cred); switch (ret) { case 0: case -NFS4ERR_STALE_CLIENTID: @@ -8387,7 +8382,7 @@ static void nfs4_update_session(struct nfs4_session *session, } static int _nfs4_proc_create_session(struct nfs_client *clp, - struct rpc_cred *cred) + const struct cred *cred) { struct nfs4_session *session = clp->cl_session; struct nfs41_create_session_args args = { @@ -8439,7 +8434,7 @@ out: * It is the responsibility of the caller to verify the session is * expired before calling this routine. 
*/ -int nfs4_proc_create_session(struct nfs_client *clp, struct rpc_cred *cred) +int nfs4_proc_create_session(struct nfs_client *clp, const struct cred *cred) { int status; unsigned *ptr; @@ -8470,7 +8465,7 @@ out: * The caller must serialize access to this routine. */ int nfs4_proc_destroy_session(struct nfs4_session *session, - struct rpc_cred *cred) + const struct cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION], @@ -8572,7 +8567,7 @@ static const struct rpc_call_ops nfs41_sequence_ops = { }; static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, - struct rpc_cred *cred, + const struct cred *cred, struct nfs4_slot *slot, bool is_privileged) { @@ -8615,7 +8610,7 @@ out_err: return ret; } -static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) +static int nfs41_proc_async_sequence(struct nfs_client *clp, const struct cred *cred, unsigned renew_flags) { struct rpc_task *task; int ret = 0; @@ -8631,7 +8626,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr return ret; } -static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) +static int nfs4_proc_sequence(struct nfs_client *clp, const struct cred *cred) { struct rpc_task *task; int ret; @@ -8727,7 +8722,7 @@ static const struct rpc_call_ops nfs4_reclaim_complete_call_ops = { * Issue a global reclaim complete. 
*/ static int nfs41_proc_reclaim_complete(struct nfs_client *clp, - struct rpc_cred *cred) + const struct cred *cred) { struct nfs4_reclaim_complete_data *calldata; struct rpc_task *task; @@ -9080,7 +9075,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) static int _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev, - struct rpc_cred *cred) + const struct cred *cred) { struct nfs4_getdeviceinfo_args args = { .pdev = pdev, @@ -9112,7 +9107,7 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev, - struct rpc_cred *cred) + const struct cred *cred) { struct nfs4_exception exception = { }; int err; @@ -9169,7 +9164,7 @@ static void nfs4_layoutcommit_release(void *calldata) pnfs_cleanup_layoutcommit(data); nfs_post_op_update_inode_force_wcc(data->args.inode, data->res.fattr); - put_rpccred(data->cred); + put_cred(data->cred); nfs_iput_and_deactive(data->inode); kfree(data); } @@ -9245,7 +9240,7 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_resp = &res, }; struct rpc_clnt *clnt = server->client; - struct rpc_cred *cred = NULL; + const struct cred *cred = NULL; int status; if (use_integrity) { @@ -9259,8 +9254,7 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, &res.seq_res, 0); dprintk("<-- %s status=%d\n", __func__, status); - if (cred) - put_rpccred(cred); + put_cred(cred); return status; } @@ -9373,7 +9367,7 @@ out: static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid, - struct rpc_cred *cred) + const struct cred *cred) { int status; struct nfs41_test_stateid_args args = { @@ -9434,7 +9428,7 @@ static void nfs4_handle_delay_or_session_error(struct nfs_server *server, */ static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid, - struct rpc_cred *cred) + const struct cred *cred) { struct nfs4_exception exception = { }; 
int err; @@ -9496,7 +9490,7 @@ static const struct rpc_call_ops nfs41_free_stateid_ops = { */ static int nfs41_free_stateid(struct nfs_server *server, const nfs4_stateid *stateid, - struct rpc_cred *cred, + const struct cred *cred, bool privileged) { struct rpc_message msg = { @@ -9537,7 +9531,7 @@ static int nfs41_free_stateid(struct nfs_server *server, static void nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) { - struct rpc_cred *cred = lsp->ls_state->owner->so_cred; + const struct cred *cred = lsp->ls_state->owner->so_cred; nfs41_free_stateid(server, &lsp->ls_stateid, cred, false); nfs4_free_lock_state(server, lsp); @@ -9608,14 +9602,14 @@ static const struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { static const struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = { .sched_state_renewal = nfs4_proc_async_renew, - .get_state_renewal_cred_locked = nfs4_get_renew_cred_locked, + .get_state_renewal_cred = nfs4_get_renew_cred, .renew_lease = nfs4_proc_renew, }; #if defined(CONFIG_NFS_V4_1) static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { .sched_state_renewal = nfs41_proc_async_sequence, - .get_state_renewal_cred_locked = nfs4_get_machine_cred_locked, + .get_state_renewal_cred = nfs4_get_machine_cred, .renew_lease = nfs4_proc_sequence, }; #endif diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 1f8c2ae43a8d..6ea431b067dd 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -57,7 +57,7 @@ nfs4_renew_state(struct work_struct *work) const struct nfs4_state_maintenance_ops *ops; struct nfs_client *clp = container_of(work, struct nfs_client, cl_renewd.work); - struct rpc_cred *cred; + const struct cred *cred; long lease; unsigned long last, now; unsigned renew_flags = 0; @@ -68,7 +68,6 @@ nfs4_renew_state(struct work_struct *work) if (test_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state)) goto out; - spin_lock(&clp->cl_lock); lease = clp->cl_lease_time; last = clp->cl_last_renewal; 
now = jiffies; @@ -79,8 +78,7 @@ nfs4_renew_state(struct work_struct *work) renew_flags |= NFS4_RENEW_DELEGATION_CB; if (renew_flags != 0) { - cred = ops->get_state_renewal_cred_locked(clp); - spin_unlock(&clp->cl_lock); + cred = ops->get_state_renewal_cred(clp); if (cred == NULL) { if (!(renew_flags & NFS4_RENEW_DELEGATION_CB)) { set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); @@ -92,7 +90,7 @@ nfs4_renew_state(struct work_struct *work) /* Queue an asynchronous RENEW. */ ret = ops->sched_state_renewal(clp, cred, renew_flags); - put_rpccred(cred); + put_cred(cred); switch (ret) { default: goto out_exp; @@ -104,7 +102,6 @@ nfs4_renew_state(struct work_struct *work) } else { dprintk("%s: failed to call renewd. Reason: lease not expired \n", __func__); - spin_unlock(&clp->cl_lock); } nfs4_schedule_state_renewal(clp); out_exp: diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index 769b85655c4b..a5489d70a724 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -573,12 +573,11 @@ static void nfs4_destroy_session_slot_tables(struct nfs4_session *session) void nfs4_destroy_session(struct nfs4_session *session) { struct rpc_xprt *xprt; - struct rpc_cred *cred; + const struct cred *cred; cred = nfs4_get_clid_cred(session->clp); nfs4_proc_destroy_session(session, cred); - if (cred) - put_rpccred(cred); + put_cred(cred); rcu_read_lock(); xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index d8decf2ec48f..02488b50534a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -87,7 +87,7 @@ const nfs4_stateid current_stateid = { static DEFINE_MUTEX(nfs_clid_init_mutex); -int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) +int nfs4_init_clientid(struct nfs_client *clp, const struct cred *cred) { struct nfs4_setclientid_res clid = { .clientid = clp->cl_clientid, @@ -134,7 +134,7 @@ out: */ int nfs40_discover_server_trunking(struct nfs_client *clp, struct nfs_client **result, 
- struct rpc_cred *cred) + const struct cred *cred) { struct nfs4_setclientid_res clid = { .clientid = clp->cl_clientid, @@ -164,32 +164,23 @@ out: return status; } -struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp) +const struct cred *nfs4_get_machine_cred(struct nfs_client *clp) { - struct rpc_cred *cred = NULL; - - if (clp->cl_machine_cred != NULL) - cred = get_rpccred(clp->cl_machine_cred); - return cred; + return get_cred(rpc_machine_cred()); } static void nfs4_root_machine_cred(struct nfs_client *clp) { - struct rpc_cred *cred, *new; - new = rpc_lookup_machine_cred(NULL); - spin_lock(&clp->cl_lock); - cred = clp->cl_machine_cred; - clp->cl_machine_cred = new; - spin_unlock(&clp->cl_lock); - if (cred != NULL) - put_rpccred(cred); + /* Force root creds instead of machine */ + clp->cl_principal = NULL; + clp->cl_rpcclient->cl_principal = NULL; } -static struct rpc_cred * +static const struct cred * nfs4_get_renew_cred_server_locked(struct nfs_server *server) { - struct rpc_cred *cred = NULL; + const struct cred *cred = NULL; struct nfs4_state_owner *sp; struct rb_node *pos; @@ -199,29 +190,30 @@ nfs4_get_renew_cred_server_locked(struct nfs_server *server) sp = rb_entry(pos, struct nfs4_state_owner, so_server_node); if (list_empty(&sp->so_states)) continue; - cred = get_rpccred(sp->so_cred); + cred = get_cred(sp->so_cred); break; } return cred; } /** - * nfs4_get_renew_cred_locked - Acquire credential for a renew operation + * nfs4_get_renew_cred - Acquire credential for a renew operation * @clp: client state handle * * Returns an rpc_cred with reference count bumped, or NULL. * Caller must hold clp->cl_lock. 
*/ -struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) +const struct cred *nfs4_get_renew_cred(struct nfs_client *clp) { - struct rpc_cred *cred = NULL; + const struct cred *cred = NULL; struct nfs_server *server; /* Use machine credentials if available */ - cred = nfs4_get_machine_cred_locked(clp); + cred = nfs4_get_machine_cred(clp); if (cred != NULL) goto out; + spin_lock(&clp->cl_lock); rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { cred = nfs4_get_renew_cred_server_locked(server); @@ -229,6 +221,7 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) break; } rcu_read_unlock(); + spin_unlock(&clp->cl_lock); out: return cred; @@ -319,7 +312,7 @@ static void nfs41_finish_session_reset(struct nfs_client *clp) nfs41_setup_state_renewal(clp); } -int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) +int nfs41_init_clientid(struct nfs_client *clp, const struct cred *cred) { int status; @@ -354,7 +347,7 @@ out: */ int nfs41_discover_server_trunking(struct nfs_client *clp, struct nfs_client **result, - struct rpc_cred *cred) + const struct cred *cred) { int status; @@ -392,32 +385,32 @@ int nfs41_discover_server_trunking(struct nfs_client *clp, * nfs4_get_clid_cred - Acquire credential for a setclientid operation * @clp: client state handle * - * Returns an rpc_cred with reference count bumped, or NULL. + * Returns a cred with reference count bumped, or NULL. 
*/ -struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp) +const struct cred *nfs4_get_clid_cred(struct nfs_client *clp) { - struct rpc_cred *cred; + const struct cred *cred; - spin_lock(&clp->cl_lock); - cred = nfs4_get_machine_cred_locked(clp); - spin_unlock(&clp->cl_lock); + cred = nfs4_get_machine_cred(clp); return cred; } static struct nfs4_state_owner * -nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred) +nfs4_find_state_owner_locked(struct nfs_server *server, const struct cred *cred) { struct rb_node **p = &server->state_owners.rb_node, *parent = NULL; struct nfs4_state_owner *sp; + int cmp; while (*p != NULL) { parent = *p; sp = rb_entry(parent, struct nfs4_state_owner, so_server_node); + cmp = cred_fscmp(cred, sp->so_cred); - if (cred < sp->so_cred) + if (cmp < 0) p = &parent->rb_left; - else if (cred > sp->so_cred) + else if (cmp > 0) p = &parent->rb_right; else { if (!list_empty(&sp->so_lru)) @@ -436,14 +429,16 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new) struct rb_node **p = &server->state_owners.rb_node, *parent = NULL; struct nfs4_state_owner *sp; + int cmp; while (*p != NULL) { parent = *p; sp = rb_entry(parent, struct nfs4_state_owner, so_server_node); + cmp = cred_fscmp(new->so_cred, sp->so_cred); - if (new->so_cred < sp->so_cred) + if (cmp < 0) p = &parent->rb_left; - else if (new->so_cred > sp->so_cred) + else if (cmp > 0) p = &parent->rb_right; else { if (!list_empty(&sp->so_lru)) @@ -490,7 +485,7 @@ nfs4_destroy_seqid_counter(struct nfs_seqid_counter *sc) */ static struct nfs4_state_owner * nfs4_alloc_state_owner(struct nfs_server *server, - struct rpc_cred *cred, + const struct cred *cred, gfp_t gfp_flags) { struct nfs4_state_owner *sp; @@ -505,7 +500,7 @@ nfs4_alloc_state_owner(struct nfs_server *server, return NULL; } sp->so_server = server; - sp->so_cred = get_rpccred(cred); + sp->so_cred = get_cred(cred); spin_lock_init(&sp->so_lock); INIT_LIST_HEAD(&sp->so_states); 
nfs4_init_seqid_counter(&sp->so_seqid); @@ -534,7 +529,7 @@ nfs4_reset_state_owner(struct nfs4_state_owner *sp) static void nfs4_free_state_owner(struct nfs4_state_owner *sp) { nfs4_destroy_seqid_counter(&sp->so_seqid); - put_rpccred(sp->so_cred); + put_cred(sp->so_cred); ida_simple_remove(&sp->so_server->openowner_id, sp->so_seqid.owner_id); kfree(sp); } @@ -572,7 +567,7 @@ static void nfs4_gc_state_owners(struct nfs_server *server) * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL. */ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, - struct rpc_cred *cred, + const struct cred *cred, gfp_t gfp_flags) { struct nfs_client *clp = server->nfs_client; @@ -1041,7 +1036,7 @@ bool nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) */ int nfs4_select_rw_stateid(struct nfs4_state *state, fmode_t fmode, const struct nfs_lock_context *l_ctx, - nfs4_stateid *dst, struct rpc_cred **cred) + nfs4_stateid *dst, const struct cred **cred) { int ret; @@ -1560,7 +1555,7 @@ static void nfs42_complete_copies(struct nfs4_state_owner *sp, struct nfs4_state spin_lock(&sp->so_server->nfs_client->cl_lock); list_for_each_entry(copy, &sp->so_server->ss_copies, copies) { - if (nfs4_stateid_match_other(&state->stateid, &copy->parent_state->stateid)) + if (!nfs4_stateid_match_other(&state->stateid, &copy->parent_state->stateid)) continue; copy->flags = 1; complete(&copy->completion); @@ -1741,7 +1736,7 @@ static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp) static int nfs4_reclaim_complete(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops, - struct rpc_cred *cred) + const struct cred *cred) { /* Notify the server we're done reclaiming our state */ if (ops->reclaim_complete) @@ -1792,7 +1787,7 @@ static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp) static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) { const struct nfs4_state_recovery_ops *ops; - struct rpc_cred *cred; + const struct cred
*cred; int err; if (!nfs4_state_clear_reclaim_reboot(clp)) @@ -1800,7 +1795,7 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) ops = clp->cl_mvops->reboot_recovery_ops; cred = nfs4_get_clid_cred(clp); err = nfs4_reclaim_complete(clp, ops, cred); - put_rpccred(cred); + put_cred(cred); if (err == -NFS4ERR_CONN_NOT_BOUND_TO_SESSION) set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); } @@ -1896,7 +1891,7 @@ restart: static int nfs4_check_lease(struct nfs_client *clp) { - struct rpc_cred *cred; + const struct cred *cred; const struct nfs4_state_maintenance_ops *ops = clp->cl_mvops->state_renewal_ops; int status; @@ -1904,9 +1899,7 @@ static int nfs4_check_lease(struct nfs_client *clp) /* Is the client already known to have an expired lease? */ if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) return 0; - spin_lock(&clp->cl_lock); - cred = ops->get_state_renewal_cred_locked(clp); - spin_unlock(&clp->cl_lock); + cred = ops->get_state_renewal_cred(clp); if (cred == NULL) { cred = nfs4_get_clid_cred(clp); status = -ENOKEY; @@ -1914,7 +1907,7 @@ static int nfs4_check_lease(struct nfs_client *clp) goto out; } status = ops->renew_lease(clp, cred); - put_rpccred(cred); + put_cred(cred); if (status == -ETIMEDOUT) { set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); return 0; @@ -1974,7 +1967,7 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) static int nfs4_establish_lease(struct nfs_client *clp) { - struct rpc_cred *cred; + const struct cred *cred; const struct nfs4_state_recovery_ops *ops = clp->cl_mvops->reboot_recovery_ops; int status; @@ -1986,7 +1979,7 @@ static int nfs4_establish_lease(struct nfs_client *clp) if (cred == NULL) return -ENOENT; status = ops->establish_clid(clp, cred); - put_rpccred(cred); + put_cred(cred); if (status != 0) return status; pnfs_destroy_all_layouts(clp); @@ -2033,7 +2026,7 @@ static int nfs4_purge_lease(struct nfs_client *clp) * * Returns zero or a negative NFS4ERR status code. 
*/ -static int nfs4_try_migration(struct nfs_server *server, struct rpc_cred *cred) +static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred) { struct nfs_client *clp = server->nfs_client; struct nfs4_fs_locations *locations = NULL; @@ -2103,14 +2096,12 @@ static int nfs4_handle_migration(struct nfs_client *clp) const struct nfs4_state_maintenance_ops *ops = clp->cl_mvops->state_renewal_ops; struct nfs_server *server; - struct rpc_cred *cred; + const struct cred *cred; dprintk("%s: migration reported on \"%s\"\n", __func__, clp->cl_hostname); - spin_lock(&clp->cl_lock); - cred = ops->get_state_renewal_cred_locked(clp); - spin_unlock(&clp->cl_lock); + cred = ops->get_state_renewal_cred(clp); if (cred == NULL) return -NFS4ERR_NOENT; @@ -2131,13 +2122,13 @@ restart: rcu_read_unlock(); status = nfs4_try_migration(server, cred); if (status < 0) { - put_rpccred(cred); + put_cred(cred); return status; } goto restart; } rcu_read_unlock(); - put_rpccred(cred); + put_cred(cred); return 0; } @@ -2151,14 +2142,12 @@ static int nfs4_handle_lease_moved(struct nfs_client *clp) const struct nfs4_state_maintenance_ops *ops = clp->cl_mvops->state_renewal_ops; struct nfs_server *server; - struct rpc_cred *cred; + const struct cred *cred; dprintk("%s: lease moved reported on \"%s\"\n", __func__, clp->cl_hostname); - spin_lock(&clp->cl_lock); - cred = ops->get_state_renewal_cred_locked(clp); - spin_unlock(&clp->cl_lock); + cred = ops->get_state_renewal_cred(clp); if (cred == NULL) return -NFS4ERR_NOENT; @@ -2186,7 +2175,7 @@ restart: rcu_read_unlock(); out: - put_rpccred(cred); + put_cred(cred); return 0; } @@ -2209,7 +2198,7 @@ int nfs4_discover_server_trunking(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops = clp->cl_mvops->reboot_recovery_ops; struct rpc_clnt *clnt; - struct rpc_cred *cred; + const struct cred *cred; int i, status; dprintk("NFS: %s: testing '%s'\n", __func__, clp->cl_hostname); @@ -2225,7 +2214,7 @@ again: goto out_unlock; 
status = ops->detect_trunking(clp, result, cred); - put_rpccred(cred); + put_cred(cred); switch (status) { case 0: case -EINTR: @@ -2416,7 +2405,7 @@ out_recovery: static int nfs4_reset_session(struct nfs_client *clp) { - struct rpc_cred *cred; + const struct cred *cred; int status; if (!nfs4_has_session(clp)) @@ -2454,14 +2443,13 @@ static int nfs4_reset_session(struct nfs_client *clp) dprintk("%s: session reset was successful for server %s!\n", __func__, clp->cl_hostname); out: - if (cred) - put_rpccred(cred); + put_cred(cred); return status; } static int nfs4_bind_conn_to_session(struct nfs_client *clp) { - struct rpc_cred *cred; + const struct cred *cred; int ret; if (!nfs4_has_session(clp)) @@ -2471,8 +2459,7 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) return ret; cred = nfs4_get_clid_cred(clp); ret = nfs4_proc_bind_conn_to_session(clp, cred); - if (cred) - put_rpccred(cred); + put_cred(cred); clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); switch (ret) { case 0: diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index b1483b303e0b..b4557cf685fb 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -10,157 +10,302 @@ #include <linux/tracepoint.h> +TRACE_DEFINE_ENUM(EPERM); +TRACE_DEFINE_ENUM(ENOENT); +TRACE_DEFINE_ENUM(EIO); +TRACE_DEFINE_ENUM(ENXIO); +TRACE_DEFINE_ENUM(EACCES); +TRACE_DEFINE_ENUM(EEXIST); +TRACE_DEFINE_ENUM(EXDEV); +TRACE_DEFINE_ENUM(ENOTDIR); +TRACE_DEFINE_ENUM(EISDIR); +TRACE_DEFINE_ENUM(EFBIG); +TRACE_DEFINE_ENUM(ENOSPC); +TRACE_DEFINE_ENUM(EROFS); +TRACE_DEFINE_ENUM(EMLINK); +TRACE_DEFINE_ENUM(ENAMETOOLONG); +TRACE_DEFINE_ENUM(ENOTEMPTY); +TRACE_DEFINE_ENUM(EDQUOT); +TRACE_DEFINE_ENUM(ESTALE); +TRACE_DEFINE_ENUM(EBADHANDLE); +TRACE_DEFINE_ENUM(EBADCOOKIE); +TRACE_DEFINE_ENUM(ENOTSUPP); +TRACE_DEFINE_ENUM(ETOOSMALL); +TRACE_DEFINE_ENUM(EREMOTEIO); +TRACE_DEFINE_ENUM(EBADTYPE); +TRACE_DEFINE_ENUM(EAGAIN); +TRACE_DEFINE_ENUM(ELOOP); +TRACE_DEFINE_ENUM(EOPNOTSUPP); +TRACE_DEFINE_ENUM(EDEADLK); 
+TRACE_DEFINE_ENUM(ENOMEM); +TRACE_DEFINE_ENUM(EKEYEXPIRED); +TRACE_DEFINE_ENUM(ETIMEDOUT); +TRACE_DEFINE_ENUM(ERESTARTSYS); +TRACE_DEFINE_ENUM(ECONNREFUSED); +TRACE_DEFINE_ENUM(ECONNRESET); +TRACE_DEFINE_ENUM(ENETUNREACH); +TRACE_DEFINE_ENUM(EHOSTUNREACH); +TRACE_DEFINE_ENUM(EHOSTDOWN); +TRACE_DEFINE_ENUM(EPIPE); +TRACE_DEFINE_ENUM(EPFNOSUPPORT); +TRACE_DEFINE_ENUM(EPROTONOSUPPORT); + +TRACE_DEFINE_ENUM(NFS4_OK); +TRACE_DEFINE_ENUM(NFS4ERR_ACCESS); +TRACE_DEFINE_ENUM(NFS4ERR_ATTRNOTSUPP); +TRACE_DEFINE_ENUM(NFS4ERR_ADMIN_REVOKED); +TRACE_DEFINE_ENUM(NFS4ERR_BACK_CHAN_BUSY); +TRACE_DEFINE_ENUM(NFS4ERR_BADCHAR); +TRACE_DEFINE_ENUM(NFS4ERR_BADHANDLE); +TRACE_DEFINE_ENUM(NFS4ERR_BADIOMODE); +TRACE_DEFINE_ENUM(NFS4ERR_BADLAYOUT); +TRACE_DEFINE_ENUM(NFS4ERR_BADLABEL); +TRACE_DEFINE_ENUM(NFS4ERR_BADNAME); +TRACE_DEFINE_ENUM(NFS4ERR_BADOWNER); +TRACE_DEFINE_ENUM(NFS4ERR_BADSESSION); +TRACE_DEFINE_ENUM(NFS4ERR_BADSLOT); +TRACE_DEFINE_ENUM(NFS4ERR_BADTYPE); +TRACE_DEFINE_ENUM(NFS4ERR_BADXDR); +TRACE_DEFINE_ENUM(NFS4ERR_BAD_COOKIE); +TRACE_DEFINE_ENUM(NFS4ERR_BAD_HIGH_SLOT); +TRACE_DEFINE_ENUM(NFS4ERR_BAD_RANGE); +TRACE_DEFINE_ENUM(NFS4ERR_BAD_SEQID); +TRACE_DEFINE_ENUM(NFS4ERR_BAD_SESSION_DIGEST); +TRACE_DEFINE_ENUM(NFS4ERR_BAD_STATEID); +TRACE_DEFINE_ENUM(NFS4ERR_CB_PATH_DOWN); +TRACE_DEFINE_ENUM(NFS4ERR_CLID_INUSE); +TRACE_DEFINE_ENUM(NFS4ERR_CLIENTID_BUSY); +TRACE_DEFINE_ENUM(NFS4ERR_COMPLETE_ALREADY); +TRACE_DEFINE_ENUM(NFS4ERR_CONN_NOT_BOUND_TO_SESSION); +TRACE_DEFINE_ENUM(NFS4ERR_DEADLOCK); +TRACE_DEFINE_ENUM(NFS4ERR_DEADSESSION); +TRACE_DEFINE_ENUM(NFS4ERR_DELAY); +TRACE_DEFINE_ENUM(NFS4ERR_DELEG_ALREADY_WANTED); +TRACE_DEFINE_ENUM(NFS4ERR_DELEG_REVOKED); +TRACE_DEFINE_ENUM(NFS4ERR_DENIED); +TRACE_DEFINE_ENUM(NFS4ERR_DIRDELEG_UNAVAIL); +TRACE_DEFINE_ENUM(NFS4ERR_DQUOT); +TRACE_DEFINE_ENUM(NFS4ERR_ENCR_ALG_UNSUPP); +TRACE_DEFINE_ENUM(NFS4ERR_EXIST); +TRACE_DEFINE_ENUM(NFS4ERR_EXPIRED); +TRACE_DEFINE_ENUM(NFS4ERR_FBIG); +TRACE_DEFINE_ENUM(NFS4ERR_FHEXPIRED); 
+TRACE_DEFINE_ENUM(NFS4ERR_FILE_OPEN); +TRACE_DEFINE_ENUM(NFS4ERR_GRACE); +TRACE_DEFINE_ENUM(NFS4ERR_HASH_ALG_UNSUPP); +TRACE_DEFINE_ENUM(NFS4ERR_INVAL); +TRACE_DEFINE_ENUM(NFS4ERR_IO); +TRACE_DEFINE_ENUM(NFS4ERR_ISDIR); +TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTTRYLATER); +TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTUNAVAILABLE); +TRACE_DEFINE_ENUM(NFS4ERR_LEASE_MOVED); +TRACE_DEFINE_ENUM(NFS4ERR_LOCKED); +TRACE_DEFINE_ENUM(NFS4ERR_LOCKS_HELD); +TRACE_DEFINE_ENUM(NFS4ERR_LOCK_RANGE); +TRACE_DEFINE_ENUM(NFS4ERR_MINOR_VERS_MISMATCH); +TRACE_DEFINE_ENUM(NFS4ERR_MLINK); +TRACE_DEFINE_ENUM(NFS4ERR_MOVED); +TRACE_DEFINE_ENUM(NFS4ERR_NAMETOOLONG); +TRACE_DEFINE_ENUM(NFS4ERR_NOENT); +TRACE_DEFINE_ENUM(NFS4ERR_NOFILEHANDLE); +TRACE_DEFINE_ENUM(NFS4ERR_NOMATCHING_LAYOUT); +TRACE_DEFINE_ENUM(NFS4ERR_NOSPC); +TRACE_DEFINE_ENUM(NFS4ERR_NOTDIR); +TRACE_DEFINE_ENUM(NFS4ERR_NOTEMPTY); +TRACE_DEFINE_ENUM(NFS4ERR_NOTSUPP); +TRACE_DEFINE_ENUM(NFS4ERR_NOT_ONLY_OP); +TRACE_DEFINE_ENUM(NFS4ERR_NOT_SAME); +TRACE_DEFINE_ENUM(NFS4ERR_NO_GRACE); +TRACE_DEFINE_ENUM(NFS4ERR_NXIO); +TRACE_DEFINE_ENUM(NFS4ERR_OLD_STATEID); +TRACE_DEFINE_ENUM(NFS4ERR_OPENMODE); +TRACE_DEFINE_ENUM(NFS4ERR_OP_ILLEGAL); +TRACE_DEFINE_ENUM(NFS4ERR_OP_NOT_IN_SESSION); +TRACE_DEFINE_ENUM(NFS4ERR_PERM); +TRACE_DEFINE_ENUM(NFS4ERR_PNFS_IO_HOLE); +TRACE_DEFINE_ENUM(NFS4ERR_PNFS_NO_LAYOUT); +TRACE_DEFINE_ENUM(NFS4ERR_RECALLCONFLICT); +TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_BAD); +TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_CONFLICT); +TRACE_DEFINE_ENUM(NFS4ERR_REJECT_DELEG); +TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG); +TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG_TO_CACHE); +TRACE_DEFINE_ENUM(NFS4ERR_REQ_TOO_BIG); +TRACE_DEFINE_ENUM(NFS4ERR_RESOURCE); +TRACE_DEFINE_ENUM(NFS4ERR_RESTOREFH); +TRACE_DEFINE_ENUM(NFS4ERR_RETRY_UNCACHED_REP); +TRACE_DEFINE_ENUM(NFS4ERR_RETURNCONFLICT); +TRACE_DEFINE_ENUM(NFS4ERR_ROFS); +TRACE_DEFINE_ENUM(NFS4ERR_SAME); +TRACE_DEFINE_ENUM(NFS4ERR_SHARE_DENIED); +TRACE_DEFINE_ENUM(NFS4ERR_SEQUENCE_POS); 
+TRACE_DEFINE_ENUM(NFS4ERR_SEQ_FALSE_RETRY); +TRACE_DEFINE_ENUM(NFS4ERR_SEQ_MISORDERED); +TRACE_DEFINE_ENUM(NFS4ERR_SERVERFAULT); +TRACE_DEFINE_ENUM(NFS4ERR_STALE); +TRACE_DEFINE_ENUM(NFS4ERR_STALE_CLIENTID); +TRACE_DEFINE_ENUM(NFS4ERR_STALE_STATEID); +TRACE_DEFINE_ENUM(NFS4ERR_SYMLINK); +TRACE_DEFINE_ENUM(NFS4ERR_TOOSMALL); +TRACE_DEFINE_ENUM(NFS4ERR_TOO_MANY_OPS); +TRACE_DEFINE_ENUM(NFS4ERR_UNKNOWN_LAYOUTTYPE); +TRACE_DEFINE_ENUM(NFS4ERR_UNSAFE_COMPOUND); +TRACE_DEFINE_ENUM(NFS4ERR_WRONGSEC); +TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED); +TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE); +TRACE_DEFINE_ENUM(NFS4ERR_XDEV); + #define show_nfsv4_errors(error) \ - __print_symbolic(error, \ + __print_symbolic(-(error), \ { NFS4_OK, "OK" }, \ /* Mapped by nfs4_stat_to_errno() */ \ - { -EPERM, "EPERM" }, \ - { -ENOENT, "ENOENT" }, \ - { -EIO, "EIO" }, \ - { -ENXIO, "ENXIO" }, \ - { -EACCES, "EACCES" }, \ - { -EEXIST, "EEXIST" }, \ - { -EXDEV, "EXDEV" }, \ - { -ENOTDIR, "ENOTDIR" }, \ - { -EISDIR, "EISDIR" }, \ - { -EFBIG, "EFBIG" }, \ - { -ENOSPC, "ENOSPC" }, \ - { -EROFS, "EROFS" }, \ - { -EMLINK, "EMLINK" }, \ - { -ENAMETOOLONG, "ENAMETOOLONG" }, \ - { -ENOTEMPTY, "ENOTEMPTY" }, \ - { -EDQUOT, "EDQUOT" }, \ - { -ESTALE, "ESTALE" }, \ - { -EBADHANDLE, "EBADHANDLE" }, \ - { -EBADCOOKIE, "EBADCOOKIE" }, \ - { -ENOTSUPP, "ENOTSUPP" }, \ - { -ETOOSMALL, "ETOOSMALL" }, \ - { -EREMOTEIO, "EREMOTEIO" }, \ - { -EBADTYPE, "EBADTYPE" }, \ - { -EAGAIN, "EAGAIN" }, \ - { -ELOOP, "ELOOP" }, \ - { -EOPNOTSUPP, "EOPNOTSUPP" }, \ - { -EDEADLK, "EDEADLK" }, \ + { EPERM, "EPERM" }, \ + { ENOENT, "ENOENT" }, \ + { EIO, "EIO" }, \ + { ENXIO, "ENXIO" }, \ + { EACCES, "EACCES" }, \ + { EEXIST, "EEXIST" }, \ + { EXDEV, "EXDEV" }, \ + { ENOTDIR, "ENOTDIR" }, \ + { EISDIR, "EISDIR" }, \ + { EFBIG, "EFBIG" }, \ + { ENOSPC, "ENOSPC" }, \ + { EROFS, "EROFS" }, \ + { EMLINK, "EMLINK" }, \ + { ENAMETOOLONG, "ENAMETOOLONG" }, \ + { ENOTEMPTY, "ENOTEMPTY" }, \ + { EDQUOT, "EDQUOT" }, \ + { ESTALE, "ESTALE" }, \ + { 
EBADHANDLE, "EBADHANDLE" }, \ + { EBADCOOKIE, "EBADCOOKIE" }, \ + { ENOTSUPP, "ENOTSUPP" }, \ + { ETOOSMALL, "ETOOSMALL" }, \ + { EREMOTEIO, "EREMOTEIO" }, \ + { EBADTYPE, "EBADTYPE" }, \ + { EAGAIN, "EAGAIN" }, \ + { ELOOP, "ELOOP" }, \ + { EOPNOTSUPP, "EOPNOTSUPP" }, \ + { EDEADLK, "EDEADLK" }, \ /* RPC errors */ \ - { -ENOMEM, "ENOMEM" }, \ - { -EKEYEXPIRED, "EKEYEXPIRED" }, \ - { -ETIMEDOUT, "ETIMEDOUT" }, \ - { -ERESTARTSYS, "ERESTARTSYS" }, \ - { -ECONNREFUSED, "ECONNREFUSED" }, \ - { -ECONNRESET, "ECONNRESET" }, \ - { -ENETUNREACH, "ENETUNREACH" }, \ - { -EHOSTUNREACH, "EHOSTUNREACH" }, \ - { -EHOSTDOWN, "EHOSTDOWN" }, \ - { -EPIPE, "EPIPE" }, \ - { -EPFNOSUPPORT, "EPFNOSUPPORT" }, \ - { -EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \ + { ENOMEM, "ENOMEM" }, \ + { EKEYEXPIRED, "EKEYEXPIRED" }, \ + { ETIMEDOUT, "ETIMEDOUT" }, \ + { ERESTARTSYS, "ERESTARTSYS" }, \ + { ECONNREFUSED, "ECONNREFUSED" }, \ + { ECONNRESET, "ECONNRESET" }, \ + { ENETUNREACH, "ENETUNREACH" }, \ + { EHOSTUNREACH, "EHOSTUNREACH" }, \ + { EHOSTDOWN, "EHOSTDOWN" }, \ + { EPIPE, "EPIPE" }, \ + { EPFNOSUPPORT, "EPFNOSUPPORT" }, \ + { EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \ /* NFSv4 native errors */ \ - { -NFS4ERR_ACCESS, "ACCESS" }, \ - { -NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \ - { -NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \ - { -NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \ - { -NFS4ERR_BADCHAR, "BADCHAR" }, \ - { -NFS4ERR_BADHANDLE, "BADHANDLE" }, \ - { -NFS4ERR_BADIOMODE, "BADIOMODE" }, \ - { -NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \ - { -NFS4ERR_BADLABEL, "BADLABEL" }, \ - { -NFS4ERR_BADNAME, "BADNAME" }, \ - { -NFS4ERR_BADOWNER, "BADOWNER" }, \ - { -NFS4ERR_BADSESSION, "BADSESSION" }, \ - { -NFS4ERR_BADSLOT, "BADSLOT" }, \ - { -NFS4ERR_BADTYPE, "BADTYPE" }, \ - { -NFS4ERR_BADXDR, "BADXDR" }, \ - { -NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \ - { -NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \ - { -NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \ - { -NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \ - { 
-NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \ - { -NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \ - { -NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ - { -NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \ - { -NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \ - { -NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \ - { -NFS4ERR_CONN_NOT_BOUND_TO_SESSION, \ + { NFS4ERR_ACCESS, "ACCESS" }, \ + { NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \ + { NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \ + { NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \ + { NFS4ERR_BADCHAR, "BADCHAR" }, \ + { NFS4ERR_BADHANDLE, "BADHANDLE" }, \ + { NFS4ERR_BADIOMODE, "BADIOMODE" }, \ + { NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \ + { NFS4ERR_BADLABEL, "BADLABEL" }, \ + { NFS4ERR_BADNAME, "BADNAME" }, \ + { NFS4ERR_BADOWNER, "BADOWNER" }, \ + { NFS4ERR_BADSESSION, "BADSESSION" }, \ + { NFS4ERR_BADSLOT, "BADSLOT" }, \ + { NFS4ERR_BADTYPE, "BADTYPE" }, \ + { NFS4ERR_BADXDR, "BADXDR" }, \ + { NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \ + { NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \ + { NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \ + { NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \ + { NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \ + { NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \ + { NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ + { NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \ + { NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \ + { NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \ + { NFS4ERR_CONN_NOT_BOUND_TO_SESSION, \ "CONN_NOT_BOUND_TO_SESSION" }, \ - { -NFS4ERR_DEADLOCK, "DEADLOCK" }, \ - { -NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \ - { -NFS4ERR_DELAY, "DELAY" }, \ - { -NFS4ERR_DELEG_ALREADY_WANTED, \ + { NFS4ERR_DEADLOCK, "DEADLOCK" }, \ + { NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \ + { NFS4ERR_DELAY, "DELAY" }, \ + { NFS4ERR_DELEG_ALREADY_WANTED, \ "DELEG_ALREADY_WANTED" }, \ - { -NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \ - { -NFS4ERR_DENIED, "DENIED" }, \ - { -NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \ - { -NFS4ERR_DQUOT, "DQUOT" }, \ - { -NFS4ERR_ENCR_ALG_UNSUPP, 
"ENCR_ALG_UNSUPP" }, \ - { -NFS4ERR_EXIST, "EXIST" }, \ - { -NFS4ERR_EXPIRED, "EXPIRED" }, \ - { -NFS4ERR_FBIG, "FBIG" }, \ - { -NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \ - { -NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \ - { -NFS4ERR_GRACE, "GRACE" }, \ - { -NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \ - { -NFS4ERR_INVAL, "INVAL" }, \ - { -NFS4ERR_IO, "IO" }, \ - { -NFS4ERR_ISDIR, "ISDIR" }, \ - { -NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \ - { -NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \ - { -NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \ - { -NFS4ERR_LOCKED, "LOCKED" }, \ - { -NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \ - { -NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \ - { -NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \ - { -NFS4ERR_MLINK, "MLINK" }, \ - { -NFS4ERR_MOVED, "MOVED" }, \ - { -NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \ - { -NFS4ERR_NOENT, "NOENT" }, \ - { -NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \ - { -NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \ - { -NFS4ERR_NOSPC, "NOSPC" }, \ - { -NFS4ERR_NOTDIR, "NOTDIR" }, \ - { -NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \ - { -NFS4ERR_NOTSUPP, "NOTSUPP" }, \ - { -NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \ - { -NFS4ERR_NOT_SAME, "NOT_SAME" }, \ - { -NFS4ERR_NO_GRACE, "NO_GRACE" }, \ - { -NFS4ERR_NXIO, "NXIO" }, \ - { -NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \ - { -NFS4ERR_OPENMODE, "OPENMODE" }, \ - { -NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \ - { -NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \ - { -NFS4ERR_PERM, "PERM" }, \ - { -NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \ - { -NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \ - { -NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \ - { -NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \ - { -NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \ - { -NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \ - { -NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \ - { -NFS4ERR_REP_TOO_BIG_TO_CACHE, \ + { NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \ + { NFS4ERR_DENIED, "DENIED" }, \ + { NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \ + { 
NFS4ERR_DQUOT, "DQUOT" }, \ + { NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \ + { NFS4ERR_EXIST, "EXIST" }, \ + { NFS4ERR_EXPIRED, "EXPIRED" }, \ + { NFS4ERR_FBIG, "FBIG" }, \ + { NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \ + { NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \ + { NFS4ERR_GRACE, "GRACE" }, \ + { NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \ + { NFS4ERR_INVAL, "INVAL" }, \ + { NFS4ERR_IO, "IO" }, \ + { NFS4ERR_ISDIR, "ISDIR" }, \ + { NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \ + { NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \ + { NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \ + { NFS4ERR_LOCKED, "LOCKED" }, \ + { NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \ + { NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \ + { NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \ + { NFS4ERR_MLINK, "MLINK" }, \ + { NFS4ERR_MOVED, "MOVED" }, \ + { NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \ + { NFS4ERR_NOENT, "NOENT" }, \ + { NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \ + { NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \ + { NFS4ERR_NOSPC, "NOSPC" }, \ + { NFS4ERR_NOTDIR, "NOTDIR" }, \ + { NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \ + { NFS4ERR_NOTSUPP, "NOTSUPP" }, \ + { NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \ + { NFS4ERR_NOT_SAME, "NOT_SAME" }, \ + { NFS4ERR_NO_GRACE, "NO_GRACE" }, \ + { NFS4ERR_NXIO, "NXIO" }, \ + { NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \ + { NFS4ERR_OPENMODE, "OPENMODE" }, \ + { NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \ + { NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \ + { NFS4ERR_PERM, "PERM" }, \ + { NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \ + { NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \ + { NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \ + { NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \ + { NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \ + { NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \ + { NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \ + { NFS4ERR_REP_TOO_BIG_TO_CACHE, \ "REP_TOO_BIG_TO_CACHE" }, \ - { -NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \ - { -NFS4ERR_RESOURCE, "RESOURCE" }, \ - { 
-NFS4ERR_RESTOREFH, "RESTOREFH" }, \ - { -NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \ - { -NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \ - { -NFS4ERR_ROFS, "ROFS" }, \ - { -NFS4ERR_SAME, "SAME" }, \ - { -NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \ - { -NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \ - { -NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \ - { -NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \ - { -NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \ - { -NFS4ERR_STALE, "STALE" }, \ - { -NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \ - { -NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \ - { -NFS4ERR_SYMLINK, "SYMLINK" }, \ - { -NFS4ERR_TOOSMALL, "TOOSMALL" }, \ - { -NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \ - { -NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \ - { -NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \ - { -NFS4ERR_WRONGSEC, "WRONGSEC" }, \ - { -NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \ - { -NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \ - { -NFS4ERR_XDEV, "XDEV" }) + { NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \ + { NFS4ERR_RESOURCE, "RESOURCE" }, \ + { NFS4ERR_RESTOREFH, "RESTOREFH" }, \ + { NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \ + { NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \ + { NFS4ERR_ROFS, "ROFS" }, \ + { NFS4ERR_SAME, "SAME" }, \ + { NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \ + { NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \ + { NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \ + { NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \ + { NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \ + { NFS4ERR_STALE, "STALE" }, \ + { NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \ + { NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \ + { NFS4ERR_SYMLINK, "SYMLINK" }, \ + { NFS4ERR_TOOSMALL, "TOOSMALL" }, \ + { NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \ + { NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \ + { NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \ + { NFS4ERR_WRONGSEC, "WRONGSEC" }, \ + { NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \ + { NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \ + { 
NFS4ERR_XDEV, "XDEV" }) #define show_open_flags(flags) \ __print_flags(flags, "|", \ @@ -558,6 +703,13 @@ TRACE_EVENT(nfs4_close, ) ); +TRACE_DEFINE_ENUM(F_GETLK); +TRACE_DEFINE_ENUM(F_SETLK); +TRACE_DEFINE_ENUM(F_SETLKW); +TRACE_DEFINE_ENUM(F_RDLCK); +TRACE_DEFINE_ENUM(F_WRLCK); +TRACE_DEFINE_ENUM(F_UNLCK); + #define show_lock_cmd(type) \ __print_symbolic((int)type, \ { F_GETLK, "GETLK" }, \ @@ -1451,6 +1603,10 @@ DEFINE_NFS4_COMMIT_EVENT(nfs4_commit); #ifdef CONFIG_NFS_V4_1 DEFINE_NFS4_COMMIT_EVENT(nfs4_pnfs_commit_ds); +TRACE_DEFINE_ENUM(IOMODE_READ); +TRACE_DEFINE_ENUM(IOMODE_RW); +TRACE_DEFINE_ENUM(IOMODE_ANY); + #define show_pnfs_iomode(iomode) \ __print_symbolic(iomode, \ { IOMODE_READ, "READ" }, \ @@ -1528,6 +1684,20 @@ DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutcommit); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_layoutreturn); DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn_on_close); +TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_UNKNOWN); +TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_NO_PNFS); +TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_RD_ZEROLEN); +TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_MDSTHRESH); +TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_NOMEM); +TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_BULK_RECALL); +TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_IO_TEST_FAIL); +TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_FOUND_CACHED); +TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_RETURN); +TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_BLOCKED); +TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_INVALID_OPEN); +TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_RETRY); +TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); + #define show_pnfs_update_layout_reason(reason) \ __print_symbolic(reason, \ { PNFS_UPDATE_LAYOUT_UNKNOWN, "unknown" }, \ diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 5c4568a0804b..e54d899c1848 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -461,7 +461,7 @@ EXPORT_SYMBOL_GPL(nfs_wait_on_request); * @prev: previous request in desc, or NULL * @req: this request * - * Returns zero if @req can be coalesced into @desc, otherwise it 
returns + * Returns zero if @req cannot be coalesced into @desc, otherwise it returns * the size of the request. */ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, @@ -587,7 +587,7 @@ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) } int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, - struct rpc_cred *cred, const struct nfs_rpc_ops *rpc_ops, + const struct cred *cred, const struct nfs_rpc_ops *rpc_ops, const struct rpc_call_ops *call_ops, int how, int flags) { struct rpc_task *task; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 06cb90e9bc6e..53726da5c010 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -275,7 +275,7 @@ pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) list_del_init(&lo->plh_layouts); spin_unlock(&clp->cl_lock); } - put_rpccred(lo->plh_lc_cred); + put_cred(lo->plh_lc_cred); return ld->free_layout_hdr(lo); } @@ -1038,7 +1038,7 @@ pnfs_alloc_init_layoutget_args(struct inode *ino, lgp->args.ctx = get_nfs_open_context(ctx); nfs4_stateid_copy(&lgp->args.stateid, stateid); lgp->gfp_flags = gfp_flags; - lgp->cred = get_rpccred(ctx->cred); + lgp->cred = get_cred(ctx->cred); return lgp; } @@ -1049,7 +1049,7 @@ void pnfs_layoutget_free(struct nfs4_layoutget *lgp) nfs4_free_pages(lgp->args.layout.pages, max_pages); if (lgp->args.inode) pnfs_put_layout_hdr(NFS_I(lgp->args.inode)->layout); - put_rpccred(lgp->cred); + put_cred(lgp->cred); put_nfs_open_context(lgp->args.ctx); kfree(lgp); } @@ -1324,7 +1324,7 @@ pnfs_commit_and_return_layout(struct inode *inode) bool pnfs_roc(struct inode *ino, struct nfs4_layoutreturn_args *args, struct nfs4_layoutreturn_res *res, - const struct rpc_cred *cred) + const struct cred *cred) { struct nfs_inode *nfsi = NFS_I(ino); struct nfs_open_context *ctx; @@ -1583,7 +1583,7 @@ alloc_init_layout_hdr(struct inode *ino, INIT_LIST_HEAD(&lo->plh_return_segs); INIT_LIST_HEAD(&lo->plh_bulk_destroy); lo->plh_inode = ino; - lo->plh_lc_cred = get_rpccred(ctx->cred); + 
lo->plh_lc_cred = get_cred(ctx->cred); lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID; return lo; } @@ -2928,7 +2928,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) spin_unlock(&inode->i_lock); data->args.inode = inode; - data->cred = get_rpccred(nfsi->layout->plh_lc_cred); + data->cred = get_cred(nfsi->layout->plh_lc_cred); nfs_fattr_init(&data->fattr); data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; data->res.fattr = &data->fattr; @@ -2941,7 +2941,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) if (ld->prepare_layoutcommit) { status = ld->prepare_layoutcommit(&data->args); if (status) { - put_rpccred(data->cred); + put_cred(data->cred); spin_lock(&inode->i_lock); set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); if (end_pos > nfsi->layout->plh_lwb) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index e2e9fcd5341d..5e80a07b7bea 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -200,7 +200,7 @@ struct pnfs_layout_hdr { u32 plh_return_seq; enum pnfs_iomode plh_return_iomode; loff_t plh_lwb; /* last write byte for layoutcommit */ - struct rpc_cred *plh_lc_cred; /* layoutcommit cred */ + const struct cred *plh_lc_cred; /* layoutcommit cred */ struct inode *plh_inode; }; @@ -230,7 +230,7 @@ extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); extern size_t max_response_pages(struct nfs_server *server); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev, - struct rpc_cred *cred); + const struct cred *cred); extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout); extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync); @@ -280,7 +280,7 @@ int pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, bool pnfs_roc(struct inode *ino, struct nfs4_layoutreturn_args *args, struct nfs4_layoutreturn_res *res, - const struct rpc_cred *cred); + const struct cred *cred); void pnfs_roc_release(struct nfs4_layoutreturn_args 
*args, struct nfs4_layoutreturn_res *res, int ret); @@ -343,7 +343,7 @@ struct nfs4_deviceid_node { struct nfs4_deviceid_node * nfs4_find_get_deviceid(struct nfs_server *server, - const struct nfs4_deviceid *id, struct rpc_cred *cred, + const struct nfs4_deviceid *id, const struct cred *cred, gfp_t gfp_mask); void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, struct nfs_server *, @@ -694,7 +694,7 @@ static inline bool pnfs_roc(struct inode *ino, struct nfs4_layoutreturn_args *args, struct nfs4_layoutreturn_res *res, - const struct rpc_cred *cred) + const struct cred *cred) { return false; } diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index e8a07b3f9aaa..7fb59487ee90 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -94,7 +94,7 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld, static struct nfs4_deviceid_node * nfs4_get_device_info(struct nfs_server *server, const struct nfs4_deviceid *dev_id, - struct rpc_cred *cred, gfp_t gfp_flags) + const struct cred *cred, gfp_t gfp_flags) { struct nfs4_deviceid_node *d = NULL; struct pnfs_device *pdev = NULL; @@ -184,7 +184,7 @@ __nfs4_find_get_deviceid(struct nfs_server *server, struct nfs4_deviceid_node * nfs4_find_get_deviceid(struct nfs_server *server, - const struct nfs4_deviceid *id, struct rpc_cred *cred, + const struct nfs4_deviceid *id, const struct cred *cred, gfp_t gfp_mask) { long hash = nfs4_deviceid_hash(id); diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index d5e4d3cd8c7f..f5ad75fafc3c 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -686,7 +686,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, rpc_clnt_setup_test_and_add_xprt, &rpcdata); if (xprtdata.cred) - put_rpccred(xprtdata.cred); + put_cred(xprtdata.cred); } else { clp = nfs4_set_ds_client(mds_srv, (struct sockaddr *)&da->da_addr, diff --git a/fs/nfs/proc.c 
b/fs/nfs/proc.c index e0c257bd62b9..5552fa8b6e12 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -490,7 +490,7 @@ nfs_proc_rmdir(struct inode *dir, const struct qstr *name) * from nfs_readdir by calling the decode_entry function directly. */ static int -nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, +nfs_proc_readdir(struct dentry *dentry, const struct cred *cred, u64 cookie, struct page **pages, unsigned int count, bool plus) { struct inode *dir = d_inode(dentry); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ac4b2f005778..22ce3c8a2f46 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -929,7 +929,7 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) data->minorversion = 0; data->need_mount = true; data->net = current->nsproxy->net_ns; - security_init_mnt_opts(&data->lsm_opts); + data->lsm_opts = NULL; } return data; } @@ -1206,7 +1206,7 @@ static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option, static int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) { - char *p, *string, *secdata; + char *p, *string; int rc, sloppy = 0, invalid_option = 0; unsigned short protofamily = AF_UNSPEC; unsigned short mountfamily = AF_UNSPEC; @@ -1217,20 +1217,10 @@ static int nfs_parse_mount_options(char *raw, } dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw); - secdata = alloc_secdata(); - if (!secdata) - goto out_nomem; - - rc = security_sb_copy_data(raw, secdata); - if (rc) - goto out_security_failure; - - rc = security_sb_parse_opts_str(secdata, &mnt->lsm_opts); + rc = security_sb_eat_lsm_opts(raw, &mnt->lsm_opts); if (rc) goto out_security_failure; - free_secdata(secdata); - while ((p = strsep(&raw, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; unsigned long option; @@ -1682,7 +1672,6 @@ out_nomem: printk(KERN_INFO "NFS: not enough memory to parse option\n"); return 0; out_security_failure: - free_secdata(secdata); printk(KERN_INFO "NFS: security options invalid: %d\n", rc); return 0; 
} @@ -2081,14 +2070,9 @@ static int nfs23_validate_mount_data(void *options, if (data->context[0]){ #ifdef CONFIG_SECURITY_SELINUX int rc; - char *opts_str = kmalloc(sizeof(data->context) + 8, GFP_KERNEL); - if (!opts_str) - return -ENOMEM; - strcpy(opts_str, "context="); data->context[NFS_MAX_CONTEXT_LEN] = '\0'; - strcat(opts_str, &data->context[0]); - rc = security_sb_parse_opts_str(opts_str, &args->lsm_opts); - kfree(opts_str); + rc = security_add_mnt_opt("context", data->context, + strlen(data->context), &args->lsm_opts); if (rc) return rc; #else @@ -2168,7 +2152,10 @@ static int nfs_validate_text_mount_data(void *options, if (args->version == 4) { #if IS_ENABLED(CONFIG_NFS_V4) - port = NFS_PORT; + if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA) + port = NFS_RDMA_PORT; + else + port = NFS_PORT; max_namelen = NFS4_MAXNAMLEN; max_pathlen = NFS4_MAXPATHLEN; nfs_validate_transport_protocol(args); @@ -2178,8 +2165,11 @@ static int nfs_validate_text_mount_data(void *options, #else goto out_v4_not_compiled; #endif /* CONFIG_NFS_V4 */ - } else + } else { nfs_set_mount_transport_protocol(args); + if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA) + port = NFS_RDMA_PORT; + } nfs_set_port(sap, &args->nfs_server.port, port); @@ -2265,7 +2255,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) options->version <= 6)))) return 0; - data = kzalloc(sizeof(*data), GFP_KERNEL); + data = nfs_alloc_parsed_mount_data(); if (data == NULL) return -ENOMEM; @@ -2304,8 +2294,10 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) /* compare new mount options with old ones */ error = nfs_compare_remount_data(nfss, data); + if (!error) + error = security_sb_remount(sb, data->lsm_opts); out: - kfree(data); + nfs_free_parsed_mount_data(data); return error; } EXPORT_SYMBOL_GPL(nfs_remount); @@ -2409,8 +2401,7 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n goto Ebusy; if (a->acdirmax != b->acdirmax) goto Ebusy; - if 
(b->auth_info.flavor_len > 0 && - clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) + if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) goto Ebusy; return 1; Ebusy: @@ -2543,7 +2534,7 @@ int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL) kflags |= SECURITY_LSM_NATIVE_LABELS; - error = security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts, + error = security_sb_set_mnt_opts(s, mount_info->parsed->lsm_opts, kflags, &kflags_out); if (error) goto err; diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index fd61bf0fce63..79b97b3c4427 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -31,7 +31,7 @@ static void nfs_free_unlinkdata(struct nfs_unlinkdata *data) { - put_rpccred(data->cred); + put_cred(data->cred); kfree(data->args.name.name); kfree(data); } @@ -177,11 +177,7 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name) goto out_free; data->args.name.len = name->len; - data->cred = rpc_lookup_cred(); - if (IS_ERR(data->cred)) { - status = PTR_ERR(data->cred); - goto out_free_name; - } + data->cred = get_current_cred(); data->res.dir_attr = &data->dir_attr; init_waitqueue_head(&data->wq); @@ -202,8 +198,7 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name) return 0; out_unlock: spin_unlock(&dentry->d_lock); - put_rpccred(data->cred); -out_free_name: + put_cred(data->cred); kfree(data->args.name.name); out_free: kfree(data); @@ -307,7 +302,7 @@ static void nfs_async_rename_release(void *calldata) iput(data->old_dir); iput(data->new_dir); nfs_sb_deactive(sb); - put_rpccred(data->cred); + put_cred(data->cred); kfree(data); } @@ -352,12 +347,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, return ERR_PTR(-ENOMEM); task_setup_data.callback_data = data; - data->cred = rpc_lookup_cred(); - if (IS_ERR(data->cred)) { - struct rpc_task *task = ERR_CAST(data->cred); - kfree(data); - return task; - } + data->cred = get_current_cred(); 
msg.rpc_argp = &data->args; msg.rpc_resp = &data->res; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4f15665f0ad1..5a0bbf917a32 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1233,9 +1233,12 @@ int nfs_key_timeout_notify(struct file *filp, struct inode *inode) { struct nfs_open_context *ctx = nfs_file_open_context(filp); - struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth; - return rpcauth_key_timeout_notify(auth, ctx->cred); + if (nfs_ctx_key_to_expire(ctx, inode) && + !ctx->ll_cred) + /* Already expired! */ + return -EACCES; + return 0; } /* @@ -1244,8 +1247,23 @@ nfs_key_timeout_notify(struct file *filp, struct inode *inode) bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode) { struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth; + struct rpc_cred *cred = ctx->ll_cred; + struct auth_cred acred = { + .cred = ctx->cred, + }; - return rpcauth_cred_key_to_expire(auth, ctx->cred); + if (cred && !cred->cr_ops->crmatch(&acred, cred, 0)) { + put_rpccred(cred); + ctx->ll_cred = NULL; + cred = NULL; + } + if (!cred) + cred = auth->au_ops->lookup_cred(auth, &acred, 0); + if (!cred || IS_ERR(cred)) + return true; + ctx->ll_cred = cred; + return !!(cred->cr_ops->crkey_timeout && + cred->cr_ops->crkey_timeout(cred)); } /* diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 25987bcdf96f..c74e4538d0eb 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -844,24 +844,23 @@ static int max_cb_time(struct net *net) return max(nn->nfsd4_lease/10, (time_t)1) * HZ; } -static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses) +static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses) { if (clp->cl_minorversion == 0) { - char *principal = clp->cl_cred.cr_targ_princ ? 
- clp->cl_cred.cr_targ_princ : "nfs"; - struct rpc_cred *cred; - - cred = rpc_lookup_machine_cred(principal); - if (!IS_ERR(cred)) - get_rpccred(cred); - return cred; + client->cl_principal = clp->cl_cred.cr_targ_princ ? + clp->cl_cred.cr_targ_princ : "nfs"; + + return get_cred(rpc_machine_cred()); } else { - struct rpc_auth *auth = client->cl_auth; - struct auth_cred acred = {}; + struct cred *kcred; + + kcred = prepare_kernel_cred(NULL); + if (!kcred) + return NULL; - acred.uid = ses->se_cb_sec.uid; - acred.gid = ses->se_cb_sec.gid; - return auth->au_ops->lookup_cred(client->cl_auth, &acred, 0); + kcred->uid = ses->se_cb_sec.uid; + kcred->gid = ses->se_cb_sec.gid; + return kcred; } } @@ -884,7 +883,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), }; struct rpc_clnt *client; - struct rpc_cred *cred; + const struct cred *cred; if (clp->cl_minorversion == 0) { if (!clp->cl_cred.cr_principal && @@ -1214,7 +1213,7 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) if (clp->cl_cb_client) { rpc_shutdown_client(clp->cl_cb_client); clp->cl_cb_client = NULL; - put_rpccred(clp->cl_cb_cred); + put_cred(clp->cl_cb_cred); clp->cl_cb_cred = NULL; } if (clp->cl_cb_conn.cb_xprt) { diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 2b36aa037ce0..44517fb5c0de 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -656,7 +656,6 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) struct nfsd_net *nn; ktime_t now, cutoff; const struct nfsd4_layout_ops *ops; - LIST_HEAD(reaplist); switch (task->tk_status) { diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d505990dac7c..0cfd257ffdaf 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -863,8 +863,7 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_rename *rename = &u->rename; __be32 status; - if 
(opens_in_grace(SVC_NET(rqstp)) && - !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK)) + if (opens_in_grace(SVC_NET(rqstp))) return nfserr_grace; status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, rename->rn_snamelen, &cstate->current_fh, @@ -1016,8 +1015,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist, &write->wr_head, write->wr_buflen); - if (!nvecs) - return nfserr_io; WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp, @@ -1348,7 +1345,7 @@ static __be32 nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_fallocate *fallocate, int flags) { - __be32 status = nfserr_notsupp; + __be32 status; struct file *file; status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, @@ -2682,25 +2679,25 @@ static const struct nfsd4_operation nfsd4_ops[] = { /* NFSv4.2 operations */ [OP_ALLOCATE] = { .op_func = nfsd4_allocate, - .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_ALLOCATE", .op_rsize_bop = nfsd4_only_status_rsize, }, [OP_DEALLOCATE] = { .op_func = nfsd4_deallocate, - .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_DEALLOCATE", .op_rsize_bop = nfsd4_only_status_rsize, }, [OP_CLONE] = { .op_func = nfsd4_clone, - .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_CLONE", .op_rsize_bop = nfsd4_only_status_rsize, }, [OP_COPY] = { .op_func = nfsd4_copy, - .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_COPY", .op_rsize_bop = nfsd4_copy_rsize, }, diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 9c247fa1e959..5188f9f70c78 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -662,7 +662,7 @@ struct cld_net { struct cld_upcall { 
struct list_head cu_list; struct cld_net *cu_net; - struct task_struct *cu_task; + struct completion cu_done; struct cld_msg cu_msg; }; @@ -671,23 +671,18 @@ __cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg) { int ret; struct rpc_pipe_msg msg; + struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_msg); memset(&msg, 0, sizeof(msg)); msg.data = cmsg; msg.len = sizeof(*cmsg); - /* - * Set task state before we queue the upcall. That prevents - * wake_up_process in the downcall from racing with schedule. - */ - set_current_state(TASK_UNINTERRUPTIBLE); ret = rpc_queue_upcall(pipe, &msg); if (ret < 0) { - set_current_state(TASK_RUNNING); goto out; } - schedule(); + wait_for_completion(&cup->cu_done); if (msg.errno < 0) ret = msg.errno; @@ -754,7 +749,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (copy_from_user(&cup->cu_msg, src, mlen) != 0) return -EFAULT; - wake_up_process(cup->cu_task); + complete(&cup->cu_done); return mlen; } @@ -769,7 +764,7 @@ cld_pipe_destroy_msg(struct rpc_pipe_msg *msg) if (msg->errno >= 0) return; - wake_up_process(cup->cu_task); + complete(&cup->cu_done); } static const struct rpc_pipe_ops cld_upcall_ops = { @@ -900,7 +895,7 @@ restart_search: goto restart_search; } } - new->cu_task = current; + init_completion(&new->cu_done); new->cu_msg.cm_vers = CLD_UPCALL_VERSION; put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid); new->cu_net = cn; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a334828723fa..fb3c9844c82a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5112,7 +5112,7 @@ nfs4_find_file(struct nfs4_stid *s, int flags) } static __be32 -nfs4_check_olstateid(struct svc_fh *fhp, struct nfs4_ol_stateid *ols, int flags) +nfs4_check_olstateid(struct nfs4_ol_stateid *ols, int flags) { __be32 status; @@ -5195,7 +5195,7 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, break; case NFS4_OPEN_STID: case NFS4_LOCK_STID: - status = nfs4_check_olstateid(fhp, 
openlockstateid(s), flags); + status = nfs4_check_olstateid(openlockstateid(s), flags); break; default: status = nfserr_bad_stateid; @@ -6230,15 +6230,15 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, case NFS4_READ_LT: case NFS4_READW_LT: file_lock->fl_type = F_RDLCK; - break; + break; case NFS4_WRITE_LT: case NFS4_WRITEW_LT: file_lock->fl_type = F_WRLCK; - break; + break; default: dprintk("NFSD: nfs4_lockt: bad lock type!\n"); status = nfserr_inval; - goto out; + goto out; } lo = find_lockowner_str(cstate->clp, &lockt->lt_owner); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 6384c9b94898..b33f9785b756 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1126,6 +1126,8 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) case 'Y': case 'y': case '1': + if (nn->nfsd_serv) + return -EBUSY; nfsd4_end_grace(nn); break; default: diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 6aacb325b6a0..396c76755b03 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -327,7 +327,7 @@ struct nfs4_client { #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ 1 << NFSD4_CLIENT_CB_KILL) unsigned long cl_flags; - struct rpc_cred *cl_cb_cred; + const struct cred *cl_cb_cred; struct rpc_clnt *cl_cb_client; u32 cl_cb_ident; #define NFSD4_CB_UP 0 diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index eb67098117b4..9824e32b2f23 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -396,10 +396,23 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, bool get_write_count; bool size_change = (iap->ia_valid & ATTR_SIZE); - if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) + if (iap->ia_valid & ATTR_SIZE) { accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; - if (iap->ia_valid & ATTR_SIZE) ftype = S_IFREG; + } + + /* + * If utimes(2) and friends are called with times not NULL, we should + * not set NFSD_MAY_WRITE bit. 
Otherwise fh_verify->nfsd_permission + * will return EACCES, when the caller's effective UID does not match + * the owner of the file, and the caller is not privileged. In this + * situation, we should return EPERM(notify_change will return this). + */ + if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME)) { + accmode |= NFSD_MAY_OWNER_OVERRIDE; + if (!(iap->ia_valid & (ATTR_ATIME_SET | ATTR_MTIME_SET))) + accmode |= NFSD_MAY_WRITE; + } /* Callers that do fh_verify should do the fh_want_write: */ get_write_count = !fhp->fh_dentry; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index eb1ce30412dc..832c1759a09a 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -30,6 +30,7 @@ #include <linux/quotaops.h> #include <linux/blkdev.h> #include <linux/uio.h> +#include <linux/mm.h> #include <cluster/masklog.h> @@ -397,7 +398,7 @@ static int ocfs2_readpages(struct file *filp, struct address_space *mapping, * Check whether a remote node truncated this file - we just * drop out in that case as it's not worth handling here. 
*/ - last = list_entry(pages->prev, struct page, lru); + last = lru_to_page(pages); start = (loff_t)last->index << PAGE_SHIFT; if (start >= i_size_read(inode)) goto out_unlock; diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index b8fa1487cd85..8decbe95dcec 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -254,7 +254,7 @@ static ssize_t dlmfs_file_read(struct file *filp, if (!count) return 0; - if (!access_ok(VERIFY_WRITE, buf, count)) + if (!access_ok(buf, count)) return -EFAULT; /* don't read past the lvb */ @@ -302,7 +302,7 @@ static ssize_t dlmfs_file_write(struct file *filp, if (!count) return 0; - if (!access_ok(VERIFY_READ, buf, count)) + if (!access_ok(buf, count)) return -EFAULT; /* don't write past the lvb */ diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index fe53381b26b1..f038235c64bd 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -77,7 +77,7 @@ static int orangefs_readpages(struct file *file, for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page; - page = list_entry(pages->prev, struct page, lru); + page = lru_to_page(pages); list_del(&page->lru); if (!add_to_page_cache(page, mapping, diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index c4e98c9c1621..443bcd8c3c19 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -105,7 +105,7 @@ static int wait_for_free(struct slot_map *m) left = t; else left = t + (left - n); - if (unlikely(signal_pending(current))) + if (signal_pending(current)) left = -EINTR; } while (left > 0); diff --git a/fs/pnode.c b/fs/pnode.c index 53d411a371ce..1100e810d855 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -10,6 +10,7 @@ #include <linux/mount.h> #include <linux/fs.h> #include <linux/nsproxy.h> +#include <uapi/linux/mount.h> #include "internal.h" #include "pnode.h" diff --git a/fs/proc/base.c b/fs/proc/base.c index d7fd1ca807d2..633a63462573 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -581,8 
+581,10 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns, /* * print the file header */ - seq_printf(m, "%-25s %-20s %-20s %-10s\n", - "Limit", "Soft Limit", "Hard Limit", "Units"); + seq_puts(m, "Limit " + "Soft Limit " + "Hard Limit " + "Units \n"); for (i = 0; i < RLIM_NLIMITS; i++) { if (rlim[i].rlim_cur == RLIM_INFINITY) @@ -2356,10 +2358,13 @@ static ssize_t timerslack_ns_write(struct file *file, const char __user *buf, return -ESRCH; if (p != current) { - if (!capable(CAP_SYS_NICE)) { + rcu_read_lock(); + if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { + rcu_read_unlock(); count = -EPERM; goto out; } + rcu_read_unlock(); err = security_task_setscheduler(p); if (err) { @@ -2392,11 +2397,14 @@ static int timerslack_ns_show(struct seq_file *m, void *v) return -ESRCH; if (p != current) { - - if (!capable(CAP_SYS_NICE)) { + rcu_read_lock(); + if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { + rcu_read_unlock(); err = -EPERM; goto out; } + rcu_read_unlock(); + err = security_task_getscheduler(p); if (err) goto out; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 5792f9e39466..da649ccd6804 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -59,7 +59,6 @@ static struct kmem_cache *pde_opener_cache __ro_after_init; static struct inode *proc_alloc_inode(struct super_block *sb) { struct proc_inode *ei; - struct inode *inode; ei = kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL); if (!ei) @@ -71,8 +70,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb) ei->sysctl = NULL; ei->sysctl_entry = NULL; ei->ns_ops = NULL; - inode = &ei->vfs_inode; - return inode; + return &ei->vfs_inode; } static void proc_i_callback(struct rcu_head *head) diff --git a/fs/proc/util.c b/fs/proc/util.c index b161cfa0f9fa..98f8adc17345 100644 --- a/fs/proc/util.c +++ b/fs/proc/util.c @@ -1,4 +1,5 @@ #include <linux/dcache.h> +#include "internal.h" unsigned name_to_int(const struct qstr *qstr) { diff --git a/fs/pstore/pmsg.c 
b/fs/pstore/pmsg.c index 24db02de1787..97fcef74e5af 100644 --- a/fs/pstore/pmsg.c +++ b/fs/pstore/pmsg.c @@ -33,7 +33,7 @@ static ssize_t write_pmsg(struct file *file, const char __user *buf, record.size = count; /* check outside lock, page in any data. write_user also checks */ - if (!access_ok(VERIFY_READ, buf, count)) + if (!access_ok(buf, count)) return -EFAULT; mutex_lock(&pmsg_lock); diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 96f7d32cd184..898c8321b343 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -128,7 +128,6 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], int id, struct pstore_record *record) { struct persistent_ram_zone *prz; - bool update = (record->type == PSTORE_TYPE_DMESG); /* Give up if we never existed or have hit the end. */ if (!przs) @@ -139,7 +138,7 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], int id, return NULL; /* Update old/shadowed buffer. */ - if (update) + if (prz->type == PSTORE_TYPE_DMESG) persistent_ram_save_old(prz); if (!persistent_ram_old_size(prz)) @@ -711,18 +710,15 @@ static int ramoops_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct ramoops_platform_data *pdata = dev->platform_data; + struct ramoops_platform_data pdata_local; struct ramoops_context *cxt = &oops_cxt; size_t dump_mem_sz; phys_addr_t paddr; int err = -EINVAL; if (dev_of_node(dev) && !pdata) { - pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) { - pr_err("cannot allocate platform data buffer\n"); - err = -ENOMEM; - goto fail_out; - } + pdata = &pdata_local; + memset(pdata, 0, sizeof(*pdata)); err = ramoops_parse_dt(pdev, pdata); if (err < 0) diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index c11711c2cc83..f375c0735351 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -357,7 +357,7 @@ int notrace persistent_ram_write_user(struct persistent_ram_zone *prz, int rem, ret = 0, c = count; size_t start; - if (unlikely(!access_ok(VERIFY_READ, s, 
count))) + if (unlikely(!access_ok(s, count))) return -EFAULT; if (unlikely(c > prz->buffer_size)) { s += c - prz->buffer_size; diff --git a/fs/read_write.c b/fs/read_write.c index 58f30537c47a..ff3c5e6f87cf 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -442,7 +442,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) return -EBADF; if (!(file->f_mode & FMODE_CAN_READ)) return -EINVAL; - if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) + if (unlikely(!access_ok(buf, count))) return -EFAULT; ret = rw_verify_area(READ, file, pos, count); @@ -538,7 +538,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ return -EBADF; if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; - if (unlikely(!access_ok(VERIFY_READ, buf, count))) + if (unlikely(!access_ok(buf, count))) return -EFAULT; ret = rw_verify_area(WRITE, file, pos, count); @@ -718,9 +718,6 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, return ret; } -/* A write operation does a read from user space and vice versa */ -#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) - /** * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace * into the kernel and check that it is valid. @@ -810,7 +807,7 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, goto out; } if (type >= 0 - && unlikely(!access_ok(vrfy_dir(type), buf, len))) { + && unlikely(!access_ok(buf, len))) { ret = -EFAULT; goto out; } @@ -856,7 +853,7 @@ ssize_t compat_rw_copy_check_uvector(int type, *ret_pointer = iov; ret = -EFAULT; - if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) + if (!access_ok(uvector, nr_segs*sizeof(*uvector))) goto out; /* @@ -881,7 +878,7 @@ ssize_t compat_rw_copy_check_uvector(int type, if (len < 0) /* size_t not fitting in compat_ssize_t .. 
*/ goto out; if (type >= 0 && - !access_ok(vrfy_dir(type), compat_ptr(buf), len)) { + !access_ok(compat_ptr(buf), len)) { ret = -EFAULT; goto out; } diff --git a/fs/readdir.c b/fs/readdir.c index d97f548e6323..2f6a4534e0df 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -105,7 +105,7 @@ static int fillonedir(struct dir_context *ctx, const char *name, int namlen, } buf->result++; dirent = buf->dirent; - if (!access_ok(VERIFY_WRITE, dirent, + if (!access_ok(dirent, (unsigned long)(dirent->d_name + namlen + 1) - (unsigned long)dirent)) goto efault; @@ -221,7 +221,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, }; int error; - if (!access_ok(VERIFY_WRITE, dirent, count)) + if (!access_ok(dirent, count)) return -EFAULT; f = fdget_pos(fd); @@ -304,7 +304,7 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, }; int error; - if (!access_ok(VERIFY_WRITE, dirent, count)) + if (!access_ok(dirent, count)) return -EFAULT; f = fdget_pos(fd); @@ -365,7 +365,7 @@ static int compat_fillonedir(struct dir_context *ctx, const char *name, } buf->result++; dirent = buf->dirent; - if (!access_ok(VERIFY_WRITE, dirent, + if (!access_ok(dirent, (unsigned long)(dirent->d_name + namlen + 1) - (unsigned long)dirent)) goto efault; @@ -475,7 +475,7 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, }; int error; - if (!access_ok(VERIFY_WRITE, dirent, count)) + if (!access_ok(dirent, count)) return -EFAULT; f = fdget_pos(fd); diff --git a/fs/select.c b/fs/select.c index 4c8652390c94..d0f35dbc0e8f 100644 --- a/fs/select.c +++ b/fs/select.c @@ -381,9 +381,6 @@ typedef struct { #define FDS_BYTES(nr) (FDS_LONGS(nr)*sizeof(long)) /* - * We do a VERIFY_WRITE here even though we are only reading this time: - * we'll write to it eventually.. - * * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned. 
*/ static inline @@ -782,7 +779,7 @@ SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp, sigset_t __user *up = NULL; if (sig) { - if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t)) + if (!access_ok(sig, sizeof(void *)+sizeof(size_t)) || __get_user(up, (sigset_t __user * __user *)sig) || __get_user(sigsetsize, (size_t __user *)(sig+sizeof(void *)))) @@ -802,7 +799,7 @@ SYSCALL_DEFINE6(pselect6_time32, int, n, fd_set __user *, inp, fd_set __user *, sigset_t __user *up = NULL; if (sig) { - if (!access_ok(VERIFY_READ, sig, sizeof(void *)+sizeof(size_t)) + if (!access_ok(sig, sizeof(void *)+sizeof(size_t)) || __get_user(up, (sigset_t __user * __user *)sig) || __get_user(sigsetsize, (size_t __user *)(sig+sizeof(void *)))) @@ -1368,7 +1365,7 @@ COMPAT_SYSCALL_DEFINE6(pselect6_time64, int, n, compat_ulong_t __user *, inp, compat_uptr_t up = 0; if (sig) { - if (!access_ok(VERIFY_READ, sig, + if (!access_ok(sig, sizeof(compat_uptr_t)+sizeof(compat_size_t)) || __get_user(up, (compat_uptr_t __user *)sig) || __get_user(sigsetsize, @@ -1390,7 +1387,7 @@ COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp, compat_uptr_t up = 0; if (sig) { - if (!access_ok(VERIFY_READ, sig, + if (!access_ok(sig, sizeof(compat_uptr_t)+sizeof(compat_size_t)) || __get_user(up, (compat_uptr_t __user *)sig) || __get_user(sigsetsize, diff --git a/fs/super.c b/fs/super.c index ca53a08497ed..48e25eba8465 100644 --- a/fs/super.c +++ b/fs/super.c @@ -35,6 +35,7 @@ #include <linux/fsnotify.h> #include <linux/lockdep.h> #include <linux/user_namespace.h> +#include <uapi/linux/mount.h> #include "internal.h" static int thaw_super_locked(struct super_block *sb); @@ -1245,17 +1246,13 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) { struct dentry *root; struct super_block *sb; - char *secdata = NULL; int error = -ENOMEM; + void *sec_opts = NULL; if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { - secdata = 
alloc_secdata(); - if (!secdata) - goto out; - - error = security_sb_copy_data(data, secdata); + error = security_sb_eat_lsm_opts(data, &sec_opts); if (error) - goto out_free_secdata; + return ERR_PTR(error); } root = type->mount(type, flags, name, data); @@ -1276,10 +1273,16 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) smp_wmb(); sb->s_flags |= SB_BORN; - error = security_sb_kern_mount(sb, flags, secdata); + error = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL); if (error) goto out_sb; + if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT))) { + error = security_sb_kern_mount(sb); + if (error) + goto out_sb; + } + /* * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE * but s_maxbytes was an unsigned long long for many releases. Throw @@ -1290,14 +1293,13 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) "negative value (%lld)\n", type->name, sb->s_maxbytes); up_write(&sb->s_umount); - free_secdata(secdata); + security_free_mnt_opts(&sec_opts); return root; out_sb: dput(root); deactivate_locked_super(sb); out_free_secdata: - free_secdata(secdata); -out: + security_free_mnt_opts(&sec_opts); return ERR_PTR(error); } diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index feeae8081c22..aa85f2874a9f 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -43,7 +43,8 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) kuid_t uid; kgid_t gid; - BUG_ON(!kobj); + if (WARN_ON(!kobj)) + return -EINVAL; if (kobj->parent) parent = kobj->parent->sd; diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index bb71db63c99c..51398457fe00 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -325,7 +325,8 @@ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, kuid_t uid; kgid_t gid; - BUG_ON(!kobj || !kobj->sd || !attr); + if (WARN_ON(!kobj || !kobj->sd || !attr)) + return -EINVAL; kobject_get_ownership(kobj, &uid, &gid); return sysfs_add_file_mode_ns(kobj->sd, attr, false, 
attr->mode, @@ -537,7 +538,8 @@ int sysfs_create_bin_file(struct kobject *kobj, kuid_t uid; kgid_t gid; - BUG_ON(!kobj || !kobj->sd || !attr); + if (WARN_ON(!kobj || !kobj->sd || !attr)) + return -EINVAL; kobject_get_ownership(kobj, &uid, &gid); return sysfs_add_file_mode_ns(kobj->sd, &attr->attr, true, diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 1eb2d6307663..57038604d4a8 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -112,7 +112,8 @@ static int internal_create_group(struct kobject *kobj, int update, kgid_t gid; int error; - BUG_ON(!kobj || (!update && !kobj->sd)); + if (WARN_ON(!kobj || (!update && !kobj->sd))) + return -EINVAL; /* Updates may happen before the object has been instantiated */ if (unlikely(update && !kobj->sd)) diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 215c225b2ca1..c4deecc80f67 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -23,7 +23,8 @@ static int sysfs_do_create_link_sd(struct kernfs_node *parent, { struct kernfs_node *kn, *target = NULL; - BUG_ON(!name || !parent); + if (WARN_ON(!name || !parent)) + return -EINVAL; /* * We don't own @target_kobj and it may be removed at any time. diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index b21ea2ba768d..eedc5e0156ff 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1992,7 +1992,6 @@ xfs_buf_delwri_submit_buffers( struct list_head *wait_list) { struct xfs_buf *bp, *n; - LIST_HEAD (submit_list); int pinned = 0; struct blk_plug plug; diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index ec2e63a7963b..f3ef70c542e1 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -40,7 +40,6 @@ xfs_growfs_data_private( xfs_rfsblock_t new; xfs_agnumber_t oagcount; xfs_trans_t *tp; - LIST_HEAD (buffer_list); struct aghdr_init_data id = {}; nb = in->newblocks; |