summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_addr.c1
-rw-r--r--fs/9p/vfs_file.c1
-rw-r--r--fs/affs/amigaffs.c3
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c1778
-rw-r--r--fs/autofs4/root.c2
-rw-r--r--fs/binfmt_aout.c2
-rw-r--r--fs/binfmt_elf.c29
-rw-r--r--fs/bio.c52
-rw-r--r--fs/block_dev.c16
-rw-r--r--fs/btrfs/file.c1
-rw-r--r--fs/btrfs/inode.c1
-rw-r--r--fs/buffer.c6
-rw-r--r--fs/ceph/file.c1
-rw-r--r--fs/cifs/inode.c3
-rw-r--r--fs/compat.c1
-rw-r--r--fs/coredump.c2
-rw-r--r--fs/dcache.c9
-rw-r--r--fs/direct-io.c23
-rw-r--r--fs/dlm/config.c2
-rw-r--r--fs/dlm/lock.c18
-rw-r--r--fs/dlm/lockspace.c1
-rw-r--r--fs/dlm/lowcomms.c10
-rw-r--r--fs/dlm/recover.c52
-rw-r--r--fs/drop_caches.c2
-rw-r--r--fs/ecryptfs/file.c1
-rw-r--r--fs/ecryptfs/messaging.c6
-rw-r--r--fs/eventpoll.c40
-rw-r--r--fs/exec.c12
-rw-r--r--fs/exportfs/expfs.c3
-rw-r--r--fs/ext2/inode.c1
-rw-r--r--fs/ext3/inode.c1
-rw-r--r--fs/ext3/super.c1
-rw-r--r--fs/ext4/file.c1
-rw-r--r--fs/ext4/indirect.c1
-rw-r--r--fs/ext4/inode.c3
-rw-r--r--fs/ext4/page-io.c1
-rw-r--r--fs/f2fs/data.c1
-rw-r--r--fs/fat/dir.c8
-rw-r--r--fs/fat/fat.h2
-rw-r--r--fs/fat/inode.c78
-rw-r--r--fs/fat/nfs.c3
-rw-r--r--fs/fscache/cookie.c11
-rw-r--r--fs/fuse/cuse.c1
-rw-r--r--fs/fuse/dev.c1
-rw-r--r--fs/fuse/file.c1
-rw-r--r--fs/gfs2/aops.c1
-rw-r--r--fs/gfs2/file.c3
-rw-r--r--fs/gfs2/sys.c18
-rw-r--r--fs/hfs/inode.c1
-rw-r--r--fs/hfsplus/Makefile4
-rw-r--r--fs/hfsplus/attributes.c399
-rw-r--r--fs/hfsplus/bfind.c93
-rw-r--r--fs/hfsplus/bnode.c6
-rw-r--r--fs/hfsplus/brec.c23
-rw-r--r--fs/hfsplus/btree.c8
-rw-r--r--fs/hfsplus/catalog.c36
-rw-r--r--fs/hfsplus/dir.c55
-rw-r--r--fs/hfsplus/extents.c4
-rw-r--r--fs/hfsplus/hfsplus_fs.h52
-rw-r--r--fs/hfsplus/hfsplus_raw.h68
-rw-r--r--fs/hfsplus/inode.c19
-rw-r--r--fs/hfsplus/ioctl.c108
-rw-r--r--fs/hfsplus/super.c56
-rw-r--r--fs/hfsplus/unicode.c7
-rw-r--r--fs/hfsplus/xattr.c709
-rw-r--r--fs/hfsplus/xattr.h60
-rw-r--r--fs/hfsplus/xattr_security.c104
-rw-r--r--fs/hfsplus/xattr_trusted.c63
-rw-r--r--fs/hfsplus/xattr_user.c63
-rw-r--r--fs/inode.c19
-rw-r--r--fs/jfs/inode.c1
-rw-r--r--fs/lockd/host.c29
-rw-r--r--fs/lockd/svcsubs.c7
-rw-r--r--fs/ncpfs/ioctl.c2
-rw-r--r--fs/nfs/client.c1
-rw-r--r--fs/nfs/nfs4client.c13
-rw-r--r--fs/nfs/pnfs_dev.c9
-rw-r--r--fs/nfsd/nfs4state.c7
-rw-r--r--fs/nfsd/nfscache.c3
-rw-r--r--fs/nfsd/nfsd.h6
-rw-r--r--fs/nfsd/nfssvc.c6
-rw-r--r--fs/nilfs2/file.c2
-rw-r--r--fs/nilfs2/inode.c2
-rw-r--r--fs/notify/fsnotify.c3
-rw-r--r--fs/notify/inode_mark.c19
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c1
-rw-r--r--fs/notify/inotify/inotify_user.c28
-rw-r--r--fs/notify/vfsmount_mark.c19
-rw-r--r--fs/ntfs/file.c1
-rw-r--r--fs/ntfs/inode.c1
-rw-r--r--fs/ocfs2/aops.c3
-rw-r--r--fs/ocfs2/aops.h2
-rw-r--r--fs/ocfs2/cluster/tcp.c32
-rw-r--r--fs/ocfs2/dcache.c3
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c6
-rw-r--r--fs/ocfs2/dlmglue.c2
-rw-r--r--fs/ocfs2/inode.h2
-rw-r--r--fs/pipe.c1
-rw-r--r--fs/proc/base.c9
-rw-r--r--fs/proc/generic.c23
-rw-r--r--fs/proc/inode.c22
-rw-r--r--fs/proc/internal.h3
-rw-r--r--fs/proc/kcore.c3
-rw-r--r--fs/proc/meminfo.c6
-rw-r--r--fs/proc/proc_devtree.c13
-rw-r--r--fs/proc/proc_sysctl.c19
-rw-r--r--fs/proc/vmcore.c15
-rw-r--r--fs/read_write.c35
-rw-r--r--fs/reiserfs/inode.c1
-rw-r--r--fs/seq_file.c40
-rw-r--r--fs/signalfd.c65
-rw-r--r--fs/super.c6
-rw-r--r--fs/sysfs/bin.c3
-rw-r--r--fs/ubifs/file.c2
-rw-r--r--fs/udf/file.c2
-rw-r--r--fs/udf/inode.c1
-rw-r--r--fs/xfs/xfs_aops.c1
-rw-r--r--fs/xfs/xfs_file.c1
-rw-r--r--fs/xfs/xfs_log_recover.c3
120 files changed, 3023 insertions, 1634 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 0ad61c6a65a5..055562c580b4 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -33,6 +33,7 @@
#include <linux/pagemap.h>
#include <linux/idr.h>
#include <linux/sched.h>
+#include <linux/aio.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index c2483e97beee..357260be9244 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -620,6 +620,7 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
lock_page(page);
if (page->mapping != inode->i_mapping)
goto out_unlock;
+ wait_for_stable_page(page);
return VM_FAULT_LOCKED;
out_unlock:
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index eb82ee53ee0b..d9a43674cb94 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -125,9 +125,8 @@ static void
affs_fix_dcache(struct inode *inode, u32 entry_ino)
{
struct dentry *dentry;
- struct hlist_node *p;
spin_lock(&inode->i_lock);
- hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
if (entry_ino == (u32)(long)dentry->d_fsdata) {
dentry->d_fsdata = (void *)inode->i_ino;
break;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 9aa52d93c73c..5151ea381c08 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -14,6 +14,7 @@
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
+#include <linux/aio.h>
#include "internal.h"
static int afs_write_back_from_locked_page(struct afs_writeback *wb,
diff --git a/fs/aio.c b/fs/aio.c
index 71f613cf4a85..24ba228195d6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -8,6 +8,8 @@
*
* See ../COPYING for licensing terms.
*/
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
@@ -18,14 +20,14 @@
#include <linux/backing-dev.h>
#include <linux/uio.h>
-#define DEBUG 0
-
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/mman.h>
+#include <linux/bio.h>
#include <linux/mmu_context.h>
+#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/aio.h>
@@ -35,15 +37,94 @@
#include <linux/eventfd.h>
#include <linux/blkdev.h>
#include <linux/compat.h>
+#include <linux/percpu-refcount.h>
#include <asm/kmap_types.h>
#include <asm/uaccess.h>
-#if DEBUG > 1
-#define dprintk printk
-#else
-#define dprintk(x...) do { ; } while (0)
-#endif
+#define AIO_RING_MAGIC 0xa10a10a1
+#define AIO_RING_COMPAT_FEATURES 1
+#define AIO_RING_INCOMPAT_FEATURES 0
+struct aio_ring {
+ unsigned id; /* kernel internal index number */
+ unsigned nr; /* number of io_events */
+ unsigned head;
+ unsigned tail;
+
+ unsigned magic;
+ unsigned compat_features;
+ unsigned incompat_features;
+ unsigned header_length; /* size of aio_ring */
+
+
+ struct io_event io_events[0];
+}; /* 128 bytes + ring size */
+
+#define AIO_RING_PAGES 8
+
+struct kioctx_cpu {
+ unsigned reqs_available;
+};
+
+struct kioctx {
+ struct percpu_ref users;
+
+ /* This needs improving */
+ unsigned long user_id;
+ struct hlist_node list;
+
+ struct __percpu kioctx_cpu *cpu;
+
+ unsigned req_batch;
+
+ unsigned nr;
+
+ /* sys_io_setup currently limits this to an unsigned int */
+ unsigned max_reqs;
+
+ unsigned long mmap_base;
+ unsigned long mmap_size;
+
+ struct page **ring_pages;
+ long nr_pages;
+
+ struct rcu_head rcu_head;
+ struct work_struct rcu_work;
+
+ struct {
+ atomic_t reqs_available;
+ } ____cacheline_aligned_in_smp;
+
+ struct {
+ spinlock_t ctx_lock;
+ struct list_head active_reqs; /* used for cancellation */
+ } ____cacheline_aligned_in_smp;
+
+ struct {
+ struct mutex ring_lock;
+ wait_queue_head_t wait;
+
+ /*
+ * Copy of the real tail, that aio_complete uses - to reduce
+ * cacheline bouncing. The real tail will tend to be much more
+ * contended - since typically events are delivered one at a
+ * time, and then aio_read_events() slurps them up a bunch at a
+ * time - so it's helpful if aio_read_events() isn't also
+ * contending for the tail. So, aio_complete() updates
+ * shadow_tail whenever it updates tail.
+ *
+ * Also needed because tail is used as a hacky lock and isn't
+ * always the real tail.
+ */
+ unsigned shadow_tail;
+ } ____cacheline_aligned_in_smp;
+
+ struct {
+ unsigned tail;
+ } ____cacheline_aligned_in_smp;
+
+ struct page *internal_pages[AIO_RING_PAGES];
+};
/*------ sysctl variables----*/
static DEFINE_SPINLOCK(aio_nr_lock);
@@ -54,11 +135,6 @@ unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio request
static struct kmem_cache *kiocb_cachep;
static struct kmem_cache *kioctx_cachep;
-static struct workqueue_struct *aio_wq;
-
-static void aio_kick_handler(struct work_struct *);
-static void aio_queue_work(struct kioctx *);
-
/* aio_setup
* Creates the slab caches used by the aio routines, panic on
* failure as this is done early during the boot sequence.
@@ -68,10 +144,7 @@ static int __init aio_setup(void)
kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
- aio_wq = alloc_workqueue("aio", 0, 1); /* used to limit concurrency */
- BUG_ON(!aio_wq);
-
- pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
+ pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
return 0;
}
@@ -79,31 +152,29 @@ __initcall(aio_setup);
static void aio_free_ring(struct kioctx *ctx)
{
- struct aio_ring_info *info = &ctx->ring_info;
long i;
- for (i=0; i<info->nr_pages; i++)
- put_page(info->ring_pages[i]);
+ for (i = 0; i < ctx->nr_pages; i++)
+ put_page(ctx->ring_pages[i]);
- if (info->mmap_size) {
- BUG_ON(ctx->mm != current->mm);
- vm_munmap(info->mmap_base, info->mmap_size);
- }
+ if (ctx->mmap_size)
+ vm_munmap(ctx->mmap_base, ctx->mmap_size);
- if (info->ring_pages && info->ring_pages != info->internal_pages)
- kfree(info->ring_pages);
- info->ring_pages = NULL;
- info->nr = 0;
+ if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
+ kfree(ctx->ring_pages);
}
static int aio_setup_ring(struct kioctx *ctx)
{
struct aio_ring *ring;
- struct aio_ring_info *info = &ctx->ring_info;
unsigned nr_events = ctx->max_reqs;
- unsigned long size;
+ struct mm_struct *mm = current->mm;
+ unsigned long size, populate;
int nr_pages;
+ nr_events = max(nr_events, num_possible_cpus() * 4);
+ nr_events *= 2;
+
/* Compensate for the ring buffer's head/tail overlap entry */
nr_events += 2; /* 1 is required, 2 for good luck */
@@ -116,43 +187,44 @@ static int aio_setup_ring(struct kioctx *ctx)
nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event);
- info->nr = 0;
- info->ring_pages = info->internal_pages;
+ ctx->nr = 0;
+ ctx->ring_pages = ctx->internal_pages;
if (nr_pages > AIO_RING_PAGES) {
- info->ring_pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
- if (!info->ring_pages)
+ ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
+ GFP_KERNEL);
+ if (!ctx->ring_pages)
return -ENOMEM;
}
- info->mmap_size = nr_pages * PAGE_SIZE;
- dprintk("attempting mmap of %lu bytes\n", info->mmap_size);
- down_write(&ctx->mm->mmap_sem);
- info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size,
- PROT_READ|PROT_WRITE,
- MAP_ANONYMOUS|MAP_PRIVATE, 0);
- if (IS_ERR((void *)info->mmap_base)) {
- up_write(&ctx->mm->mmap_sem);
- info->mmap_size = 0;
+ ctx->mmap_size = nr_pages * PAGE_SIZE;
+ pr_debug("attempting mmap of %lu bytes\n", ctx->mmap_size);
+ down_write(&mm->mmap_sem);
+ ctx->mmap_base = do_mmap_pgoff(NULL, 0, ctx->mmap_size,
+ PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, 0, &populate);
+ if (IS_ERR((void *)ctx->mmap_base)) {
+ up_write(&mm->mmap_sem);
+ ctx->mmap_size = 0;
aio_free_ring(ctx);
return -EAGAIN;
}
- dprintk("mmap address: 0x%08lx\n", info->mmap_base);
- info->nr_pages = get_user_pages(current, ctx->mm,
- info->mmap_base, nr_pages,
- 1, 0, info->ring_pages, NULL);
- up_write(&ctx->mm->mmap_sem);
+ pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
+ ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages,
+ 1, 0, ctx->ring_pages, NULL);
+ up_write(&mm->mmap_sem);
- if (unlikely(info->nr_pages != nr_pages)) {
+ if (unlikely(ctx->nr_pages != nr_pages)) {
aio_free_ring(ctx);
return -EAGAIN;
}
+ if (populate)
+ mm_populate(ctx->mmap_base, populate);
- ctx->user_id = info->mmap_base;
+ ctx->user_id = ctx->mmap_base;
+ ctx->nr = nr_events; /* trusted copy */
- info->nr = nr_events; /* trusted copy */
-
- ring = kmap_atomic(info->ring_pages[0]);
+ ring = kmap_atomic(ctx->ring_pages[0]);
ring->nr = nr_events; /* user copy */
ring->id = ctx->user_id;
ring->head = ring->tail = 0;
@@ -161,72 +233,142 @@ static int aio_setup_ring(struct kioctx *ctx)
ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
ring->header_length = sizeof(struct aio_ring);
kunmap_atomic(ring);
+ flush_dcache_page(ctx->ring_pages[0]);
return 0;
}
-
-/* aio_ring_event: returns a pointer to the event at the given index from
- * kmap_atomic(). Release the pointer with put_aio_ring_event();
- */
#define AIO_EVENTS_PER_PAGE (PAGE_SIZE / sizeof(struct io_event))
#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
-#define aio_ring_event(info, nr) ({ \
- unsigned pos = (nr) + AIO_EVENTS_OFFSET; \
- struct io_event *__event; \
- __event = kmap_atomic( \
- (info)->ring_pages[pos / AIO_EVENTS_PER_PAGE]); \
- __event += pos % AIO_EVENTS_PER_PAGE; \
- __event; \
-})
-
-#define put_aio_ring_event(event) do { \
- struct io_event *__event = (event); \
- (void)__event; \
- kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK)); \
-} while(0)
-
-static void ctx_rcu_free(struct rcu_head *head)
+void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
+{
+ struct kioctx *ctx = req->ki_ctx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->ctx_lock, flags);
+
+ if (!req->ki_list.next)
+ list_add(&req->ki_list, &ctx->active_reqs);
+
+ req->ki_cancel = cancel;
+
+ spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+}
+EXPORT_SYMBOL(kiocb_set_cancel_fn);
+
+static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb,
+ struct io_event *res)
+{
+ kiocb_cancel_fn *old, *cancel;
+ int ret = -EINVAL;
+
+ /*
+ * Don't want to set kiocb->ki_cancel = KIOCB_CANCELLED unless it
+ * actually has a cancel function, hence the cmpxchg()
+ */
+
+ cancel = ACCESS_ONCE(kiocb->ki_cancel);
+ do {
+ if (!cancel || cancel == KIOCB_CANCELLED)
+ return ret;
+
+ old = cancel;
+ cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
+ } while (cancel != old);
+
+ atomic_inc(&kiocb->ki_users);
+ spin_unlock_irq(&ctx->ctx_lock);
+
+ memset(res, 0, sizeof(*res));
+ res->obj = (u64)(unsigned long)kiocb->ki_obj.user;
+ res->data = kiocb->ki_user_data;
+ ret = cancel(kiocb, res);
+
+ spin_lock_irq(&ctx->ctx_lock);
+
+ return ret;
+}
+
+static void free_ioctx_rcu(struct rcu_head *head)
{
struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
+
+ free_percpu(ctx->cpu);
kmem_cache_free(kioctx_cachep, ctx);
}
-/* __put_ioctx
- * Called when the last user of an aio context has gone away,
- * and the struct needs to be freed.
+/*
+ * When this function runs, the kioctx has been removed from the "hash table"
+ * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
+ * now it's safe to cancel any that need to be.
*/
-static void __put_ioctx(struct kioctx *ctx)
+static void free_ioctx(struct kioctx *ctx)
{
- unsigned nr_events = ctx->max_reqs;
- BUG_ON(ctx->reqs_active);
+ struct aio_ring *ring;
+ struct io_event res;
+ struct kiocb *req;
+ unsigned cpu, head, avail;
- cancel_delayed_work_sync(&ctx->wq);
- aio_free_ring(ctx);
- mmdrop(ctx->mm);
- ctx->mm = NULL;
- if (nr_events) {
- spin_lock(&aio_nr_lock);
- BUG_ON(aio_nr - nr_events > aio_nr);
- aio_nr -= nr_events;
- spin_unlock(&aio_nr_lock);
+ spin_lock_irq(&ctx->ctx_lock);
+
+ while (!list_empty(&ctx->active_reqs)) {
+ req = list_first_entry(&ctx->active_reqs,
+ struct kiocb, ki_list);
+
+ list_del_init(&req->ki_list);
+ kiocb_cancel(ctx, req, &res);
}
- pr_debug("__put_ioctx: freeing %p\n", ctx);
- call_rcu(&ctx->rcu_head, ctx_rcu_free);
-}
-static inline int try_get_ioctx(struct kioctx *kioctx)
-{
- return atomic_inc_not_zero(&kioctx->users);
+ spin_unlock_irq(&ctx->ctx_lock);
+
+ for_each_possible_cpu(cpu) {
+ struct kioctx_cpu *kcpu = per_cpu_ptr(ctx->cpu, cpu);
+
+ atomic_add(kcpu->reqs_available, &ctx->reqs_available);
+ kcpu->reqs_available = 0;
+ }
+
+ ring = kmap_atomic(ctx->ring_pages[0]);
+ head = ring->head;
+ kunmap_atomic(ring);
+
+ while (atomic_read(&ctx->reqs_available) < ctx->nr) {
+ wait_event(ctx->wait, head != ctx->shadow_tail);
+
+ avail = (head < ctx->shadow_tail ? ctx->shadow_tail : ctx->nr) - head;
+
+ atomic_add(avail, &ctx->reqs_available);
+ head += avail;
+ head %= ctx->nr;
+ }
+
+ WARN_ON(atomic_read(&ctx->reqs_available) > ctx->nr);
+
+ aio_free_ring(ctx);
+
+ spin_lock(&aio_nr_lock);
+ BUG_ON(aio_nr - ctx->max_reqs > aio_nr);
+ aio_nr -= ctx->max_reqs;
+ spin_unlock(&aio_nr_lock);
+
+ pr_debug("freeing %p\n", ctx);
+
+ /*
+ * Here the call_rcu() is between the wait_event() for reqs_active to
+ * hit 0, and freeing the ioctx.
+ *
+ * aio_complete() decrements reqs_active, but it has to touch the ioctx
+ * after to issue a wakeup so we use rcu.
+ */
+ call_rcu(&ctx->rcu_head, free_ioctx_rcu);
}
-static inline void put_ioctx(struct kioctx *kioctx)
+static void put_ioctx(struct kioctx *ctx)
{
- BUG_ON(atomic_read(&kioctx->users) <= 0);
- if (unlikely(atomic_dec_and_test(&kioctx->users)))
- __put_ioctx(kioctx);
+ if (percpu_ref_put(&ctx->users))
+ free_ioctx(ctx);
}
/* ioctx_alloc
@@ -234,7 +376,7 @@ static inline void put_ioctx(struct kioctx *kioctx)
*/
static struct kioctx *ioctx_alloc(unsigned nr_events)
{
- struct mm_struct *mm;
+ struct mm_struct *mm = current->mm;
struct kioctx *ctx;
int err = -ENOMEM;
@@ -253,21 +395,29 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
return ERR_PTR(-ENOMEM);
ctx->max_reqs = nr_events;
- mm = ctx->mm = current->mm;
- atomic_inc(&mm->mm_count);
- atomic_set(&ctx->users, 2);
+ percpu_ref_init(&ctx->users);
+ rcu_read_lock();
+ percpu_ref_get(&ctx->users);
+ rcu_read_unlock();
+
spin_lock_init(&ctx->ctx_lock);
- spin_lock_init(&ctx->ring_info.ring_lock);
+ mutex_init(&ctx->ring_lock);
init_waitqueue_head(&ctx->wait);
INIT_LIST_HEAD(&ctx->active_reqs);
- INIT_LIST_HEAD(&ctx->run_list);
- INIT_DELAYED_WORK(&ctx->wq, aio_kick_handler);
- if (aio_setup_ring(ctx) < 0)
+ ctx->cpu = alloc_percpu(struct kioctx_cpu);
+ if (!ctx->cpu)
goto out_freectx;
+ if (aio_setup_ring(ctx) < 0)
+ goto out_freepcpu;
+
+ atomic_set(&ctx->reqs_available, ctx->nr);
+ ctx->req_batch = ctx->nr / (num_possible_cpus() * 4);
+ BUG_ON(!ctx->req_batch);
+
/* limit the number of system wide aios */
spin_lock(&aio_nr_lock);
if (aio_nr + nr_events > aio_max_nr ||
@@ -283,64 +433,58 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
hlist_add_head_rcu(&ctx->list, &mm->ioctx_list);
spin_unlock(&mm->ioctx_lock);
- dprintk("aio: allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
- ctx, ctx->user_id, current->mm, ctx->ring_info.nr);
+ pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
+ ctx, ctx->user_id, mm, ctx->nr);
return ctx;
out_cleanup:
err = -EAGAIN;
aio_free_ring(ctx);
+out_freepcpu:
+ free_percpu(ctx->cpu);
out_freectx:
- mmdrop(mm);
kmem_cache_free(kioctx_cachep, ctx);
- dprintk("aio: error allocating ioctx %d\n", err);
+ pr_debug("error allocating ioctx %d\n", err);
return ERR_PTR(err);
}
-/* kill_ctx
- * Cancels all outstanding aio requests on an aio context. Used
- * when the processes owning a context have all exited to encourage
- * the rapid destruction of the kioctx.
- */
-static void kill_ctx(struct kioctx *ctx)
+static void kill_ioctx_work(struct work_struct *work)
{
- int (*cancel)(struct kiocb *, struct io_event *);
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- struct io_event res;
+ struct kioctx *ctx = container_of(work, struct kioctx, rcu_work);
- spin_lock_irq(&ctx->ctx_lock);
- ctx->dead = 1;
- while (!list_empty(&ctx->active_reqs)) {
- struct list_head *pos = ctx->active_reqs.next;
- struct kiocb *iocb = list_kiocb(pos);
- list_del_init(&iocb->ki_list);
- cancel = iocb->ki_cancel;
- kiocbSetCancelled(iocb);
- if (cancel) {
- iocb->ki_users++;
- spin_unlock_irq(&ctx->ctx_lock);
- cancel(iocb, &res);
- spin_lock_irq(&ctx->ctx_lock);
- }
- }
+ wake_up_all(&ctx->wait);
+ put_ioctx(ctx);
+}
- if (!ctx->reqs_active)
- goto out;
+static void kill_ioctx_rcu(struct rcu_head *head)
+{
+ struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
- add_wait_queue(&ctx->wait, &wait);
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- while (ctx->reqs_active) {
- spin_unlock_irq(&ctx->ctx_lock);
- io_schedule();
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- spin_lock_irq(&ctx->ctx_lock);
- }
- __set_task_state(tsk, TASK_RUNNING);
- remove_wait_queue(&ctx->wait, &wait);
+ INIT_WORK(&ctx->rcu_work, kill_ioctx_work);
+ schedule_work(&ctx->rcu_work);
+}
-out:
- spin_unlock_irq(&ctx->ctx_lock);
+/* kill_ioctx
+ * Cancels all outstanding aio requests on an aio context. Used
+ * when the processes owning a context have all exited to encourage
+ * the rapid destruction of the kioctx.
+ */
+static void kill_ioctx(struct kioctx *ctx)
+{
+ if (percpu_ref_kill(&ctx->users)) {
+ hlist_del_rcu(&ctx->list);
+ /* Between hlist_del_rcu() and dropping the initial ref */
+ synchronize_rcu();
+
+ /*
+ * We can't punt to workqueue here because put_ioctx() ->
+ * free_ioctx() will unmap the ringbuffer, and that has to be
+ * done in the original process's context. kill_ioctx_rcu/work()
+ * exist for exit_aio(), as in that path free_ioctx() won't do
+ * the unmap.
+ */
+ kill_ioctx_work(&ctx->rcu_work);
+ }
}
/* wait_on_sync_kiocb:
@@ -348,9 +492,9 @@ out:
*/
ssize_t wait_on_sync_kiocb(struct kiocb *iocb)
{
- while (iocb->ki_users) {
+ while (atomic_read(&iocb->ki_users)) {
set_current_state(TASK_UNINTERRUPTIBLE);
- if (!iocb->ki_users)
+ if (!atomic_read(&iocb->ki_users))
break;
io_schedule();
}
@@ -359,28 +503,20 @@ ssize_t wait_on_sync_kiocb(struct kiocb *iocb)
}
EXPORT_SYMBOL(wait_on_sync_kiocb);
-/* exit_aio: called when the last user of mm goes away. At this point,
- * there is no way for any new requests to be submited or any of the
- * io_* syscalls to be called on the context. However, there may be
- * outstanding requests which hold references to the context; as they
- * go away, they will call put_ioctx and release any pinned memory
- * associated with the request (held via struct page * references).
+/*
+ * exit_aio: called when the last user of mm goes away. At this point, there is
+ * no way for any new requests to be submited or any of the io_* syscalls to be
+ * called on the context.
+ *
+ * There may be outstanding kiocbs, but free_ioctx() will explicitly wait on
+ * them.
*/
void exit_aio(struct mm_struct *mm)
{
struct kioctx *ctx;
+ struct hlist_node *n;
- while (!hlist_empty(&mm->ioctx_list)) {
- ctx = hlist_entry(mm->ioctx_list.first, struct kioctx, list);
- hlist_del_rcu(&ctx->list);
-
- kill_ctx(ctx);
-
- if (1 != atomic_read(&ctx->users))
- printk(KERN_DEBUG
- "exit_aio:ioctx still alive: %d %d %d\n",
- atomic_read(&ctx->users), ctx->dead,
- ctx->reqs_active);
+ hlist_for_each_entry_safe(ctx, n, &mm->ioctx_list, list) {
/*
* We don't need to bother with munmap() here -
* exit_mmap(mm) is coming and it'll unmap everything.
@@ -388,150 +524,95 @@ void exit_aio(struct mm_struct *mm)
* as indicator that it needs to unmap the area,
* just set it to 0; aio_free_ring() is the only
* place that uses ->mmap_size, so it's safe.
- * That way we get all munmap done to current->mm -
- * all other callers have ctx->mm == current->mm.
*/
- ctx->ring_info.mmap_size = 0;
- put_ioctx(ctx);
+ ctx->mmap_size = 0;
+
+ if (percpu_ref_kill(&ctx->users)) {
+ hlist_del_rcu(&ctx->list);
+ call_rcu(&ctx->rcu_head, kill_ioctx_rcu);
+ }
}
}
-/* aio_get_req
- * Allocate a slot for an aio request. Increments the users count
- * of the kioctx so that the kioctx stays around until all requests are
- * complete. Returns NULL if no requests are free.
- *
- * Returns with kiocb->users set to 2. The io submit code path holds
- * an extra reference while submitting the i/o.
- * This prevents races between the aio code path referencing the
- * req (after submitting it) and aio_complete() freeing the req.
- */
-static struct kiocb *__aio_get_req(struct kioctx *ctx)
+static void put_reqs_available(struct kioctx *ctx, unsigned nr)
{
- struct kiocb *req = NULL;
-
- req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
- if (unlikely(!req))
- return NULL;
+ struct kioctx_cpu *kcpu;
- req->ki_flags = 0;
- req->ki_users = 2;
- req->ki_key = 0;
- req->ki_ctx = ctx;
- req->ki_cancel = NULL;
- req->ki_retry = NULL;
- req->ki_dtor = NULL;
- req->private = NULL;
- req->ki_iovec = NULL;
- INIT_LIST_HEAD(&req->ki_run_list);
- req->ki_eventfd = NULL;
-
- return req;
-}
+ preempt_disable();
+ kcpu = this_cpu_ptr(ctx->cpu);
-/*
- * struct kiocb's are allocated in batches to reduce the number of
- * times the ctx lock is acquired and released.
- */
-#define KIOCB_BATCH_SIZE 32L
-struct kiocb_batch {
- struct list_head head;
- long count; /* number of requests left to allocate */
-};
+ kcpu->reqs_available += nr;
+ while (kcpu->reqs_available >= ctx->req_batch * 2) {
+ kcpu->reqs_available -= ctx->req_batch;
+ atomic_add(ctx->req_batch, &ctx->reqs_available);
+ }
-static void kiocb_batch_init(struct kiocb_batch *batch, long total)
-{
- INIT_LIST_HEAD(&batch->head);
- batch->count = total;
+ preempt_enable();
}
-static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch)
+static bool get_reqs_available(struct kioctx *ctx)
{
- struct kiocb *req, *n;
+ struct kioctx_cpu *kcpu;
+ bool ret = false;
- if (list_empty(&batch->head))
- return;
+ preempt_disable();
+ kcpu = this_cpu_ptr(ctx->cpu);
- spin_lock_irq(&ctx->ctx_lock);
- list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
- list_del(&req->ki_batch);
- list_del(&req->ki_list);
- kmem_cache_free(kiocb_cachep, req);
- ctx->reqs_active--;
- }
- if (unlikely(!ctx->reqs_active && ctx->dead))
- wake_up_all(&ctx->wait);
- spin_unlock_irq(&ctx->ctx_lock);
-}
-
-/*
- * Allocate a batch of kiocbs. This avoids taking and dropping the
- * context lock a lot during setup.
- */
-static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch)
-{
- unsigned short allocated, to_alloc;
- long avail;
- struct kiocb *req, *n;
- struct aio_ring *ring;
+ if (!kcpu->reqs_available) {
+ int old, avail = atomic_read(&ctx->reqs_available);
- to_alloc = min(batch->count, KIOCB_BATCH_SIZE);
- for (allocated = 0; allocated < to_alloc; allocated++) {
- req = __aio_get_req(ctx);
- if (!req)
- /* allocation failed, go with what we've got */
- break;
- list_add(&req->ki_batch, &batch->head);
- }
+ do {
+ if (avail < ctx->req_batch)
+ goto out;
- if (allocated == 0)
- goto out;
+ old = avail;
+ avail = atomic_cmpxchg(&ctx->reqs_available,
+ avail, avail - ctx->req_batch);
+ } while (avail != old);
- spin_lock_irq(&ctx->ctx_lock);
- ring = kmap_atomic(ctx->ring_info.ring_pages[0]);
-
- avail = aio_ring_avail(&ctx->ring_info, ring) - ctx->reqs_active;
- BUG_ON(avail < 0);
- if (avail < allocated) {
- /* Trim back the number of requests. */
- list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
- list_del(&req->ki_batch);
- kmem_cache_free(kiocb_cachep, req);
- if (--allocated <= avail)
- break;
- }
+ kcpu->reqs_available += ctx->req_batch;
}
- batch->count -= allocated;
- list_for_each_entry(req, &batch->head, ki_batch) {
- list_add(&req->ki_list, &ctx->active_reqs);
- ctx->reqs_active++;
- }
-
- kunmap_atomic(ring);
- spin_unlock_irq(&ctx->ctx_lock);
-
+ ret = true;
+ kcpu->reqs_available--;
out:
- return allocated;
+ preempt_enable();
+ return ret;
}
-static inline struct kiocb *aio_get_req(struct kioctx *ctx,
- struct kiocb_batch *batch)
+/* aio_get_req
+ * Allocate a slot for an aio request. Increments the ki_users count
+ * of the kioctx so that the kioctx stays around until all requests are
+ * complete. Returns NULL if no requests are free.
+ *
+ * Returns with kiocb->ki_users set to 2. The io submit code path holds
+ * an extra reference while submitting the i/o.
+ * This prevents races between the aio code path referencing the
+ * req (after submitting it) and aio_complete() freeing the req.
+ */
+static inline struct kiocb *aio_get_req(struct kioctx *ctx)
{
struct kiocb *req;
- if (list_empty(&batch->head))
- if (kiocb_batch_refill(ctx, batch) == 0)
- return NULL;
- req = list_first_entry(&batch->head, struct kiocb, ki_batch);
- list_del(&req->ki_batch);
+ if (!get_reqs_available(ctx))
+ return NULL;
+
+ req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
+ if (unlikely(!req))
+ goto out_put;
+
+ atomic_set(&req->ki_users, 2);
+ req->ki_ctx = ctx;
return req;
+out_put:
+ put_reqs_available(ctx, 1);
+ return NULL;
}
-static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
+static void kiocb_free(struct kiocb *req)
{
- assert_spin_locked(&ctx->ctx_lock);
-
+ if (req->ki_filp)
+ fput(req->ki_filp);
if (req->ki_eventfd != NULL)
eventfd_ctx_put(req->ki_eventfd);
if (req->ki_dtor)
@@ -539,48 +620,12 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
if (req->ki_iovec != &req->ki_inline_vec)
kfree(req->ki_iovec);
kmem_cache_free(kiocb_cachep, req);
- ctx->reqs_active--;
-
- if (unlikely(!ctx->reqs_active && ctx->dead))
- wake_up_all(&ctx->wait);
}
-/* __aio_put_req
- * Returns true if this put was the last user of the request.
- */
-static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
+void aio_put_req(struct kiocb *req)
{
- dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
- req, atomic_long_read(&req->ki_filp->f_count));
-
- assert_spin_locked(&ctx->ctx_lock);
-
- req->ki_users--;
- BUG_ON(req->ki_users < 0);
- if (likely(req->ki_users))
- return 0;
- list_del(&req->ki_list); /* remove from active_reqs */
- req->ki_cancel = NULL;
- req->ki_retry = NULL;
-
- fput(req->ki_filp);
- req->ki_filp = NULL;
- really_put_req(ctx, req);
- return 1;
-}
-
-/* aio_put_req
- * Returns true if this put was the last user of the kiocb,
- * false if the request is still in use.
- */
-int aio_put_req(struct kiocb *req)
-{
- struct kioctx *ctx = req->ki_ctx;
- int ret;
- spin_lock_irq(&ctx->ctx_lock);
- ret = __aio_put_req(ctx, req);
- spin_unlock_irq(&ctx->ctx_lock);
- return ret;
+ if (atomic_dec_and_test(&req->ki_users))
+ kiocb_free(req);
}
EXPORT_SYMBOL(aio_put_req);
@@ -588,631 +633,355 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
{
struct mm_struct *mm = current->mm;
struct kioctx *ctx, *ret = NULL;
- struct hlist_node *n;
rcu_read_lock();
- hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
- /*
- * RCU protects us against accessing freed memory but
- * we have to be careful not to get a reference when the
- * reference count already dropped to 0 (ctx->dead test
- * is unreliable because of races).
- */
- if (ctx->user_id == ctx_id && !ctx->dead && try_get_ioctx(ctx)){
+ hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list)
+ if (ctx->user_id == ctx_id){
+ percpu_ref_get(&ctx->users);
ret = ctx;
break;
}
- }
rcu_read_unlock();
return ret;
}
-/*
- * Queue up a kiocb to be retried. Assumes that the kiocb
- * has already been marked as kicked, and places it on
- * the retry run list for the corresponding ioctx, if it
- * isn't already queued. Returns 1 if it actually queued
- * the kiocb (to tell the caller to activate the work
- * queue to process it), or 0, if it found that it was
- * already queued.
- */
-static inline int __queue_kicked_iocb(struct kiocb *iocb)
+static inline unsigned kioctx_ring_put(struct kioctx *ctx, struct kiocb *req,
+ unsigned tail)
{
- struct kioctx *ctx = iocb->ki_ctx;
-
- assert_spin_locked(&ctx->ctx_lock);
+ struct io_event *ev_page, *event;
+ unsigned pos = tail + AIO_EVENTS_OFFSET;
- if (list_empty(&iocb->ki_run_list)) {
- list_add_tail(&iocb->ki_run_list,
- &ctx->run_list);
- return 1;
- }
- return 0;
-}
+ if (++tail >= ctx->nr)
+ tail = 0;
-/* aio_run_iocb
- * This is the core aio execution routine. It is
- * invoked both for initial i/o submission and
- * subsequent retries via the aio_kick_handler.
- * Expects to be invoked with iocb->ki_ctx->lock
- * already held. The lock is released and reacquired
- * as needed during processing.
- *
- * Calls the iocb retry method (already setup for the
- * iocb on initial submission) for operation specific
- * handling, but takes care of most of common retry
- * execution details for a given iocb. The retry method
- * needs to be non-blocking as far as possible, to avoid
- * holding up other iocbs waiting to be serviced by the
- * retry kernel thread.
- *
- * The trickier parts in this code have to do with
- * ensuring that only one retry instance is in progress
- * for a given iocb at any time. Providing that guarantee
- * simplifies the coding of individual aio operations as
- * it avoids various potential races.
- */
-static ssize_t aio_run_iocb(struct kiocb *iocb)
-{
- struct kioctx *ctx = iocb->ki_ctx;
- ssize_t (*retry)(struct kiocb *);
- ssize_t ret;
+ ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
+ event = ev_page + pos % AIO_EVENTS_PER_PAGE;
- if (!(retry = iocb->ki_retry)) {
- printk("aio_run_iocb: iocb->ki_retry = NULL\n");
- return 0;
- }
+ event->obj = (u64)(unsigned long)req->ki_obj.user;
+ event->data = req->ki_user_data;
+ event->res = req->ki_res;
+ event->res2 = req->ki_res2;
- /*
- * We don't want the next retry iteration for this
- * operation to start until this one has returned and
- * updated the iocb state. However, wait_queue functions
- * can trigger a kick_iocb from interrupt context in the
- * meantime, indicating that data is available for the next
- * iteration. We want to remember that and enable the
- * next retry iteration _after_ we are through with
- * this one.
- *
- * So, in order to be able to register a "kick", but
- * prevent it from being queued now, we clear the kick
- * flag, but make the kick code *think* that the iocb is
- * still on the run list until we are actually done.
- * When we are done with this iteration, we check if
- * the iocb was kicked in the meantime and if so, queue
- * it up afresh.
- */
+ kunmap_atomic(ev_page);
+ flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
- kiocbClearKicked(iocb);
+ pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
+ ctx, tail, req, req->ki_obj.user, req->ki_user_data,
+ req->ki_res, req->ki_res2);
- /*
- * This is so that aio_complete knows it doesn't need to
- * pull the iocb off the run list (We can't just call
- * INIT_LIST_HEAD because we don't want a kick_iocb to
- * queue this on the run list yet)
- */
- iocb->ki_run_list.next = iocb->ki_run_list.prev = NULL;
- spin_unlock_irq(&ctx->ctx_lock);
+ return tail;
+}
- /* Quit retrying if the i/o has been cancelled */
- if (kiocbIsCancelled(iocb)) {
- ret = -EINTR;
- aio_complete(iocb, ret, 0);
- /* must not access the iocb after this */
- goto out;
- }
+static inline unsigned kioctx_ring_lock(struct kioctx *ctx)
+{
+ unsigned tail;
/*
- * Now we are all set to call the retry method in async
- * context.
+ * ctx->tail is both our lock and the canonical version of the tail
+ * pointer.
*/
- ret = retry(iocb);
-
- if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
- /*
- * There's no easy way to restart the syscall since other AIO's
- * may be already running. Just fail this IO with EINTR.
- */
- if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
- ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK))
- ret = -EINTR;
- aio_complete(iocb, ret, 0);
- }
-out:
- spin_lock_irq(&ctx->ctx_lock);
-
- if (-EIOCBRETRY == ret) {
- /*
- * OK, now that we are done with this iteration
- * and know that there is more left to go,
- * this is where we let go so that a subsequent
- * "kick" can start the next iteration
- */
-
- /* will make __queue_kicked_iocb succeed from here on */
- INIT_LIST_HEAD(&iocb->ki_run_list);
- /* we must queue the next iteration ourselves, if it
- * has already been kicked */
- if (kiocbIsKicked(iocb)) {
- __queue_kicked_iocb(iocb);
+ while ((tail = xchg(&ctx->tail, UINT_MAX)) == UINT_MAX)
+ cpu_relax();
- /*
- * __queue_kicked_iocb will always return 1 here, because
- * iocb->ki_run_list is empty at this point so it should
- * be safe to unconditionally queue the context into the
- * work queue.
- */
- aio_queue_work(ctx);
- }
- }
- return ret;
+ return tail;
}
-/*
- * __aio_run_iocbs:
- * Process all pending retries queued on the ioctx
- * run list.
- * Assumes it is operating within the aio issuer's mm
- * context.
- */
-static int __aio_run_iocbs(struct kioctx *ctx)
+static inline void kioctx_ring_unlock(struct kioctx *ctx, unsigned tail)
{
- struct kiocb *iocb;
- struct list_head run_list;
+ struct aio_ring *ring;
- assert_spin_locked(&ctx->ctx_lock);
+ if (!ctx)
+ return;
- list_replace_init(&ctx->run_list, &run_list);
- while (!list_empty(&run_list)) {
- iocb = list_entry(run_list.next, struct kiocb,
- ki_run_list);
- list_del(&iocb->ki_run_list);
- /*
- * Hold an extra reference while retrying i/o.
- */
- iocb->ki_users++; /* grab extra reference */
- aio_run_iocb(iocb);
- __aio_put_req(ctx, iocb);
- }
- if (!list_empty(&ctx->run_list))
- return 1;
- return 0;
-}
+ smp_wmb();
+ /* make event visible before updating tail */
-static void aio_queue_work(struct kioctx * ctx)
-{
- unsigned long timeout;
- /*
- * if someone is waiting, get the work started right
- * away, otherwise, use a longer delay
- */
- smp_mb();
- if (waitqueue_active(&ctx->wait))
- timeout = 1;
- else
- timeout = HZ/10;
- queue_delayed_work(aio_wq, &ctx->wq, timeout);
-}
+ ctx->shadow_tail = tail;
-/*
- * aio_run_all_iocbs:
- * Process all pending retries queued on the ioctx
- * run list, and keep running them until the list
- * stays empty.
- * Assumes it is operating within the aio issuer's mm context.
- */
-static inline void aio_run_all_iocbs(struct kioctx *ctx)
-{
- spin_lock_irq(&ctx->ctx_lock);
- while (__aio_run_iocbs(ctx))
- ;
- spin_unlock_irq(&ctx->ctx_lock);
-}
+ ring = kmap_atomic(ctx->ring_pages[0]);
+ ring->tail = tail;
+ kunmap_atomic(ring);
+ flush_dcache_page(ctx->ring_pages[0]);
-/*
- * aio_kick_handler:
- * Work queue handler triggered to process pending
- * retries on an ioctx. Takes on the aio issuer's
- * mm context before running the iocbs, so that
- * copy_xxx_user operates on the issuer's address
- * space.
- * Run on aiod's context.
- */
-static void aio_kick_handler(struct work_struct *work)
-{
- struct kioctx *ctx = container_of(work, struct kioctx, wq.work);
- mm_segment_t oldfs = get_fs();
- struct mm_struct *mm;
- int requeue;
+ /* unlock, make new tail visible before checking waitlist */
+ smp_mb();
- set_fs(USER_DS);
- use_mm(ctx->mm);
- spin_lock_irq(&ctx->ctx_lock);
- requeue =__aio_run_iocbs(ctx);
- mm = ctx->mm;
- spin_unlock_irq(&ctx->ctx_lock);
- unuse_mm(mm);
- set_fs(oldfs);
- /*
- * we're in a worker thread already; no point using non-zero delay
- */
- if (requeue)
- queue_delayed_work(aio_wq, &ctx->wq, 0);
-}
+ ctx->tail = tail;
+ if (waitqueue_active(&ctx->wait))
+ wake_up(&ctx->wait);
+}
-/*
- * Called by kick_iocb to queue the kiocb for retry
- * and if required activate the aio work queue to process
- * it
- */
-static void try_queue_kicked_iocb(struct kiocb *iocb)
+void batch_complete_aio(struct batch_complete *batch)
{
- struct kioctx *ctx = iocb->ki_ctx;
+ struct kioctx *ctx = NULL;
+ struct eventfd_ctx *eventfd = NULL;
+ struct rb_node *n;
unsigned long flags;
- int run = 0;
-
- spin_lock_irqsave(&ctx->ctx_lock, flags);
- /* set this inside the lock so that we can't race with aio_run_iocb()
- * testing it and putting the iocb on the run list under the lock */
- if (!kiocbTryKick(iocb))
- run = __queue_kicked_iocb(iocb);
- spin_unlock_irqrestore(&ctx->ctx_lock, flags);
- if (run)
- aio_queue_work(ctx);
-}
+ unsigned tail = 0;
-/*
- * kick_iocb:
- * Called typically from a wait queue callback context
- * to trigger a retry of the iocb.
- * The retry is usually executed by aio workqueue
- * threads (See aio_kick_handler).
- */
-void kick_iocb(struct kiocb *iocb)
-{
- /* sync iocbs are easy: they can only ever be executing from a
- * single context. */
- if (is_sync_kiocb(iocb)) {
- kiocbSetKicked(iocb);
- wake_up_process(iocb->ki_obj.tsk);
+ if (RB_EMPTY_ROOT(&batch->kiocb))
return;
- }
-
- try_queue_kicked_iocb(iocb);
-}
-EXPORT_SYMBOL(kick_iocb);
-
-/* aio_complete
- * Called when the io request on the given iocb is complete.
- * Returns true if this is the last user of the request. The
- * only other user of the request can be the cancellation code.
- */
-int aio_complete(struct kiocb *iocb, long res, long res2)
-{
- struct kioctx *ctx = iocb->ki_ctx;
- struct aio_ring_info *info;
- struct aio_ring *ring;
- struct io_event *event;
- unsigned long flags;
- unsigned long tail;
- int ret;
/*
- * Special case handling for sync iocbs:
- * - events go directly into the iocb for fast handling
- * - the sync task with the iocb in its stack holds the single iocb
- * ref, no other paths have a way to get another ref
- * - the sync task helpfully left a reference to itself in the iocb
+ * Take rcu_read_lock() in case the kioctx is being destroyed, as we
+ * need to issue a wakeup after incrementing reqs_available.
*/
- if (is_sync_kiocb(iocb)) {
- BUG_ON(iocb->ki_users != 1);
- iocb->ki_user_data = res;
- iocb->ki_users = 0;
- wake_up_process(iocb->ki_obj.tsk);
- return 1;
- }
-
- info = &ctx->ring_info;
+ rcu_read_lock();
+ local_irq_save(flags);
- /* add a completion event to the ring buffer.
- * must be done holding ctx->ctx_lock to prevent
- * other code from messing with the tail
- * pointer since we might be called from irq
- * context.
- */
- spin_lock_irqsave(&ctx->ctx_lock, flags);
+ n = rb_first(&batch->kiocb);
+ while (n) {
+ struct kiocb *req = container_of(n, struct kiocb, ki_node);
- if (iocb->ki_run_list.prev && !list_empty(&iocb->ki_run_list))
- list_del_init(&iocb->ki_run_list);
+ if (n->rb_right) {
+ n->rb_right->__rb_parent_color = n->__rb_parent_color;
+ n = n->rb_right;
- /*
- * cancelled requests don't get events, userland was given one
- * when the event got cancelled.
- */
- if (kiocbIsCancelled(iocb))
- goto put_rq;
+ while (n->rb_left)
+ n = n->rb_left;
+ } else {
+ n = rb_parent(n);
+ }
- ring = kmap_atomic(info->ring_pages[0]);
+ if (unlikely(xchg(&req->ki_cancel,
+ KIOCB_CANCELLED) == KIOCB_CANCELLED)) {
+ /*
+ * Can't use the percpu reqs_available here - could race
+ * with free_ioctx()
+ */
+ atomic_inc(&req->ki_ctx->reqs_available);
+ aio_put_req(req);
+ continue;
+ }
- tail = info->tail;
- event = aio_ring_event(info, tail);
- if (++tail >= info->nr)
- tail = 0;
+ if (unlikely(req->ki_eventfd != eventfd)) {
+ if (eventfd) {
+ /* Make event visible */
+ kioctx_ring_unlock(ctx, tail);
+ ctx = NULL;
- event->obj = (u64)(unsigned long)iocb->ki_obj.user;
- event->data = iocb->ki_user_data;
- event->res = res;
- event->res2 = res2;
+ eventfd_signal(eventfd, 1);
+ eventfd_ctx_put(eventfd);
+ }
- dprintk("aio_complete: %p[%lu]: %p: %p %Lx %lx %lx\n",
- ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
- res, res2);
+ eventfd = req->ki_eventfd;
+ req->ki_eventfd = NULL;
+ }
- /* after flagging the request as done, we
- * must never even look at it again
- */
- smp_wmb(); /* make event visible before updating tail */
+ if (unlikely(req->ki_ctx != ctx)) {
+ kioctx_ring_unlock(ctx, tail);
- info->tail = tail;
- ring->tail = tail;
+ ctx = req->ki_ctx;
+ tail = kioctx_ring_lock(ctx);
+ }
- put_aio_ring_event(event);
- kunmap_atomic(ring);
+ tail = kioctx_ring_put(ctx, req, tail);
+ aio_put_req(req);
+ }
- pr_debug("added to ring %p at [%lu]\n", iocb, tail);
+ kioctx_ring_unlock(ctx, tail);
+ local_irq_restore(flags);
+ rcu_read_unlock();
/*
* Check if the user asked us to deliver the result through an
* eventfd. The eventfd_signal() function is safe to be called
* from IRQ context.
*/
- if (iocb->ki_eventfd != NULL)
- eventfd_signal(iocb->ki_eventfd, 1);
+ if (eventfd) {
+ eventfd_signal(eventfd, 1);
+ eventfd_ctx_put(eventfd);
+ }
+}
+EXPORT_SYMBOL(batch_complete_aio);
+
+/* aio_complete_batch
+ * Called when the io request on the given iocb is complete; @batch may be
+ * NULL.
+ */
+void aio_complete_batch(struct kiocb *req, long res, long res2,
+ struct batch_complete *batch)
+{
+ req->ki_res = res;
+ req->ki_res2 = res2;
-put_rq:
- /* everything turned out well, dispose of the aiocb. */
- ret = __aio_put_req(ctx, iocb);
+ if (req->ki_list.next) {
+ struct kioctx *ctx = req->ki_ctx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->ctx_lock, flags);
+ list_del(&req->ki_list);
+ spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+ }
/*
- * We have to order our ring_info tail store above and test
- * of the wait list below outside the wait lock. This is
- * like in wake_up_bit() where clearing a bit has to be
- * ordered with the unlocked test.
+ * Special case handling for sync iocbs:
+ * - events go directly into the iocb for fast handling
+ * - the sync task with the iocb in its stack holds the single iocb
+ * ref, no other paths have a way to get another ref
+ * - the sync task helpfully left a reference to itself in the iocb
*/
- smp_mb();
+ if (is_sync_kiocb(req)) {
+ BUG_ON(atomic_read(&req->ki_users) != 1);
+ req->ki_user_data = req->ki_res;
+ atomic_set(&req->ki_users, 0);
+ wake_up_process(req->ki_obj.tsk);
+ } else if (batch) {
+ int res;
+ struct kiocb *t;
+ struct rb_node **n = &batch->kiocb.rb_node, *parent = NULL;
+
+ while (*n) {
+ parent = *n;
+ t = container_of(*n, struct kiocb, ki_node);
+
+ res = req->ki_ctx != t->ki_ctx
+ ? req->ki_ctx < t->ki_ctx
+ : req->ki_eventfd != t->ki_eventfd
+ ? req->ki_eventfd < t->ki_eventfd
+ : req < t;
+
+ n = res ? &(*n)->rb_left : &(*n)->rb_right;
+ }
- if (waitqueue_active(&ctx->wait))
- wake_up(&ctx->wait);
+ rb_link_node(&req->ki_node, parent, n);
+ rb_insert_color(&req->ki_node, &batch->kiocb);
+ } else {
+ struct batch_complete batch_stack;
- spin_unlock_irqrestore(&ctx->ctx_lock, flags);
- return ret;
+ memset(&req->ki_node, 0, sizeof(req->ki_node));
+ batch_stack.kiocb.rb_node = &req->ki_node;
+
+ batch_complete_aio(&batch_stack);
+ }
}
-EXPORT_SYMBOL(aio_complete);
+EXPORT_SYMBOL(aio_complete_batch);
-/* aio_read_evt
- * Pull an event off of the ioctx's event ring. Returns the number of
- * events fetched (0 or 1 ;-)
- * FIXME: make this use cmpxchg.
- * TODO: make the ringbuffer user mmap()able (requires FIXME).
+/* aio_read_events
+ * Pull an event off of the ioctx's event ring. Returns the number of
+ * events fetched
*/
-static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
+static long aio_read_events_ring(struct kioctx *ctx,
+ struct io_event __user *event, long nr)
{
- struct aio_ring_info *info = &ioctx->ring_info;
struct aio_ring *ring;
- unsigned long head;
- int ret = 0;
-
- ring = kmap_atomic(info->ring_pages[0]);
- dprintk("in aio_read_evt h%lu t%lu m%lu\n",
- (unsigned long)ring->head, (unsigned long)ring->tail,
- (unsigned long)ring->nr);
+ unsigned head, pos;
+ long ret = 0;
+ int copy_ret;
- if (ring->head == ring->tail)
- goto out;
+ mutex_lock(&ctx->ring_lock);
- spin_lock(&info->ring_lock);
-
- head = ring->head % info->nr;
- if (head != ring->tail) {
- struct io_event *evp = aio_ring_event(info, head);
- *ent = *evp;
- head = (head + 1) % info->nr;
- smp_mb(); /* finish reading the event before updatng the head */
- ring->head = head;
- ret = 1;
- put_aio_ring_event(evp);
- }
- spin_unlock(&info->ring_lock);
-
-out:
+ ring = kmap_atomic(ctx->ring_pages[0]);
+ head = ring->head;
kunmap_atomic(ring);
- dprintk("leaving aio_read_evt: %d h%lu t%lu\n", ret,
- (unsigned long)ring->head, (unsigned long)ring->tail);
- return ret;
-}
-struct aio_timeout {
- struct timer_list timer;
- int timed_out;
- struct task_struct *p;
-};
-
-static void timeout_func(unsigned long data)
-{
- struct aio_timeout *to = (struct aio_timeout *)data;
+ pr_debug("h%u t%u m%u\n", head, ctx->shadow_tail, ctx->nr);
- to->timed_out = 1;
- wake_up_process(to->p);
-}
+ if (head == ctx->shadow_tail)
+ goto out;
-static inline void init_timeout(struct aio_timeout *to)
-{
- setup_timer_on_stack(&to->timer, timeout_func, (unsigned long) to);
- to->timed_out = 0;
- to->p = current;
-}
+ while (ret < nr) {
+ long avail = (head < ctx->shadow_tail
+ ? ctx->shadow_tail : ctx->nr) - head;
+ struct io_event *ev;
+ struct page *page;
-static inline void set_timeout(long start_jiffies, struct aio_timeout *to,
- const struct timespec *ts)
-{
- to->timer.expires = start_jiffies + timespec_to_jiffies(ts);
- if (time_after(to->timer.expires, jiffies))
- add_timer(&to->timer);
- else
- to->timed_out = 1;
-}
+ if (head == ctx->shadow_tail)
+ break;
-static inline void clear_timeout(struct aio_timeout *to)
-{
- del_singleshot_timer_sync(&to->timer);
-}
+ avail = min(avail, nr - ret);
+ avail = min_t(long, avail, AIO_EVENTS_PER_PAGE -
+ ((head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE));
-static int read_events(struct kioctx *ctx,
- long min_nr, long nr,
- struct io_event __user *event,
- struct timespec __user *timeout)
-{
- long start_jiffies = jiffies;
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- int ret;
- int i = 0;
- struct io_event ent;
- struct aio_timeout to;
- int retry = 0;
-
- /* needed to zero any padding within an entry (there shouldn't be
- * any, but C is fun!
- */
- memset(&ent, 0, sizeof(ent));
-retry:
- ret = 0;
- while (likely(i < nr)) {
- ret = aio_read_evt(ctx, &ent);
- if (unlikely(ret <= 0))
- break;
+ pos = head + AIO_EVENTS_OFFSET;
+ page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
+ pos %= AIO_EVENTS_PER_PAGE;
- dprintk("read event: %Lx %Lx %Lx %Lx\n",
- ent.data, ent.obj, ent.res, ent.res2);
+ ev = kmap(page);
+ copy_ret = copy_to_user(event + ret, ev + pos, sizeof(*ev) * avail);
+ kunmap(page);
- /* Could we split the check in two? */
- ret = -EFAULT;
- if (unlikely(copy_to_user(event, &ent, sizeof(ent)))) {
- dprintk("aio: lost an event due to EFAULT.\n");
- break;
+ if (unlikely(copy_ret)) {
+ ret = -EFAULT;
+ goto out;
}
- ret = 0;
- /* Good, event copied to userland, update counts. */
- event ++;
- i ++;
+ ret += avail;
+ head += avail;
+ head %= ctx->nr;
}
- if (min_nr <= i)
- return i;
- if (ret)
- return ret;
-
- /* End fast path */
+ ring = kmap_atomic(ctx->ring_pages[0]);
+ ring->head = head;
+ kunmap_atomic(ring);
+ flush_dcache_page(ctx->ring_pages[0]);
- /* racey check, but it gets redone */
- if (!retry && unlikely(!list_empty(&ctx->run_list))) {
- retry = 1;
- aio_run_all_iocbs(ctx);
- goto retry;
- }
+ pr_debug("%li h%u t%u\n", ret, head, ctx->shadow_tail);
- init_timeout(&to);
- if (timeout) {
- struct timespec ts;
- ret = -EFAULT;
- if (unlikely(copy_from_user(&ts, timeout, sizeof(ts))))
- goto out;
-
- set_timeout(start_jiffies, &to, &ts);
- }
+ put_reqs_available(ctx, ret);
+out:
+ mutex_unlock(&ctx->ring_lock);
- while (likely(i < nr)) {
- add_wait_queue_exclusive(&ctx->wait, &wait);
- do {
- set_task_state(tsk, TASK_INTERRUPTIBLE);
- ret = aio_read_evt(ctx, &ent);
- if (ret)
- break;
- if (min_nr <= i)
- break;
- if (unlikely(ctx->dead)) {
- ret = -EINVAL;
- break;
- }
- if (to.timed_out) /* Only check after read evt */
- break;
- /* Try to only show up in io wait if there are ops
- * in flight */
- if (ctx->reqs_active)
- io_schedule();
- else
- schedule();
- if (signal_pending(tsk)) {
- ret = -EINTR;
- break;
- }
- /*ret = aio_read_evt(ctx, &ent);*/
- } while (1) ;
+ return ret;
+}
- set_task_state(tsk, TASK_RUNNING);
- remove_wait_queue(&ctx->wait, &wait);
+static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr,
+ struct io_event __user *event, long *i)
+{
+ long ret = aio_read_events_ring(ctx, event + *i, nr - *i);
- if (unlikely(ret <= 0))
- break;
+ if (ret > 0)
+ *i += ret;
- ret = -EFAULT;
- if (unlikely(copy_to_user(event, &ent, sizeof(ent)))) {
- dprintk("aio: lost an event due to EFAULT.\n");
- break;
- }
+ if (unlikely(percpu_ref_dead(&ctx->users)))
+ ret = -EINVAL;
- /* Good, event copied to userland, update counts. */
- event ++;
- i ++;
- }
+ if (!*i)
+ *i = ret;
- if (timeout)
- clear_timeout(&to);
-out:
- destroy_timer_on_stack(&to.timer);
- return i ? i : ret;
+ return ret < 0 || *i >= min_nr;
}
-/* Take an ioctx and remove it from the list of ioctx's. Protects
- * against races with itself via ->dead.
- */
-static void io_destroy(struct kioctx *ioctx)
+static long read_events(struct kioctx *ctx, long min_nr, long nr,
+ struct io_event __user *event,
+ struct timespec __user *timeout)
{
- struct mm_struct *mm = current->mm;
- int was_dead;
+ ktime_t until = { .tv64 = KTIME_MAX };
+ long ret = 0;
- /* delete the entry from the list is someone else hasn't already */
- spin_lock(&mm->ioctx_lock);
- was_dead = ioctx->dead;
- ioctx->dead = 1;
- hlist_del_rcu(&ioctx->list);
- spin_unlock(&mm->ioctx_lock);
+ if (timeout) {
+ struct timespec ts;
- dprintk("aio_release(%p)\n", ioctx);
- if (likely(!was_dead))
- put_ioctx(ioctx); /* twice for the list */
+ if (unlikely(copy_from_user(&ts, timeout, sizeof(ts))))
+ return -EFAULT;
- kill_ctx(ioctx);
+ until = timespec_to_ktime(ts);
+ }
/*
- * Wake up any waiters. The setting of ctx->dead must be seen
- * by other CPUs at this point. Right now, we rely on the
- * locking done by the above calls to ensure this consistency.
+ * Note that aio_read_events() is being called as the conditional - i.e.
+ * we're calling it after prepare_to_wait() has set task state to
+ * TASK_INTERRUPTIBLE.
+ *
+ * But aio_read_events() can block, and if it blocks it's going to flip
+ * the task state back to TASK_RUNNING.
+ *
+ * This should be ok, provided it doesn't flip the state back to
+ * TASK_RUNNING and return 0 too much - that causes us to spin. That
+ * will only happen if the mutex_lock() call blocks, and we then find
+ * the ringbuffer empty. So in practice we should be ok, but it's
+ * something to be aware of when touching this code.
*/
- wake_up_all(&ioctx->wait);
+ wait_event_interruptible_hrtimeout(ctx->wait,
+ aio_read_events(ctx, min_nr, nr, event, &ret), until);
+
+ if (!ret && signal_pending(current))
+ ret = -EINTR;
+
+ return ret;
}
/* sys_io_setup:
@@ -1250,7 +1019,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
if (!IS_ERR(ioctx)) {
ret = put_user(ioctx->user_id, ctxp);
if (ret)
- io_destroy(ioctx);
+ kill_ioctx(ioctx);
put_ioctx(ioctx);
}
@@ -1268,7 +1037,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
{
struct kioctx *ioctx = lookup_ioctx(ctx);
if (likely(NULL != ioctx)) {
- io_destroy(ioctx);
+ kill_ioctx(ioctx);
put_ioctx(ioctx);
return 0;
}
@@ -1299,24 +1068,15 @@ static void aio_advance_iovec(struct kiocb *iocb, ssize_t ret)
BUG_ON(ret > 0 && iocb->ki_left == 0);
}
-static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
+typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
+
+static ssize_t aio_rw_vect_retry(struct kiocb *iocb, int rw, aio_rw_op *rw_op)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
- ssize_t (*rw_op)(struct kiocb *, const struct iovec *,
- unsigned long, loff_t);
ssize_t ret = 0;
- unsigned short opcode;
-
- if ((iocb->ki_opcode == IOCB_CMD_PREADV) ||
- (iocb->ki_opcode == IOCB_CMD_PREAD)) {
- rw_op = file->f_op->aio_read;
- opcode = IOCB_CMD_PREADV;
- } else {
- rw_op = file->f_op->aio_write;
- opcode = IOCB_CMD_PWRITEV;
- }
/* This matches the pread()/pwrite() logic */
if (iocb->ki_pos < 0)
@@ -1332,7 +1092,7 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
/* retry all partial writes. retry partial reads as long as its a
* regular file. */
} while (ret > 0 && iocb->ki_left > 0 &&
- (opcode == IOCB_CMD_PWRITEV ||
+ (rw == WRITE ||
(!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode))));
/* This means we must have transferred all that we could */
@@ -1342,81 +1102,49 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
/* If we managed to write some out we return that, rather than
* the eventual error. */
- if (opcode == IOCB_CMD_PWRITEV
- && ret < 0 && ret != -EIOCBQUEUED && ret != -EIOCBRETRY
+ if (rw == WRITE
+ && ret < 0 && ret != -EIOCBQUEUED
&& iocb->ki_nbytes - iocb->ki_left)
ret = iocb->ki_nbytes - iocb->ki_left;
return ret;
}
-static ssize_t aio_fdsync(struct kiocb *iocb)
-{
- struct file *file = iocb->ki_filp;
- ssize_t ret = -EINVAL;
-
- if (file->f_op->aio_fsync)
- ret = file->f_op->aio_fsync(iocb, 1);
- return ret;
-}
-
-static ssize_t aio_fsync(struct kiocb *iocb)
-{
- struct file *file = iocb->ki_filp;
- ssize_t ret = -EINVAL;
-
- if (file->f_op->aio_fsync)
- ret = file->f_op->aio_fsync(iocb, 0);
- return ret;
-}
-
-static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb, bool compat)
+static ssize_t aio_setup_vectored_rw(int rw, struct kiocb *kiocb, bool compat)
{
ssize_t ret;
+ kiocb->ki_nr_segs = kiocb->ki_nbytes;
+
#ifdef CONFIG_COMPAT
if (compat)
- ret = compat_rw_copy_check_uvector(type,
+ ret = compat_rw_copy_check_uvector(rw,
(struct compat_iovec __user *)kiocb->ki_buf,
- kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec,
+ kiocb->ki_nr_segs, 1, &kiocb->ki_inline_vec,
&kiocb->ki_iovec);
else
#endif
- ret = rw_copy_check_uvector(type,
+ ret = rw_copy_check_uvector(rw,
(struct iovec __user *)kiocb->ki_buf,
- kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec,
+ kiocb->ki_nr_segs, 1, &kiocb->ki_inline_vec,
&kiocb->ki_iovec);
if (ret < 0)
- goto out;
-
- ret = rw_verify_area(type, kiocb->ki_filp, &kiocb->ki_pos, ret);
- if (ret < 0)
- goto out;
+ return ret;
- kiocb->ki_nr_segs = kiocb->ki_nbytes;
- kiocb->ki_cur_seg = 0;
- /* ki_nbytes/left now reflect bytes instead of segs */
+ /* ki_nbytes now reflect bytes instead of segs */
kiocb->ki_nbytes = ret;
- kiocb->ki_left = ret;
-
- ret = 0;
-out:
- return ret;
+ return 0;
}
-static ssize_t aio_setup_single_vector(int type, struct file * file, struct kiocb *kiocb)
+static ssize_t aio_setup_single_vector(int rw, struct kiocb *kiocb)
{
- int bytes;
-
- bytes = rw_verify_area(type, file, &kiocb->ki_pos, kiocb->ki_left);
- if (bytes < 0)
- return bytes;
+ if (unlikely(!access_ok(!rw, kiocb->ki_buf, kiocb->ki_nbytes)))
+ return -EFAULT;
kiocb->ki_iovec = &kiocb->ki_inline_vec;
kiocb->ki_iovec->iov_base = kiocb->ki_buf;
- kiocb->ki_iovec->iov_len = bytes;
+ kiocb->ki_iovec->iov_len = kiocb->ki_nbytes;
kiocb->ki_nr_segs = 1;
- kiocb->ki_cur_seg = 0;
return 0;
}
@@ -1425,96 +1153,94 @@ static ssize_t aio_setup_single_vector(int type, struct file * file, struct kioc
* Performs the initial checks and aio retry method
* setup for the kiocb at the time of io submission.
*/
-static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
+static ssize_t aio_run_iocb(struct kiocb *req, bool compat)
{
- struct file *file = kiocb->ki_filp;
- ssize_t ret = 0;
+ struct file *file = req->ki_filp;
+ ssize_t ret;
+ int rw;
+ fmode_t mode;
+ aio_rw_op *rw_op;
- switch (kiocb->ki_opcode) {
+ switch (req->ki_opcode) {
case IOCB_CMD_PREAD:
- ret = -EBADF;
- if (unlikely(!(file->f_mode & FMODE_READ)))
- break;
- ret = -EFAULT;
- if (unlikely(!access_ok(VERIFY_WRITE, kiocb->ki_buf,
- kiocb->ki_left)))
- break;
- ret = aio_setup_single_vector(READ, file, kiocb);
- if (ret)
- break;
- ret = -EINVAL;
- if (file->f_op->aio_read)
- kiocb->ki_retry = aio_rw_vect_retry;
- break;
- case IOCB_CMD_PWRITE:
- ret = -EBADF;
- if (unlikely(!(file->f_mode & FMODE_WRITE)))
- break;
- ret = -EFAULT;
- if (unlikely(!access_ok(VERIFY_READ, kiocb->ki_buf,
- kiocb->ki_left)))
- break;
- ret = aio_setup_single_vector(WRITE, file, kiocb);
- if (ret)
- break;
- ret = -EINVAL;
- if (file->f_op->aio_write)
- kiocb->ki_retry = aio_rw_vect_retry;
- break;
case IOCB_CMD_PREADV:
- ret = -EBADF;
- if (unlikely(!(file->f_mode & FMODE_READ)))
- break;
- ret = aio_setup_vectored_rw(READ, kiocb, compat);
- if (ret)
- break;
- ret = -EINVAL;
- if (file->f_op->aio_read)
- kiocb->ki_retry = aio_rw_vect_retry;
- break;
+ mode = FMODE_READ;
+ rw = READ;
+ rw_op = file->f_op->aio_read;
+ goto rw_common;
+
+ case IOCB_CMD_PWRITE:
case IOCB_CMD_PWRITEV:
- ret = -EBADF;
- if (unlikely(!(file->f_mode & FMODE_WRITE)))
- break;
- ret = aio_setup_vectored_rw(WRITE, kiocb, compat);
+ mode = FMODE_WRITE;
+ rw = WRITE;
+ rw_op = file->f_op->aio_write;
+ goto rw_common;
+rw_common:
+ if (unlikely(!(file->f_mode & mode)))
+ return -EBADF;
+
+ if (!rw_op)
+ return -EINVAL;
+
+ ret = (req->ki_opcode == IOCB_CMD_PREADV ||
+ req->ki_opcode == IOCB_CMD_PWRITEV)
+ ? aio_setup_vectored_rw(rw, req, compat)
+ : aio_setup_single_vector(rw, req);
if (ret)
- break;
- ret = -EINVAL;
- if (file->f_op->aio_write)
- kiocb->ki_retry = aio_rw_vect_retry;
+ return ret;
+
+ ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
+ if (ret < 0)
+ return ret;
+
+ req->ki_nbytes = ret;
+ req->ki_left = ret;
+
+ ret = aio_rw_vect_retry(req, rw, rw_op);
break;
+
case IOCB_CMD_FDSYNC:
- ret = -EINVAL;
- if (file->f_op->aio_fsync)
- kiocb->ki_retry = aio_fdsync;
+ if (!file->f_op->aio_fsync)
+ return -EINVAL;
+
+ ret = file->f_op->aio_fsync(req, 1);
break;
+
case IOCB_CMD_FSYNC:
- ret = -EINVAL;
- if (file->f_op->aio_fsync)
- kiocb->ki_retry = aio_fsync;
+ if (!file->f_op->aio_fsync)
+ return -EINVAL;
+
+ ret = file->f_op->aio_fsync(req, 0);
break;
+
default:
- dprintk("EINVAL: io_submit: no operation provided\n");
- ret = -EINVAL;
+ pr_debug("EINVAL: no operation provided\n");
+ return -EINVAL;
}
- if (!kiocb->ki_retry)
- return ret;
+ if (ret != -EIOCBQUEUED) {
+ /*
+ * There's no easy way to restart the syscall since other AIO's
+ * may be already running. Just fail this IO with EINTR.
+ */
+ if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
+ ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK))
+ ret = -EINTR;
+ aio_complete(req, ret, 0);
+ }
return 0;
}
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
- struct iocb *iocb, struct kiocb_batch *batch,
- bool compat)
+ struct iocb *iocb, bool compat)
{
struct kiocb *req;
- struct file *file;
ssize_t ret;
/* enforce forwards compatibility on users */
if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) {
- pr_debug("EINVAL: io_submit: reserve field set\n");
+ pr_debug("EINVAL: reserve field set\n");
return -EINVAL;
}
@@ -1528,16 +1254,16 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
return -EINVAL;
}
- file = fget(iocb->aio_fildes);
- if (unlikely(!file))
- return -EBADF;
-
- req = aio_get_req(ctx, batch); /* returns with 2 references to req */
- if (unlikely(!req)) {
- fput(file);
+ req = aio_get_req(ctx);
+ if (unlikely(!req))
return -EAGAIN;
+
+ req->ki_filp = fget(iocb->aio_fildes);
+ if (unlikely(!req->ki_filp)) {
+ ret = -EBADF;
+ goto out_put_req;
}
- req->ki_filp = file;
+
if (iocb->aio_flags & IOCB_FLAG_RESFD) {
/*
* If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
@@ -1553,9 +1279,9 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
}
}
- ret = put_user(req->ki_key, &user_iocb->aio_key);
+ ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
if (unlikely(ret)) {
- dprintk("EFAULT: aio_key\n");
+ pr_debug("EFAULT: aio_key\n");
goto out_put_req;
}
@@ -1567,41 +1293,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->ki_left = req->ki_nbytes = iocb->aio_nbytes;
req->ki_opcode = iocb->aio_lio_opcode;
- ret = aio_setup_iocb(req, compat);
-
+ ret = aio_run_iocb(req, compat);
if (ret)
goto out_put_req;
- spin_lock_irq(&ctx->ctx_lock);
- /*
- * We could have raced with io_destroy() and are currently holding a
- * reference to ctx which should be destroyed. We cannot submit IO
- * since ctx gets freed as soon as io_submit() puts its reference. The
- * check here is reliable: io_destroy() sets ctx->dead before waiting
- * for outstanding IO and the barrier between these two is realized by
- * unlock of mm->ioctx_lock and lock of ctx->ctx_lock. Analogously we
- * increment ctx->reqs_active before checking for ctx->dead and the
- * barrier is realized by unlock and lock of ctx->ctx_lock. Thus if we
- * don't see ctx->dead set here, io_destroy() waits for our IO to
- * finish.
- */
- if (ctx->dead) {
- spin_unlock_irq(&ctx->ctx_lock);
- ret = -EINVAL;
- goto out_put_req;
- }
- aio_run_iocb(req);
- if (!list_empty(&ctx->run_list)) {
- /* drain the run list */
- while (__aio_run_iocbs(ctx))
- ;
- }
- spin_unlock_irq(&ctx->ctx_lock);
-
aio_put_req(req); /* drop extra ref to req */
return 0;
-
out_put_req:
+ put_reqs_available(ctx, 1);
aio_put_req(req); /* drop extra ref to req */
aio_put_req(req); /* drop i/o ref to req */
return ret;
@@ -1614,7 +1313,6 @@ long do_io_submit(aio_context_t ctx_id, long nr,
long ret = 0;
int i = 0;
struct blk_plug plug;
- struct kiocb_batch batch;
if (unlikely(nr < 0))
return -EINVAL;
@@ -1627,12 +1325,10 @@ long do_io_submit(aio_context_t ctx_id, long nr,
ctx = lookup_ioctx(ctx_id);
if (unlikely(!ctx)) {
- pr_debug("EINVAL: io_submit: invalid context id\n");
+ pr_debug("EINVAL: invalid context id\n");
return -EINVAL;
}
- kiocb_batch_init(&batch, nr);
-
blk_start_plug(&plug);
/*
@@ -1653,13 +1349,12 @@ long do_io_submit(aio_context_t ctx_id, long nr,
break;
}
- ret = io_submit_one(ctx, user_iocb, &tmp, &batch, compat);
+ ret = io_submit_one(ctx, user_iocb, &tmp, compat);
if (ret)
break;
}
blk_finish_plug(&plug);
- kiocb_batch_free(ctx, &batch);
put_ioctx(ctx);
return i ? i : ret;
}
@@ -1692,10 +1387,13 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
assert_spin_locked(&ctx->ctx_lock);
+ if (key != KIOCB_KEY)
+ return NULL;
+
/* TODO: use a hash or array, this sucks. */
list_for_each(pos, &ctx->active_reqs) {
struct kiocb *kiocb = list_kiocb(pos);
- if (kiocb->ki_obj.user == iocb && kiocb->ki_key == key)
+ if (kiocb->ki_obj.user == iocb)
return kiocb;
}
return NULL;
@@ -1714,7 +1412,7 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
struct io_event __user *, result)
{
- int (*cancel)(struct kiocb *iocb, struct io_event *res);
+ struct io_event res;
struct kioctx *ctx;
struct kiocb *kiocb;
u32 key;
@@ -1729,32 +1427,22 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
return -EINVAL;
spin_lock_irq(&ctx->ctx_lock);
- ret = -EAGAIN;
+
kiocb = lookup_kiocb(ctx, iocb, key);
- if (kiocb && kiocb->ki_cancel) {
- cancel = kiocb->ki_cancel;
- kiocb->ki_users ++;
- kiocbSetCancelled(kiocb);
- } else
- cancel = NULL;
+ if (kiocb)
+ ret = kiocb_cancel(ctx, kiocb, &res);
+ else
+ ret = -EINVAL;
+
spin_unlock_irq(&ctx->ctx_lock);
- if (NULL != cancel) {
- struct io_event tmp;
- pr_debug("calling cancel\n");
- memset(&tmp, 0, sizeof(tmp));
- tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user;
- tmp.data = kiocb->ki_user_data;
- ret = cancel(kiocb, &tmp);
- if (!ret) {
- /* Cancellation succeeded -- copy the result
- * into the user's buffer.
- */
- if (copy_to_user(result, &tmp, sizeof(tmp)))
- ret = -EFAULT;
- }
- } else
- ret = -EINVAL;
+ if (!ret) {
+ /* Cancellation succeeded -- copy the result
+ * into the user's buffer.
+ */
+ if (copy_to_user(result, &res, sizeof(res)))
+ ret = -EFAULT;
+ }
put_ioctx(ctx);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index c93447604da8..847ee4dd72a4 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -587,7 +587,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
/* This allows root to remove symlinks */
if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
- return -EACCES;
+ return -EPERM;
if (atomic_dec_and_test(&ino->count)) {
p_ino = autofs4_dentry_ino(dentry->d_parent);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 6043567b95c2..692e75ca6415 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -256,8 +256,6 @@ static int load_aout_binary(struct linux_binprm * bprm)
(current->mm->start_data = N_DATADDR(ex));
current->mm->brk = ex.a_bss +
(current->mm->start_brk = N_BSSADDR(ex));
- current->mm->free_area_cache = current->mm->mmap_base;
- current->mm->cached_hole_size = 0;
retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
if (retval < 0) {
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index da19fc7c00b2..d1777a445841 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -140,6 +140,25 @@ static int padzero(unsigned long elf_bss)
#define ELF_BASE_PLATFORM NULL
#endif
+/*
+ * Use get_random_int() to implement AT_RANDOM while avoiding depletion
+ * of the entropy pool.
+ */
+static void get_atrandom_bytes(unsigned char *buf, size_t nbytes)
+{
+ unsigned char *p = buf;
+
+ while (nbytes) {
+ unsigned int random_variable;
+ size_t chunk = min(nbytes, sizeof(random_variable));
+
+ random_variable = get_random_int();
+ memcpy(p, &random_variable, chunk);
+ p += chunk;
+ nbytes -= chunk;
+ }
+}
+
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
unsigned long load_addr, unsigned long interp_load_addr)
@@ -201,7 +220,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
/*
* Generate 16 random bytes for userspace PRNG seeding.
*/
- get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
+ get_atrandom_bytes(k_rand_bytes, sizeof(k_rand_bytes));
u_rand_bytes = (elf_addr_t __user *)
STACK_ALLOC(p, sizeof(k_rand_bytes));
if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
@@ -735,8 +754,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
/* Do this so that we can load the interpreter, if need be. We will
change some of these later */
- current->mm->free_area_cache = current->mm->mmap_base;
- current->mm->cached_hole_size = 0;
retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
executable_stack);
if (retval < 0) {
@@ -1253,7 +1270,7 @@ static int writenote(struct memelfnote *men, struct file *file,
#undef DUMP_WRITE
static void fill_elf_header(struct elfhdr *elf, int segs,
- u16 machine, u32 flags, u8 osabi)
+ u16 machine, u32 flags)
{
memset(elf, 0, sizeof(*elf));
@@ -1638,7 +1655,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
* Initialize the ELF file header.
*/
fill_elf_header(elf, phdrs,
- view->e_machine, view->e_flags, view->ei_osabi);
+ view->e_machine, view->e_flags);
/*
* Allocate a structure for each thread.
@@ -1878,7 +1895,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
elf_core_copy_regs(&info->prstatus->pr_reg, regs);
/* Set up header */
- fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
+ fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
/*
* Set up the notes in similar form to SVR4 core dumps made
diff --git a/fs/bio.c b/fs/bio.c
index bb5768f59b32..0cc1b5562abc 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -27,6 +27,7 @@
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>
+#include <linux/aio.h>
#include <scsi/sg.h> /* for struct sg_iovec */
#include <trace/events/block.h>
@@ -1407,33 +1408,44 @@ void bio_flush_dcache_pages(struct bio *bi)
EXPORT_SYMBOL(bio_flush_dcache_pages);
#endif
-/**
- * bio_endio - end I/O on a bio
- * @bio: bio
- * @error: error, if any
- *
- * Description:
- * bio_endio() will end I/O on the whole bio. bio_endio() is the
- * preferred way to end I/O on a bio, it takes care of clearing
- * BIO_UPTODATE on error. @error is 0 on success, and and one of the
- * established -Exxxx (-EIO, for instance) error values in case
- * something went wrong. No one should call bi_end_io() directly on a
- * bio unless they own it and thus know that it has an end_io
- * function.
- **/
-void bio_endio(struct bio *bio, int error)
+static inline void __bio_endio(struct bio *bio, struct batch_complete *batch)
{
- if (error)
+ if (bio->bi_error)
clear_bit(BIO_UPTODATE, &bio->bi_flags);
else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
- error = -EIO;
+ bio->bi_error = -EIO;
+
+ if (bio_flagged(bio, BIO_BATCH_ENDIO))
+ bio->bi_batch_end_io(bio, bio->bi_error, batch);
+ else if (bio->bi_end_io)
+ bio->bi_end_io(bio, bio->bi_error);
+}
+
+void bio_endio_batch(struct bio *bio, int error, struct batch_complete *batch)
+{
+ if (error)
+ bio->bi_error = error;
trace_block_bio_complete(bio, error);
- if (bio->bi_end_io)
- bio->bi_end_io(bio, error);
+ if (batch)
+ bio_list_add(&batch->bio, bio);
+ else
+ __bio_endio(bio, batch);
+
+}
+EXPORT_SYMBOL(bio_endio_batch);
+
+void batch_complete(struct batch_complete *batch)
+{
+ struct bio *bio;
+
+ while ((bio = bio_list_pop(&batch->bio)))
+ __bio_endio(bio, batch);
+
+ batch_complete_aio(batch);
}
-EXPORT_SYMBOL(bio_endio);
+EXPORT_SYMBOL(batch_complete);
void bio_pair_release(struct bio_pair *bp)
{
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 172f8491a2bd..363ebe453455 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -27,6 +27,7 @@
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/cleancache.h>
+#include <linux/aio.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -616,11 +617,9 @@ void bd_forget(struct inode *inode)
struct block_device *bdev = NULL;
spin_lock(&bdev_lock);
- if (inode->i_bdev) {
- if (!sb_is_blkdev_sb(inode->i_sb))
- bdev = inode->i_bdev;
- __bd_forget(inode);
- }
+ if (!sb_is_blkdev_sb(inode->i_sb))
+ bdev = inode->i_bdev;
+ __bd_forget(inode);
spin_unlock(&bdev_lock);
if (bdev)
@@ -994,6 +993,7 @@ int revalidate_disk(struct gendisk *disk)
mutex_lock(&bdev->bd_mutex);
check_disk_size_change(disk, bdev);
+ bdev->bd_invalidated = 0;
mutex_unlock(&bdev->bd_mutex);
bdput(bdev);
return ret;
@@ -1032,7 +1032,9 @@ void bd_set_size(struct block_device *bdev, loff_t size)
{
unsigned bsize = bdev_logical_block_size(bdev);
- bdev->bd_inode->i_size = size;
+ mutex_lock(&bdev->bd_inode->i_mutex);
+ i_size_write(bdev->bd_inode, size);
+ mutex_unlock(&bdev->bd_inode->i_mutex);
while (bsize < PAGE_CACHE_SIZE) {
if (size & bsize)
break;
@@ -1117,7 +1119,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
}
}
- if (!ret && !bdev->bd_openers) {
+ if (!ret) {
bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
bdi = blk_get_backing_dev_info(bdev);
if (bdi == NULL)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index f76b1fd160d4..fcb7060aa1c2 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -30,6 +30,7 @@
#include <linux/statfs.h>
#include <linux/compat.h>
#include <linux/slab.h>
+#include <linux/aio.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index cc93b23ca352..ed7ea0afe9ad 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -39,6 +39,7 @@
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/mount.h>
+#include <linux/aio.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
diff --git a/fs/buffer.c b/fs/buffer.c
index 5a2915f5dbc4..b095a2ac1fdb 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2369,7 +2369,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
if (unlikely(ret < 0))
goto out_unlock;
set_page_dirty(page);
- wait_on_page_writeback(page);
+ wait_for_stable_page(page);
return 0;
out_unlock:
unlock_page(page);
@@ -3237,7 +3237,7 @@ static struct kmem_cache *bh_cachep __read_mostly;
* Once the number of bh's in the machine exceeds this level, we start
* stripping them in writeback.
*/
-static int max_buffer_heads;
+static unsigned long max_buffer_heads;
int buffer_heads_over_limit;
@@ -3353,7 +3353,7 @@ EXPORT_SYMBOL(bh_submit_read);
void __init buffer_init(void)
{
- int nrpages;
+ unsigned long nrpages;
bh_cachep = kmem_cache_create("buffer_head",
sizeof(struct buffer_head), 0,
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index e51558fca3a3..3d03090c6ace 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -7,6 +7,7 @@
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/writeback.h>
+#include <linux/aio.h>
#include "super.h"
#include "mds_client.h"
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index ed6208ff85a7..7a3f57f4f624 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -806,10 +806,9 @@ static bool
inode_has_hashed_dentries(struct inode *inode)
{
struct dentry *dentry;
- struct hlist_node *p;
spin_lock(&inode->i_lock);
- hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
spin_unlock(&inode->i_lock);
return true;
diff --git a/fs/compat.c b/fs/compat.c
index fe40fde29111..ba323f8de6fc 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -48,6 +48,7 @@
#include <linux/fs_struct.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
+#include <linux/aio.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
diff --git a/fs/coredump.c b/fs/coredump.c
index 177493272a61..57b9bd6da233 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -501,7 +501,7 @@ void do_coredump(siginfo_t *siginfo)
* so we dump it as root in mode 2, and only into a controlled
* environment (pipe handler or fully qualified path).
*/
- if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
+ if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) {
/* Setuid core dump mode */
flag = O_EXCL; /* Stop rewrite attacks */
cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
diff --git a/fs/dcache.c b/fs/dcache.c
index 19153a0a810c..070b318202ec 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -675,11 +675,10 @@ EXPORT_SYMBOL(dget_parent);
static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
{
struct dentry *alias, *discon_alias;
- struct hlist_node *p;
again:
discon_alias = NULL;
- hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
spin_lock(&alias->d_lock);
if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
if (IS_ROOT(alias) &&
@@ -730,10 +729,9 @@ EXPORT_SYMBOL(d_find_alias);
void d_prune_aliases(struct inode *inode)
{
struct dentry *dentry;
- struct hlist_node *p;
restart:
spin_lock(&inode->i_lock);
- hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
spin_lock(&dentry->d_lock);
if (!dentry->d_count) {
__dget_dlock(dentry);
@@ -1440,14 +1438,13 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
int len = entry->d_name.len;
const char *name = entry->d_name.name;
unsigned int hash = entry->d_name.hash;
- struct hlist_node *p;
if (!inode) {
__d_instantiate(entry, NULL);
return NULL;
}
- hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
/*
* Don't need alias->d_lock here, because aliases with
* d_parent == entry->d_parent are not subject to name or
diff --git a/fs/direct-io.c b/fs/direct-io.c
index cf5b44b10c67..55683f36a2f1 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -37,6 +37,7 @@
#include <linux/uio.h>
#include <linux/atomic.h>
#include <linux/prefetch.h>
+#include <linux/aio.h>
/*
* How many user pages to map in one call to get_user_pages(). This determines
@@ -229,7 +230,8 @@ static inline struct page *dio_get_page(struct dio *dio,
* filesystems can use it to hold additional state between get_block calls and
* dio_complete.
*/
-static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is_async)
+static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is_async,
+ struct batch_complete *batch)
{
ssize_t transferred = 0;
@@ -261,9 +263,9 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is
dio->end_io(dio->iocb, offset, transferred,
dio->private, ret, is_async);
} else {
- if (is_async)
- aio_complete(dio->iocb, ret, 0);
inode_dio_done(dio->inode);
+ if (is_async)
+ aio_complete_batch(dio->iocb, ret, 0, batch);
}
return ret;
@@ -273,7 +275,7 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio);
/*
* Asynchronous IO callback.
*/
-static void dio_bio_end_aio(struct bio *bio, int error)
+static void dio_bio_end_aio(struct bio *bio, int error, struct batch_complete *batch)
{
struct dio *dio = bio->bi_private;
unsigned long remaining;
@@ -289,7 +291,7 @@ static void dio_bio_end_aio(struct bio *bio, int error)
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (remaining == 0) {
- dio_complete(dio, dio->iocb->ki_pos, 0, true);
+ dio_complete(dio, dio->iocb->ki_pos, 0, true, batch);
kmem_cache_free(dio_cache, dio);
}
}
@@ -328,7 +330,7 @@ void dio_end_io(struct bio *bio, int error)
struct dio *dio = bio->bi_private;
if (dio->is_async)
- dio_bio_end_aio(bio, error);
+ dio_bio_end_aio(bio, error, NULL);
else
dio_bio_end_io(bio, error);
}
@@ -349,9 +351,10 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
bio->bi_bdev = bdev;
bio->bi_sector = first_sector;
- if (dio->is_async)
- bio->bi_end_io = dio_bio_end_aio;
- else
+ if (dio->is_async) {
+ bio->bi_batch_end_io = dio_bio_end_aio;
+ bio->bi_flags |= 1 << BIO_BATCH_ENDIO;
+ } else
bio->bi_end_io = dio_bio_end_io;
sdio->bio = bio;
@@ -1272,7 +1275,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
dio_await_completion(dio);
if (drop_refcount(dio) == 0) {
- retval = dio_complete(dio, offset, retval, false);
+ retval = dio_complete(dio, offset, retval, false, NULL);
kmem_cache_free(dio_cache, dio);
} else
BUG_ON(retval != -EIOCBQUEUED);
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index a0387dd8b1f0..7d58d5b112b5 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -158,7 +158,7 @@ static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field,
unsigned int x;
if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
+ return -EPERM;
x = simple_strtoul(buf, NULL, 0);
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index f7501651762d..1b1146670c4b 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1183,7 +1183,7 @@ static void detach_lkb(struct dlm_lkb *lkb)
static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
{
struct dlm_lkb *lkb;
- int rv, id;
+ int rv;
lkb = dlm_allocate_lkb(ls);
if (!lkb)
@@ -1199,19 +1199,13 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
mutex_init(&lkb->lkb_cb_mutex);
INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work);
- retry:
- rv = idr_pre_get(&ls->ls_lkbidr, GFP_NOFS);
- if (!rv)
- return -ENOMEM;
-
+ idr_preload(GFP_NOFS);
spin_lock(&ls->ls_lkbidr_spin);
- rv = idr_get_new_above(&ls->ls_lkbidr, lkb, 1, &id);
- if (!rv)
- lkb->lkb_id = id;
+ rv = idr_alloc(&ls->ls_lkbidr, lkb, 1, 0, GFP_NOWAIT);
+ if (rv >= 0)
+ lkb->lkb_id = rv;
spin_unlock(&ls->ls_lkbidr_spin);
-
- if (rv == -EAGAIN)
- goto retry;
+ idr_preload_end();
if (rv < 0) {
log_error(ls, "create_lkb idr error %d", rv);
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 2e99fb0c9737..3ca79d3253b9 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -796,7 +796,6 @@ static int release_lockspace(struct dlm_ls *ls, int force)
*/
idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls);
- idr_remove_all(&ls->ls_lkbidr);
idr_destroy(&ls->ls_lkbidr);
/*
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index dd87a31bcc21..036ecb5c17db 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -177,12 +177,11 @@ static inline int nodeid_hash(int nodeid)
static struct connection *__find_con(int nodeid)
{
int r;
- struct hlist_node *h;
struct connection *con;
r = nodeid_hash(nodeid);
- hlist_for_each_entry(con, h, &connection_hash[r], list) {
+ hlist_for_each_entry(con, &connection_hash[r], list) {
if (con->nodeid == nodeid)
return con;
}
@@ -232,11 +231,11 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc)
static void foreach_conn(void (*conn_func)(struct connection *c))
{
int i;
- struct hlist_node *h, *n;
+ struct hlist_node *n;
struct connection *con;
for (i = 0; i < CONN_HASH_SIZE; i++) {
- hlist_for_each_entry_safe(con, h, n, &connection_hash[i], list){
+ hlist_for_each_entry_safe(con, n, &connection_hash[i], list){
conn_func(con);
}
}
@@ -257,13 +256,12 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
static struct connection *assoc2con(int assoc_id)
{
int i;
- struct hlist_node *h;
struct connection *con;
mutex_lock(&connections_lock);
for (i = 0 ; i < CONN_HASH_SIZE; i++) {
- hlist_for_each_entry(con, h, &connection_hash[i], list) {
+ hlist_for_each_entry(con, &connection_hash[i], list) {
if (con->sctp_assoc == assoc_id) {
mutex_unlock(&connections_lock);
return con;
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index aedea28a86a1..a6bc63f6e31b 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -305,27 +305,26 @@ static int recover_idr_empty(struct dlm_ls *ls)
static int recover_idr_add(struct dlm_rsb *r)
{
struct dlm_ls *ls = r->res_ls;
- int rv, id;
-
- rv = idr_pre_get(&ls->ls_recover_idr, GFP_NOFS);
- if (!rv)
- return -ENOMEM;
+ int rv;
+ idr_preload(GFP_NOFS);
spin_lock(&ls->ls_recover_idr_lock);
if (r->res_id) {
- spin_unlock(&ls->ls_recover_idr_lock);
- return -1;
- }
- rv = idr_get_new_above(&ls->ls_recover_idr, r, 1, &id);
- if (rv) {
- spin_unlock(&ls->ls_recover_idr_lock);
- return rv;
+ rv = -1;
+ goto out_unlock;
}
- r->res_id = id;
+ rv = idr_alloc(&ls->ls_recover_idr, r, 1, 0, GFP_NOWAIT);
+ if (rv < 0)
+ goto out_unlock;
+
+ r->res_id = rv;
ls->ls_recover_list_count++;
dlm_hold_rsb(r);
+ rv = 0;
+out_unlock:
spin_unlock(&ls->ls_recover_idr_lock);
- return 0;
+ idr_preload_end();
+ return rv;
}
static void recover_idr_del(struct dlm_rsb *r)
@@ -351,24 +350,21 @@ static struct dlm_rsb *recover_idr_find(struct dlm_ls *ls, uint64_t id)
return r;
}
-static int recover_idr_clear_rsb(int id, void *p, void *data)
+static void recover_idr_clear(struct dlm_ls *ls)
{
- struct dlm_ls *ls = data;
- struct dlm_rsb *r = p;
+ struct dlm_rsb *r;
+ int id;
- r->res_id = 0;
- r->res_recover_locks_count = 0;
- ls->ls_recover_list_count--;
+ spin_lock(&ls->ls_recover_idr_lock);
- dlm_put_rsb(r);
- return 0;
-}
+ idr_for_each_entry(&ls->ls_recover_idr, r, id) {
+ idr_remove(&ls->ls_recover_idr, id);
+ r->res_id = 0;
+ r->res_recover_locks_count = 0;
+ ls->ls_recover_list_count--;
-static void recover_idr_clear(struct dlm_ls *ls)
-{
- spin_lock(&ls->ls_recover_idr_lock);
- idr_for_each(&ls->ls_recover_idr, recover_idr_clear_rsb, ls);
- idr_remove_all(&ls->ls_recover_idr);
+ dlm_put_rsb(r);
+ }
if (ls->ls_recover_list_count != 0) {
log_error(ls, "warning: recover_list_count %d",
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index c00e055b6282..f23d2a7ed438 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -58,6 +58,8 @@ int drop_caches_sysctl_handler(ctl_table *table, int write,
if (ret)
return ret;
if (write) {
+ printk(KERN_NOTICE "%s (%d): dropped kernel caches: %d\n",
+ current->comm, task_pid_nr(current), sysctl_drop_caches);
if (sysctl_drop_caches & 1)
iterate_supers(drop_pagecache_sb, NULL);
if (sysctl_drop_caches & 2)
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index bfa52d2ef460..857d545c7ef7 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -31,6 +31,7 @@
#include <linux/security.h>
#include <linux/compat.h>
#include <linux/fs_stack.h>
+#include <linux/aio.h>
#include "ecryptfs_kernel.h"
/**
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 5fa2471796c2..8d7a577ae497 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -115,10 +115,9 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx)
*/
int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon)
{
- struct hlist_node *elem;
int rc;
- hlist_for_each_entry(*daemon, elem,
+ hlist_for_each_entry(*daemon,
&ecryptfs_daemon_hash[ecryptfs_current_euid_hash()],
euid_chain) {
if (uid_eq((*daemon)->file->f_cred->euid, current_euid())) {
@@ -445,7 +444,6 @@ void ecryptfs_release_messaging(void)
mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
}
if (ecryptfs_daemon_hash) {
- struct hlist_node *elem;
struct ecryptfs_daemon *daemon;
int i;
@@ -453,7 +451,7 @@ void ecryptfs_release_messaging(void)
for (i = 0; i < (1 << ecryptfs_hash_bits); i++) {
int rc;
- hlist_for_each_entry(daemon, elem,
+ hlist_for_each_entry(daemon,
&ecryptfs_daemon_hash[i],
euid_chain) {
rc = ecryptfs_exorcise_daemon(daemon);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9fec1836057a..5dcc470879c7 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -348,7 +348,7 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p)
/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
static inline int ep_op_has_event(int op)
{
- return op != EPOLL_CTL_DEL;
+ return op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD;
}
/* Initialize the poll safe wake up structure */
@@ -678,6 +678,36 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
return 0;
}
+/*
+ * Disables a "struct epitem" in the eventpoll set. Returns -EBUSY if the item
+ * had no event flags set, indicating that another thread may be currently
+ * handling that item's events (in the case that EPOLLONESHOT was being
+ * used). Otherwise a zero result indicates that the item has been disabled
+ * from receiving events. A disabled item may be re-enabled via
+ * EPOLL_CTL_MOD. Must be called with "mtx" held.
+ */
+static int ep_disable(struct eventpoll *ep, struct epitem *epi)
+{
+ int result = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ep->lock, flags);
+ if (epi->event.events & EPOLLONESHOT) {
+ if (epi->event.events & ~EP_PRIVATE_BITS) {
+ if (ep_is_linked(&epi->rdllink))
+ list_del_init(&epi->rdllink);
+ /* Ensure ep_poll_callback will not add epi back onto
+ ready list: */
+ epi->event.events &= EP_PRIVATE_BITS;
+ } else
+ result = -EBUSY;
+ } else
+ result = -EINVAL;
+ spin_unlock_irqrestore(&ep->lock, flags);
+
+ return result;
+}
+
static void ep_free(struct eventpoll *ep)
{
struct rb_node *rbp;
@@ -1048,8 +1078,6 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
rb_insert_color(&epi->rbn, &ep->rbr);
}
-
-
#define PATH_ARR_SIZE 5
/*
* These are the number paths of length 1 to 5, that we are allowing to emanate
@@ -1835,6 +1863,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
} else
error = -ENOENT;
break;
+ case EPOLL_CTL_DISABLE:
+ if (epi)
+ error = ep_disable(ep, epi);
+ else
+ error = -ENOENT;
+ break;
}
mutex_unlock(&ep->mtx);
diff --git a/fs/exec.c b/fs/exec.c
index 20df02c1cc70..dc38755fe538 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -355,7 +355,7 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
* flags, permissions, and offset, so we use temporary values. We'll update
* them later in setup_arg_pages().
*/
-int bprm_mm_init(struct linux_binprm *bprm)
+static int bprm_mm_init(struct linux_binprm *bprm)
{
int err;
struct mm_struct *mm = NULL;
@@ -1111,7 +1111,7 @@ void setup_new_exec(struct linux_binprm * bprm)
current->sas_ss_sp = current->sas_ss_size = 0;
if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid()))
- set_dumpable(current->mm, SUID_DUMPABLE_ENABLED);
+ set_dumpable(current->mm, SUID_DUMP_USER);
else
set_dumpable(current->mm, suid_dumpable);
@@ -1639,17 +1639,17 @@ EXPORT_SYMBOL(set_binfmt);
void set_dumpable(struct mm_struct *mm, int value)
{
switch (value) {
- case SUID_DUMPABLE_DISABLED:
+ case SUID_DUMP_DISABLE:
clear_bit(MMF_DUMPABLE, &mm->flags);
smp_wmb();
clear_bit(MMF_DUMP_SECURELY, &mm->flags);
break;
- case SUID_DUMPABLE_ENABLED:
+ case SUID_DUMP_USER:
set_bit(MMF_DUMPABLE, &mm->flags);
smp_wmb();
clear_bit(MMF_DUMP_SECURELY, &mm->flags);
break;
- case SUID_DUMPABLE_SAFE:
+ case SUID_DUMP_ROOT:
set_bit(MMF_DUMP_SECURELY, &mm->flags);
smp_wmb();
set_bit(MMF_DUMPABLE, &mm->flags);
@@ -1662,7 +1662,7 @@ int __get_dumpable(unsigned long mm_flags)
int ret;
ret = mm_flags & MMF_DUMPABLE_MASK;
- return (ret > SUID_DUMPABLE_ENABLED) ? SUID_DUMPABLE_SAFE : ret;
+ return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret;
}
int get_dumpable(struct mm_struct *mm)
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 5df4bb4aab14..262fc9940982 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -44,14 +44,13 @@ find_acceptable_alias(struct dentry *result,
{
struct dentry *dentry, *toput = NULL;
struct inode *inode;
- struct hlist_node *p;
if (acceptable(context, result))
return result;
inode = result->d_inode;
spin_lock(&inode->i_lock);
- hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
dget(dentry);
spin_unlock(&inode->i_lock);
if (toput)
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index c3881e56662e..43d72d0ec7e2 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -31,6 +31,7 @@
#include <linux/mpage.h>
#include <linux/fiemap.h>
#include <linux/namei.h>
+#include <linux/aio.h>
#include "ext2.h"
#include "acl.h"
#include "xip.h"
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index d512c4bc4ad7..eac4f041f5fc 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -27,6 +27,7 @@
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/namei.h>
+#include <linux/aio.h>
#include "ext3.h"
#include "xattr.h"
#include "acl.h"
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 0926fe46ae3e..5546ca225ffe 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2067,6 +2067,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
"writeback");
+ sb->s_flags |= MS_SNAP_STABLE;
return 0;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index afaf9f15303e..fe2afd554622 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -23,6 +23,7 @@
#include <linux/jbd2.h>
#include <linux/mount.h>
#include <linux/path.h>
+#include <linux/aio.h>
#include <linux/quotaops.h>
#include <linux/pagevec.h>
#include "ext4.h"
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index bdd20231e66c..5ef1408276e3 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -20,6 +20,7 @@
* (sct@redhat.com), 1993, 1998
*/
+#include <linux/aio.h>
#include "ext4_jbd2.h"
#include "truncate.h"
#include "ext4_extents.h" /* Needed for EXT_MAX_BLOCKS */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d0da1f26f20d..4b8d3c6c168c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -37,6 +37,7 @@
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
+#include <linux/aio.h>
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -4841,7 +4842,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
0, len, NULL,
ext4_bh_unmapped)) {
/* Wait so that we don't change page under IO */
- wait_on_page_writeback(page);
+ wait_for_stable_page(page);
ret = VM_FAULT_LOCKED;
goto out;
}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 809b31003ecc..d9903af92e51 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -18,6 +18,7 @@
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/namei.h>
+#include <linux/aio.h>
#include <linux/uio.h>
#include <linux/bio.h>
#include <linux/workqueue.h>
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7bd22a201125..d0ed4ba4b61b 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -12,6 +12,7 @@
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
+#include <linux/aio.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 58bf744dbf39..78dabf00f46f 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -484,10 +484,10 @@ parse_record:
nr_slots = 0;
if (de->name[0] == DELETED_FLAG)
continue;
+ if (!de->name[0])
+ goto end_of_dir;
if (de->attr != ATTR_EXT && (de->attr & ATTR_VOLUME))
continue;
- if (de->attr != ATTR_EXT && IS_FREE(de->name))
- continue;
if (de->attr == ATTR_EXT) {
int status = fat_parse_long(inode, &cpos, &bh, &de,
&unicode, &nr_slots);
@@ -608,8 +608,8 @@ parse_record:
goto record_end;
if (de->attr != ATTR_EXT && (de->attr & ATTR_VOLUME))
goto record_end;
- if (de->attr != ATTR_EXT && IS_FREE(de->name))
- goto record_end;
+ if (!de->name[0])
+ goto end_of_dir;
} else {
if ((de->attr & ATTR_VOLUME) || IS_FREE(de->name))
goto record_end;
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 12701a567752..e9cc3f0d58e2 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -95,6 +95,8 @@ struct msdos_sb_info {
spinlock_t dir_hash_lock;
struct hlist_head dir_hashtable[FAT_HASH_SIZE];
+
+ unsigned int dirty; /* fs state before mount */
};
#define FAT_CACHE_VALID 0 /* special case for valid cache */
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index f8f491677a4a..d1d502a026a5 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -20,6 +20,7 @@
#include <linux/buffer_head.h>
#include <linux/exportfs.h>
#include <linux/mount.h>
+#include <linux/aio.h>
#include <linux/vfs.h>
#include <linux/parser.h>
#include <linux/uio.h>
@@ -341,12 +342,11 @@ struct inode *fat_iget(struct super_block *sb, loff_t i_pos)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos);
- struct hlist_node *_p;
struct msdos_inode_info *i;
struct inode *inode = NULL;
spin_lock(&sbi->inode_hash_lock);
- hlist_for_each_entry(i, _p, head, i_fat_hash) {
+ hlist_for_each_entry(i, head, i_fat_hash) {
BUG_ON(i->vfs_inode.i_sb != sb);
if (i->i_pos != i_pos)
continue;
@@ -488,10 +488,59 @@ static void fat_evict_inode(struct inode *inode)
fat_detach(inode);
}
+static void fat_set_state(struct super_block *sb,
+ unsigned int set, unsigned int force)
+{
+ struct buffer_head *bh;
+ struct fat_boot_sector *b;
+ struct msdos_sb_info *sbi = sb->s_fs_info;
+
+ /* do not change any thing if mounted read only */
+ if ((sb->s_flags & MS_RDONLY) && !force)
+ return;
+
+ /* do not change state if fs was dirty */
+ if (sbi->dirty) {
+ /* warn only on set (mount). */
+ if (set)
+ fat_msg(sb, KERN_WARNING, "Volume was not properly "
+ "unmounted. Some data may be corrupt. "
+ "Please run fsck.");
+ return;
+ }
+
+ bh = sb_bread(sb, 0);
+ if (bh == NULL) {
+ fat_msg(sb, KERN_ERR, "unable to read boot sector "
+ "to mark fs as dirty");
+ return;
+ }
+
+ b = (struct fat_boot_sector *) bh->b_data;
+
+ if (sbi->fat_bits == 32) {
+ if (set)
+ b->fat32.state |= FAT_STATE_DIRTY;
+ else
+ b->fat32.state &= ~FAT_STATE_DIRTY;
+ } else /* fat 16 and 12 */ {
+ if (set)
+ b->fat16.state |= FAT_STATE_DIRTY;
+ else
+ b->fat16.state &= ~FAT_STATE_DIRTY;
+ }
+
+ mark_buffer_dirty(bh);
+ sync_dirty_buffer(bh);
+ brelse(bh);
+}
+
static void fat_put_super(struct super_block *sb)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ fat_set_state(sb, 0, 0);
+
iput(sbi->fsinfo_inode);
iput(sbi->fat_inode);
@@ -566,8 +615,18 @@ static void __exit fat_destroy_inodecache(void)
static int fat_remount(struct super_block *sb, int *flags, char *data)
{
+ int new_rdonly;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
*flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME);
+
+ /* make sure we update state on remount. */
+ new_rdonly = *flags & MS_RDONLY;
+ if (new_rdonly != (sb->s_flags & MS_RDONLY)) {
+ if (new_rdonly)
+ fat_set_state(sb, 0, 0);
+ else
+ fat_set_state(sb, 1, 1);
+ }
return 0;
}
@@ -1298,17 +1357,17 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
sbi->prev_free = FAT_START_ENT;
sb->s_maxbytes = 0xffffffff;
- if (!sbi->fat_length && b->fat32_length) {
+ if (!sbi->fat_length && b->fat32.length) {
struct fat_boot_fsinfo *fsinfo;
struct buffer_head *fsinfo_bh;
/* Must be FAT32 */
sbi->fat_bits = 32;
- sbi->fat_length = le32_to_cpu(b->fat32_length);
- sbi->root_cluster = le32_to_cpu(b->root_cluster);
+ sbi->fat_length = le32_to_cpu(b->fat32.length);
+ sbi->root_cluster = le32_to_cpu(b->fat32.root_cluster);
/* MC - if info_sector is 0, don't multiply by 0 */
- sbi->fsinfo_sector = le16_to_cpu(b->info_sector);
+ sbi->fsinfo_sector = le16_to_cpu(b->fat32.info_sector);
if (sbi->fsinfo_sector == 0)
sbi->fsinfo_sector = 1;
@@ -1362,6 +1421,12 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
if (sbi->fat_bits != 32)
sbi->fat_bits = (total_clusters > MAX_FAT12) ? 16 : 12;
+ /* some OSes set FAT_STATE_DIRTY and clean it on unmount. */
+ if (sbi->fat_bits == 32)
+ sbi->dirty = b->fat32.state & FAT_STATE_DIRTY;
+ else /* fat 16 or 12 */
+ sbi->dirty = b->fat16.state & FAT_STATE_DIRTY;
+
/* check that FAT table does not overflow */
fat_clusters = sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits;
total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT);
@@ -1456,6 +1521,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
"the device does not support discard");
}
+ fat_set_state(sb, 1, 0);
return 0;
out_invalid:
diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c
index ef4b5faba87b..499c10438ca2 100644
--- a/fs/fat/nfs.c
+++ b/fs/fat/nfs.c
@@ -21,13 +21,12 @@ static struct inode *fat_dget(struct super_block *sb, int i_logstart)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
struct hlist_head *head;
- struct hlist_node *_p;
struct msdos_inode_info *i;
struct inode *inode = NULL;
head = sbi->dir_hashtable + fat_dir_hash(i_logstart);
spin_lock(&sbi->dir_hash_lock);
- hlist_for_each_entry(i, _p, head, i_dir_hash) {
+ hlist_for_each_entry(i, head, i_dir_hash) {
BUG_ON(i->vfs_inode.i_sb != sb);
if (i->i_logstart != i_logstart)
continue;
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 8dcb114758e3..e2cba1f60c21 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -237,13 +237,12 @@ static int fscache_alloc_object(struct fscache_cache *cache,
struct fscache_cookie *cookie)
{
struct fscache_object *object;
- struct hlist_node *_n;
int ret;
_enter("%p,%p{%s}", cache, cookie, cookie->def->name);
spin_lock(&cookie->lock);
- hlist_for_each_entry(object, _n, &cookie->backing_objects,
+ hlist_for_each_entry(object, &cookie->backing_objects,
cookie_link) {
if (object->cache == cache)
goto object_already_extant;
@@ -311,7 +310,6 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
{
struct fscache_object *p;
struct fscache_cache *cache = object->cache;
- struct hlist_node *_n;
int ret;
_enter("{%s},{OBJ%x}", cookie->def->name, object->debug_id);
@@ -321,7 +319,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
/* there may be multiple initial creations of this object, but we only
* want one */
ret = -EEXIST;
- hlist_for_each_entry(p, _n, &cookie->backing_objects, cookie_link) {
+ hlist_for_each_entry(p, &cookie->backing_objects, cookie_link) {
if (p->cache == object->cache) {
if (p->state >= FSCACHE_OBJECT_DYING)
ret = -ENOBUFS;
@@ -331,7 +329,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
/* pin the parent object */
spin_lock_nested(&cookie->parent->lock, 1);
- hlist_for_each_entry(p, _n, &cookie->parent->backing_objects,
+ hlist_for_each_entry(p, &cookie->parent->backing_objects,
cookie_link) {
if (p->cache == object->cache) {
if (p->state >= FSCACHE_OBJECT_DYING) {
@@ -435,7 +433,6 @@ EXPORT_SYMBOL(__fscache_wait_on_invalidate);
void __fscache_update_cookie(struct fscache_cookie *cookie)
{
struct fscache_object *object;
- struct hlist_node *_p;
fscache_stat(&fscache_n_updates);
@@ -452,7 +449,7 @@ void __fscache_update_cookie(struct fscache_cookie *cookie)
spin_lock(&cookie->lock);
/* update the index entry on disk in each cache backing this cookie */
- hlist_for_each_entry(object, _p,
+ hlist_for_each_entry(object,
&cookie->backing_objects, cookie_link) {
fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE);
}
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 6f96a8def147..06b5e086ab3a 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -38,6 +38,7 @@
#include <linux/device.h>
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/aio.h>
#include <linux/kdev_t.h>
#include <linux/kthread.h>
#include <linux/list.h>
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index cbae09e5a491..95de0a2b8f0b 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -19,6 +19,7 @@
#include <linux/pipe_fs_i.h>
#include <linux/swap.h>
#include <linux/splice.h>
+#include <linux/aio.h>
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a010585b0a74..3f95325a0312 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/compat.h>
#include <linux/swap.h>
+#include <linux/aio.h>
static const struct file_operations fuse_direct_io_file_operations;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 24f414f0ce61..371bd144d802 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -20,6 +20,7 @@
#include <linux/swap.h>
#include <linux/gfs2_ondisk.h>
#include <linux/backing-dev.h>
+#include <linux/aio.h>
#include "gfs2.h"
#include "incore.h"
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 06b7092a3f25..2d42817442fb 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -25,6 +25,7 @@
#include <asm/uaccess.h>
#include <linux/dlm.h>
#include <linux/dlm_plock.h>
+#include <linux/aio.h>
#include "gfs2.h"
#include "incore.h"
@@ -483,7 +484,7 @@ out:
gfs2_holder_uninit(&gh);
if (ret == 0) {
set_page_dirty(page);
- wait_on_page_writeback(page);
+ wait_for_stable_page(page);
}
sb_end_pagefault(inode->i_sb);
return block_page_mkwrite_return(ret);
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 462e84142759..eb81cef67fa3 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -103,7 +103,7 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
int n = simple_strtol(buf, NULL, 0);
if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
+ return -EPERM;
switch (n) {
case 0:
@@ -133,7 +133,7 @@ static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
+ return -EPERM;
if (simple_strtol(buf, NULL, 0) != 1)
return -EINVAL;
@@ -148,7 +148,7 @@ static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf,
size_t len)
{
if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
+ return -EPERM;
if (simple_strtol(buf, NULL, 0) != 1)
return -EINVAL;
@@ -161,7 +161,7 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
size_t len)
{
if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
+ return -EPERM;
if (simple_strtol(buf, NULL, 0) != 1)
return -EINVAL;
@@ -177,7 +177,7 @@ static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf,
u32 id;
if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
+ return -EPERM;
id = simple_strtoul(buf, NULL, 0);
@@ -192,7 +192,7 @@ static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
u32 id;
if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
+ return -EPERM;
id = simple_strtoul(buf, NULL, 0);
@@ -211,7 +211,7 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len
int rv;
if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
+ return -EPERM;
rv = sscanf(buf, "%u:%llu %15s", &gltype, &glnum,
mode);
@@ -500,7 +500,7 @@ static ssize_t quota_scale_store(struct gfs2_sbd *sdp, const char *buf,
unsigned int x, y;
if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
+ return -EPERM;
if (sscanf(buf, "%u %u", &x, &y) != 2 || !y)
return -EINVAL;
@@ -519,7 +519,7 @@ static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
unsigned int x;
if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
+ return -EPERM;
x = simple_strtoul(buf, NULL, 0);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index d47f11658c17..b2f3025827a1 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -14,6 +14,7 @@
#include <linux/pagemap.h>
#include <linux/mpage.h>
#include <linux/sched.h>
+#include <linux/aio.h>
#include "hfs_fs.h"
#include "btree.h"
diff --git a/fs/hfsplus/Makefile b/fs/hfsplus/Makefile
index 3cc0df730156..09d278bb7b91 100644
--- a/fs/hfsplus/Makefile
+++ b/fs/hfsplus/Makefile
@@ -5,5 +5,5 @@
obj-$(CONFIG_HFSPLUS_FS) += hfsplus.o
hfsplus-objs := super.o options.o inode.o ioctl.o extents.o catalog.o dir.o btree.o \
- bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o
-
+ bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o \
+ attributes.o xattr.o xattr_user.o xattr_security.o xattr_trusted.o
diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c
new file mode 100644
index 000000000000..8d691f124714
--- /dev/null
+++ b/fs/hfsplus/attributes.c
@@ -0,0 +1,399 @@
+/*
+ * linux/fs/hfsplus/attributes.c
+ *
+ * Vyacheslav Dubeyko <slava@dubeyko.com>
+ *
+ * Handling of records in attributes tree
+ */
+
+#include "hfsplus_fs.h"
+#include "hfsplus_raw.h"
+
+static struct kmem_cache *hfsplus_attr_tree_cachep;
+
+int hfsplus_create_attr_tree_cache(void)
+{
+ if (hfsplus_attr_tree_cachep)
+ return -EEXIST;
+
+ hfsplus_attr_tree_cachep =
+ kmem_cache_create("hfsplus_attr_cache",
+ sizeof(hfsplus_attr_entry), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!hfsplus_attr_tree_cachep)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void hfsplus_destroy_attr_tree_cache(void)
+{
+ kmem_cache_destroy(hfsplus_attr_tree_cachep);
+}
+
+int hfsplus_attr_bin_cmp_key(const hfsplus_btree_key *k1,
+ const hfsplus_btree_key *k2)
+{
+ __be32 k1_cnid, k2_cnid;
+
+ k1_cnid = k1->attr.cnid;
+ k2_cnid = k2->attr.cnid;
+ if (k1_cnid != k2_cnid)
+ return be32_to_cpu(k1_cnid) < be32_to_cpu(k2_cnid) ? -1 : 1;
+
+ return hfsplus_strcmp(
+ (const struct hfsplus_unistr *)&k1->attr.key_name,
+ (const struct hfsplus_unistr *)&k2->attr.key_name);
+}
+
+int hfsplus_attr_build_key(struct super_block *sb, hfsplus_btree_key *key,
+ u32 cnid, const char *name)
+{
+ int len;
+
+ memset(key, 0, sizeof(struct hfsplus_attr_key));
+ key->attr.cnid = cpu_to_be32(cnid);
+ if (name) {
+ len = strlen(name);
+ if (len > HFSPLUS_ATTR_MAX_STRLEN) {
+ printk(KERN_ERR "hfs: invalid xattr name's length\n");
+ return -EINVAL;
+ }
+ hfsplus_asc2uni(sb,
+ (struct hfsplus_unistr *)&key->attr.key_name,
+ HFSPLUS_ATTR_MAX_STRLEN, name, len);
+ len = be16_to_cpu(key->attr.key_name.length);
+ } else {
+ key->attr.key_name.length = 0;
+ len = 0;
+ }
+
+ /* The length of the key, as stored in key_len field, does not include
+ * the size of the key_len field itself.
+ * So, offsetof(hfsplus_attr_key, key_name) is a trick because
+ * it takes into consideration key_len field (__be16) of
+ * hfsplus_attr_key structure instead of length field (__be16) of
+ * hfsplus_attr_unistr structure.
+ */
+ key->key_len =
+ cpu_to_be16(offsetof(struct hfsplus_attr_key, key_name) +
+ 2 * len);
+
+ return 0;
+}
+
+void hfsplus_attr_build_key_uni(hfsplus_btree_key *key,
+ u32 cnid,
+ struct hfsplus_attr_unistr *name)
+{
+ int ustrlen;
+
+ memset(key, 0, sizeof(struct hfsplus_attr_key));
+ ustrlen = be16_to_cpu(name->length);
+ key->attr.cnid = cpu_to_be32(cnid);
+ key->attr.key_name.length = cpu_to_be16(ustrlen);
+ ustrlen *= 2;
+ memcpy(key->attr.key_name.unicode, name->unicode, ustrlen);
+
+ /* The length of the key, as stored in key_len field, does not include
+ * the size of the key_len field itself.
+ * So, offsetof(hfsplus_attr_key, key_name) is a trick because
+ * it takes into consideration key_len field (__be16) of
+ * hfsplus_attr_key structure instead of length field (__be16) of
+ * hfsplus_attr_unistr structure.
+ */
+ key->key_len =
+ cpu_to_be16(offsetof(struct hfsplus_attr_key, key_name) +
+ ustrlen);
+}
+
+hfsplus_attr_entry *hfsplus_alloc_attr_entry(void)
+{
+ return kmem_cache_alloc(hfsplus_attr_tree_cachep, GFP_KERNEL);
+}
+
+void hfsplus_destroy_attr_entry(hfsplus_attr_entry *entry)
+{
+ if (entry)
+ kmem_cache_free(hfsplus_attr_tree_cachep, entry);
+}
+
+#define HFSPLUS_INVALID_ATTR_RECORD -1
+
+static int hfsplus_attr_build_record(hfsplus_attr_entry *entry, int record_type,
+ u32 cnid, const void *value, size_t size)
+{
+ if (record_type == HFSPLUS_ATTR_FORK_DATA) {
+ /*
+ * Mac OS X supports only inline data attributes.
+ * Do nothing
+ */
+ memset(entry, 0, sizeof(*entry));
+ return sizeof(struct hfsplus_attr_fork_data);
+ } else if (record_type == HFSPLUS_ATTR_EXTENTS) {
+ /*
+ * Mac OS X supports only inline data attributes.
+ * Do nothing.
+ */
+ memset(entry, 0, sizeof(*entry));
+ return sizeof(struct hfsplus_attr_extents);
+ } else if (record_type == HFSPLUS_ATTR_INLINE_DATA) {
+ u16 len;
+
+ memset(entry, 0, sizeof(struct hfsplus_attr_inline_data));
+ entry->inline_data.record_type = cpu_to_be32(record_type);
+ if (size <= HFSPLUS_MAX_INLINE_DATA_SIZE)
+ len = size;
+ else
+ return HFSPLUS_INVALID_ATTR_RECORD;
+ entry->inline_data.length = cpu_to_be16(len);
+ memcpy(entry->inline_data.raw_bytes, value, len);
+ /*
+ * Align len on two-byte boundary.
+ * It needs to add pad byte if we have odd len.
+ */
+ len = round_up(len, 2);
+ return offsetof(struct hfsplus_attr_inline_data, raw_bytes) +
+ len;
+ } else /* invalid input */
+ memset(entry, 0, sizeof(*entry));
+
+ return HFSPLUS_INVALID_ATTR_RECORD;
+}
+
+int hfsplus_find_attr(struct super_block *sb, u32 cnid,
+ const char *name, struct hfs_find_data *fd)
+{
+ int err = 0;
+
+ dprint(DBG_ATTR_MOD, "find_attr: %s,%d\n", name ? name : NULL, cnid);
+
+ if (!HFSPLUS_SB(sb)->attr_tree) {
+ printk(KERN_ERR "hfs: attributes file doesn't exist\n");
+ return -EINVAL;
+ }
+
+ if (name) {
+ err = hfsplus_attr_build_key(sb, fd->search_key, cnid, name);
+ if (err)
+ goto failed_find_attr;
+ err = hfs_brec_find(fd, hfs_find_rec_by_key);
+ if (err)
+ goto failed_find_attr;
+ } else {
+ err = hfsplus_attr_build_key(sb, fd->search_key, cnid, NULL);
+ if (err)
+ goto failed_find_attr;
+ err = hfs_brec_find(fd, hfs_find_1st_rec_by_cnid);
+ if (err)
+ goto failed_find_attr;
+ }
+
+failed_find_attr:
+ return err;
+}
+
+int hfsplus_attr_exists(struct inode *inode, const char *name)
+{
+ int err = 0;
+ struct super_block *sb = inode->i_sb;
+ struct hfs_find_data fd;
+
+ if (!HFSPLUS_SB(sb)->attr_tree)
+ return 0;
+
+ err = hfs_find_init(HFSPLUS_SB(sb)->attr_tree, &fd);
+ if (err)
+ return 0;
+
+ err = hfsplus_find_attr(sb, inode->i_ino, name, &fd);
+ if (err)
+ goto attr_not_found;
+
+ hfs_find_exit(&fd);
+ return 1;
+
+attr_not_found:
+ hfs_find_exit(&fd);
+ return 0;
+}
+
+int hfsplus_create_attr(struct inode *inode,
+ const char *name,
+ const void *value, size_t size)
+{
+ struct super_block *sb = inode->i_sb;
+ struct hfs_find_data fd;
+ hfsplus_attr_entry *entry_ptr;
+ int entry_size;
+ int err;
+
+ dprint(DBG_ATTR_MOD, "create_attr: %s,%ld\n",
+ name ? name : NULL, inode->i_ino);
+
+ if (!HFSPLUS_SB(sb)->attr_tree) {
+ printk(KERN_ERR "hfs: attributes file doesn't exist\n");
+ return -EINVAL;
+ }
+
+ entry_ptr = hfsplus_alloc_attr_entry();
+ if (!entry_ptr)
+ return -ENOMEM;
+
+ err = hfs_find_init(HFSPLUS_SB(sb)->attr_tree, &fd);
+ if (err)
+ goto failed_init_create_attr;
+
+ if (name) {
+ err = hfsplus_attr_build_key(sb, fd.search_key,
+ inode->i_ino, name);
+ if (err)
+ goto failed_create_attr;
+ } else {
+ err = -EINVAL;
+ goto failed_create_attr;
+ }
+
+ /* Mac OS X supports only inline data attributes. */
+ entry_size = hfsplus_attr_build_record(entry_ptr,
+ HFSPLUS_ATTR_INLINE_DATA,
+ inode->i_ino,
+ value, size);
+ if (entry_size == HFSPLUS_INVALID_ATTR_RECORD) {
+ err = -EINVAL;
+ goto failed_create_attr;
+ }
+
+ err = hfs_brec_find(&fd, hfs_find_rec_by_key);
+ if (err != -ENOENT) {
+ if (!err)
+ err = -EEXIST;
+ goto failed_create_attr;
+ }
+
+ err = hfs_brec_insert(&fd, entry_ptr, entry_size);
+ if (err)
+ goto failed_create_attr;
+
+ hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ATTR_DIRTY);
+
+failed_create_attr:
+ hfs_find_exit(&fd);
+
+failed_init_create_attr:
+ hfsplus_destroy_attr_entry(entry_ptr);
+ return err;
+}
+
+static int __hfsplus_delete_attr(struct inode *inode, u32 cnid,
+ struct hfs_find_data *fd)
+{
+ int err = 0;
+ __be32 found_cnid, record_type;
+
+ hfs_bnode_read(fd->bnode, &found_cnid,
+ fd->keyoffset +
+ offsetof(struct hfsplus_attr_key, cnid),
+ sizeof(__be32));
+ if (cnid != be32_to_cpu(found_cnid))
+ return -ENOENT;
+
+ hfs_bnode_read(fd->bnode, &record_type,
+ fd->entryoffset, sizeof(record_type));
+
+ switch (be32_to_cpu(record_type)) {
+ case HFSPLUS_ATTR_INLINE_DATA:
+ /* All is OK. Do nothing. */
+ break;
+ case HFSPLUS_ATTR_FORK_DATA:
+ case HFSPLUS_ATTR_EXTENTS:
+ printk(KERN_ERR "hfs: only inline data xattr are supported\n");
+ return -EOPNOTSUPP;
+ default:
+ printk(KERN_ERR "hfs: invalid extended attribute record\n");
+ return -ENOENT;
+ }
+
+ err = hfs_brec_remove(fd);
+ if (err)
+ return err;
+
+ hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ATTR_DIRTY);
+ return err;
+}
+
+int hfsplus_delete_attr(struct inode *inode, const char *name)
+{
+ int err = 0;
+ struct super_block *sb = inode->i_sb;
+ struct hfs_find_data fd;
+
+ dprint(DBG_ATTR_MOD, "delete_attr: %s,%ld\n",
+ name ? name : NULL, inode->i_ino);
+
+ if (!HFSPLUS_SB(sb)->attr_tree) {
+ printk(KERN_ERR "hfs: attributes file doesn't exist\n");
+ return -EINVAL;
+ }
+
+ err = hfs_find_init(HFSPLUS_SB(sb)->attr_tree, &fd);
+ if (err)
+ return err;
+
+ if (name) {
+ err = hfsplus_attr_build_key(sb, fd.search_key,
+ inode->i_ino, name);
+ if (err)
+ goto out;
+ } else {
+ printk(KERN_ERR "hfs: invalid extended attribute name\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = hfs_brec_find(&fd, hfs_find_rec_by_key);
+ if (err)
+ goto out;
+
+ err = __hfsplus_delete_attr(inode, inode->i_ino, &fd);
+ if (err)
+ goto out;
+
+out:
+ hfs_find_exit(&fd);
+ return err;
+}
+
+int hfsplus_delete_all_attrs(struct inode *dir, u32 cnid)
+{
+ int err = 0;
+ struct hfs_find_data fd;
+
+ dprint(DBG_ATTR_MOD, "delete_all_attrs: %d\n", cnid);
+
+ if (!HFSPLUS_SB(dir->i_sb)->attr_tree) {
+ printk(KERN_ERR "hfs: attributes file doesn't exist\n");
+ return -EINVAL;
+ }
+
+ err = hfs_find_init(HFSPLUS_SB(dir->i_sb)->attr_tree, &fd);
+ if (err)
+ return err;
+
+ for (;;) {
+ err = hfsplus_find_attr(dir->i_sb, cnid, NULL, &fd);
+ if (err) {
+ if (err != -ENOENT)
+ printk(KERN_ERR "hfs: xattr search failed.\n");
+ goto end_delete_all;
+ }
+
+ err = __hfsplus_delete_attr(dir, cnid, &fd);
+ if (err)
+ goto end_delete_all;
+ }
+
+end_delete_all:
+ hfs_find_exit(&fd);
+ return err;
+}
diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c
index 5d799c13205f..d73c98d1ee99 100644
--- a/fs/hfsplus/bfind.c
+++ b/fs/hfsplus/bfind.c
@@ -24,7 +24,19 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd)
fd->key = ptr + tree->max_key_len + 2;
dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n",
tree->cnid, __builtin_return_address(0));
- mutex_lock(&tree->tree_lock);
+ switch (tree->cnid) {
+ case HFSPLUS_CAT_CNID:
+ mutex_lock_nested(&tree->tree_lock, CATALOG_BTREE_MUTEX);
+ break;
+ case HFSPLUS_EXT_CNID:
+ mutex_lock_nested(&tree->tree_lock, EXTENTS_BTREE_MUTEX);
+ break;
+ case HFSPLUS_ATTR_CNID:
+ mutex_lock_nested(&tree->tree_lock, ATTR_BTREE_MUTEX);
+ break;
+ default:
+ BUG();
+ }
return 0;
}
@@ -38,15 +50,73 @@ void hfs_find_exit(struct hfs_find_data *fd)
fd->tree = NULL;
}
-/* Find the record in bnode that best matches key (not greater than...)*/
-int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd)
+int hfs_find_1st_rec_by_cnid(struct hfs_bnode *bnode,
+ struct hfs_find_data *fd,
+ int *begin,
+ int *end,
+ int *cur_rec)
+{
+ __be32 cur_cnid, search_cnid;
+
+ if (bnode->tree->cnid == HFSPLUS_EXT_CNID) {
+ cur_cnid = fd->key->ext.cnid;
+ search_cnid = fd->search_key->ext.cnid;
+ } else if (bnode->tree->cnid == HFSPLUS_CAT_CNID) {
+ cur_cnid = fd->key->cat.parent;
+ search_cnid = fd->search_key->cat.parent;
+ } else if (bnode->tree->cnid == HFSPLUS_ATTR_CNID) {
+ cur_cnid = fd->key->attr.cnid;
+ search_cnid = fd->search_key->attr.cnid;
+ } else
+ BUG();
+
+ if (cur_cnid == search_cnid) {
+ (*end) = (*cur_rec);
+ if ((*begin) == (*end))
+ return 1;
+ } else {
+ if (be32_to_cpu(cur_cnid) < be32_to_cpu(search_cnid))
+ (*begin) = (*cur_rec) + 1;
+ else
+ (*end) = (*cur_rec) - 1;
+ }
+
+ return 0;
+}
+
+int hfs_find_rec_by_key(struct hfs_bnode *bnode,
+ struct hfs_find_data *fd,
+ int *begin,
+ int *end,
+ int *cur_rec)
{
int cmpval;
+
+ cmpval = bnode->tree->keycmp(fd->key, fd->search_key);
+ if (!cmpval) {
+ (*end) = (*cur_rec);
+ return 1;
+ }
+ if (cmpval < 0)
+ (*begin) = (*cur_rec) + 1;
+ else
+ *(end) = (*cur_rec) - 1;
+
+ return 0;
+}
+
+/* Find the record in bnode that best matches key (not greater than...)*/
+int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd,
+ search_strategy_t rec_found)
+{
u16 off, len, keylen;
int rec;
int b, e;
int res;
+ if (!rec_found)
+ BUG();
+
b = 0;
e = bnode->num_recs - 1;
res = -ENOENT;
@@ -59,17 +129,12 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd)
goto fail;
}
hfs_bnode_read(bnode, fd->key, off, keylen);
- cmpval = bnode->tree->keycmp(fd->key, fd->search_key);
- if (!cmpval) {
- e = rec;
+ if (rec_found(bnode, fd, &b, &e, &rec)) {
res = 0;
goto done;
}
- if (cmpval < 0)
- b = rec + 1;
- else
- e = rec - 1;
} while (b <= e);
+
if (rec != e && e >= 0) {
len = hfs_brec_lenoff(bnode, e, &off);
keylen = hfs_brec_keylen(bnode, e);
@@ -79,19 +144,21 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd)
}
hfs_bnode_read(bnode, fd->key, off, keylen);
}
+
done:
fd->record = e;
fd->keyoffset = off;
fd->keylength = keylen;
fd->entryoffset = off + keylen;
fd->entrylength = len - keylen;
+
fail:
return res;
}
/* Traverse a B*Tree from the root to a leaf finding best fit to key */
/* Return allocated copy of node found, set recnum to best record */
-int hfs_brec_find(struct hfs_find_data *fd)
+int hfs_brec_find(struct hfs_find_data *fd, search_strategy_t do_key_compare)
{
struct hfs_btree *tree;
struct hfs_bnode *bnode;
@@ -122,7 +189,7 @@ int hfs_brec_find(struct hfs_find_data *fd)
goto invalid;
bnode->parent = parent;
- res = __hfs_brec_find(bnode, fd);
+ res = __hfs_brec_find(bnode, fd, do_key_compare);
if (!height)
break;
if (fd->record < 0)
@@ -149,7 +216,7 @@ int hfs_brec_read(struct hfs_find_data *fd, void *rec, int rec_len)
{
int res;
- res = hfs_brec_find(fd);
+ res = hfs_brec_find(fd, hfs_find_rec_by_key);
if (res)
return res;
if (fd->entrylength > rec_len)
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 1c42cc5b899f..5c125ce6bd72 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -62,7 +62,8 @@ void hfs_bnode_read_key(struct hfs_bnode *node, void *key, int off)
tree = node->tree;
if (node->type == HFS_NODE_LEAF ||
- tree->attributes & HFS_TREE_VARIDXKEYS)
+ tree->attributes & HFS_TREE_VARIDXKEYS ||
+ node->tree->cnid == HFSPLUS_ATTR_CNID)
key_len = hfs_bnode_read_u16(node, off) + 2;
else
key_len = tree->max_key_len + 2;
@@ -314,7 +315,8 @@ void hfs_bnode_dump(struct hfs_bnode *node)
if (i && node->type == HFS_NODE_INDEX) {
int tmp;
- if (node->tree->attributes & HFS_TREE_VARIDXKEYS)
+ if (node->tree->attributes & HFS_TREE_VARIDXKEYS ||
+ node->tree->cnid == HFSPLUS_ATTR_CNID)
tmp = hfs_bnode_read_u16(node, key_off) + 2;
else
tmp = node->tree->max_key_len + 2;
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 2a734cfccc92..298d4e45604b 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -36,7 +36,8 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec)
return 0;
if ((node->type == HFS_NODE_INDEX) &&
- !(node->tree->attributes & HFS_TREE_VARIDXKEYS)) {
+ !(node->tree->attributes & HFS_TREE_VARIDXKEYS) &&
+ (node->tree->cnid != HFSPLUS_ATTR_CNID)) {
retval = node->tree->max_key_len + 2;
} else {
recoff = hfs_bnode_read_u16(node,
@@ -151,12 +152,13 @@ skip:
/* get index key */
hfs_bnode_read_key(new_node, fd->search_key, 14);
- __hfs_brec_find(fd->bnode, fd);
+ __hfs_brec_find(fd->bnode, fd, hfs_find_rec_by_key);
hfs_bnode_put(new_node);
new_node = NULL;
- if (tree->attributes & HFS_TREE_VARIDXKEYS)
+ if ((tree->attributes & HFS_TREE_VARIDXKEYS) ||
+ (tree->cnid == HFSPLUS_ATTR_CNID))
key_len = be16_to_cpu(fd->search_key->key_len) + 2;
else {
fd->search_key->key_len =
@@ -201,7 +203,7 @@ again:
hfs_bnode_put(node);
node = fd->bnode = parent;
- __hfs_brec_find(node, fd);
+ __hfs_brec_find(node, fd, hfs_find_rec_by_key);
goto again;
}
hfs_bnode_write_u16(node,
@@ -367,12 +369,13 @@ again:
parent = hfs_bnode_find(tree, node->parent);
if (IS_ERR(parent))
return PTR_ERR(parent);
- __hfs_brec_find(parent, fd);
+ __hfs_brec_find(parent, fd, hfs_find_rec_by_key);
hfs_bnode_dump(parent);
rec = fd->record;
/* size difference between old and new key */
- if (tree->attributes & HFS_TREE_VARIDXKEYS)
+ if ((tree->attributes & HFS_TREE_VARIDXKEYS) ||
+ (tree->cnid == HFSPLUS_ATTR_CNID))
newkeylen = hfs_bnode_read_u16(node, 14) + 2;
else
fd->keylength = newkeylen = tree->max_key_len + 2;
@@ -427,7 +430,7 @@ skip:
hfs_bnode_read_key(new_node, fd->search_key, 14);
cnid = cpu_to_be32(new_node->this);
- __hfs_brec_find(fd->bnode, fd);
+ __hfs_brec_find(fd->bnode, fd, hfs_find_rec_by_key);
hfs_brec_insert(fd, &cnid, sizeof(cnid));
hfs_bnode_put(fd->bnode);
hfs_bnode_put(new_node);
@@ -495,13 +498,15 @@ static int hfs_btree_inc_height(struct hfs_btree *tree)
/* insert old root idx into new root */
node->parent = tree->root;
if (node->type == HFS_NODE_LEAF ||
- tree->attributes & HFS_TREE_VARIDXKEYS)
+ tree->attributes & HFS_TREE_VARIDXKEYS ||
+ tree->cnid == HFSPLUS_ATTR_CNID)
key_size = hfs_bnode_read_u16(node, 14) + 2;
else
key_size = tree->max_key_len + 2;
hfs_bnode_copy(new_node, 14, node, 14, key_size);
- if (!(tree->attributes & HFS_TREE_VARIDXKEYS)) {
+ if (!(tree->attributes & HFS_TREE_VARIDXKEYS) &&
+ (tree->cnid != HFSPLUS_ATTR_CNID)) {
key_size = tree->max_key_len + 2;
hfs_bnode_write_u16(new_node, 14, tree->max_key_len);
}
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 685d07d0ed18..efb689c21a95 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -98,6 +98,14 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
set_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
}
break;
+ case HFSPLUS_ATTR_CNID:
+ if (tree->max_key_len != HFSPLUS_ATTR_KEYLEN - sizeof(u16)) {
+ printk(KERN_ERR "hfs: invalid attributes max_key_len %d\n",
+ tree->max_key_len);
+ goto fail_page;
+ }
+ tree->keycmp = hfsplus_attr_bin_cmp_key;
+ break;
default:
printk(KERN_ERR "hfs: unknown B*Tree requested\n");
goto fail_page;
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 798d9c4c5e71..840d71edd193 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -45,7 +45,8 @@ void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
key->cat.parent = cpu_to_be32(parent);
if (str) {
- hfsplus_asc2uni(sb, &key->cat.name, str->name, str->len);
+ hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
+ str->name, str->len);
len = be16_to_cpu(key->cat.name.length);
} else {
key->cat.name.length = 0;
@@ -167,7 +168,8 @@ static int hfsplus_fill_cat_thread(struct super_block *sb,
entry->type = cpu_to_be16(type);
entry->thread.reserved = 0;
entry->thread.parentID = cpu_to_be32(parentid);
- hfsplus_asc2uni(sb, &entry->thread.nodeName, str->name, str->len);
+ hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
+ str->name, str->len);
return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2;
}
@@ -198,7 +200,7 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid,
hfsplus_cat_build_key_uni(fd->search_key,
be32_to_cpu(tmp.thread.parentID),
&tmp.thread.nodeName);
- return hfs_brec_find(fd);
+ return hfs_brec_find(fd, hfs_find_rec_by_key);
}
int hfsplus_create_cat(u32 cnid, struct inode *dir,
@@ -221,7 +223,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
S_ISDIR(inode->i_mode) ?
HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD,
dir->i_ino, str);
- err = hfs_brec_find(&fd);
+ err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
err = -EEXIST;
@@ -233,7 +235,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
entry_size = hfsplus_cat_build_record(&entry, cnid, inode);
- err = hfs_brec_find(&fd);
+ err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
/* panic? */
if (!err)
@@ -253,7 +255,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
err1:
hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
- if (!hfs_brec_find(&fd))
+ if (!hfs_brec_find(&fd, hfs_find_rec_by_key))
hfs_brec_remove(&fd);
err2:
hfs_find_exit(&fd);
@@ -279,7 +281,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
int len;
hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
- err = hfs_brec_find(&fd);
+ err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -296,7 +298,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
} else
hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
- err = hfs_brec_find(&fd);
+ err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -326,7 +328,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
goto out;
hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
- err = hfs_brec_find(&fd);
+ err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -337,6 +339,12 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
dir->i_size--;
dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY);
+
+ if (type == HFSPLUS_FILE || type == HFSPLUS_FOLDER) {
+ if (HFSPLUS_SB(sb)->attr_tree)
+ hfsplus_delete_all_attrs(dir, cnid);
+ }
+
out:
hfs_find_exit(&fd);
@@ -363,7 +371,7 @@ int hfsplus_rename_cat(u32 cnid,
/* find the old dir entry and read the data */
hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
- err = hfs_brec_find(&src_fd);
+ err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
if (src_fd.entrylength > sizeof(entry) || src_fd.entrylength < 0) {
@@ -376,7 +384,7 @@ int hfsplus_rename_cat(u32 cnid,
/* create new dir entry with the data from the old entry */
hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name);
- err = hfs_brec_find(&dst_fd);
+ err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
err = -EEXIST;
@@ -391,7 +399,7 @@ int hfsplus_rename_cat(u32 cnid,
/* finally remove the old entry */
hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
- err = hfs_brec_find(&src_fd);
+ err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
err = hfs_brec_remove(&src_fd);
@@ -402,7 +410,7 @@ int hfsplus_rename_cat(u32 cnid,
/* remove old thread entry */
hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL);
- err = hfs_brec_find(&src_fd);
+ err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
type = hfs_bnode_read_u16(src_fd.bnode, src_fd.entryoffset);
@@ -414,7 +422,7 @@ int hfsplus_rename_cat(u32 cnid,
hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL);
entry_size = hfsplus_fill_cat_thread(sb, &entry, type,
dst_dir->i_ino, dst_name);
- err = hfs_brec_find(&dst_fd);
+ err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
err = -EEXIST;
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 6b9f921ef2fa..47699d87bc33 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -15,6 +15,7 @@
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
+#include "xattr.h"
static inline void hfsplus_instantiate(struct dentry *dentry,
struct inode *inode, u32 cnid)
@@ -138,7 +139,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
if (err)
return err;
hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
- err = hfs_brec_find(&fd);
+ err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -421,6 +422,15 @@ static int hfsplus_symlink(struct inode *dir, struct dentry *dentry,
if (res)
goto out_err;
+ res = hfsplus_init_inode_security(inode, dir, &dentry->d_name);
+ if (res == -EOPNOTSUPP)
+ res = 0; /* Operation is not supported. */
+ else if (res) {
+ /* Try to delete anyway without error analysis. */
+ hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name);
+ goto out_err;
+ }
+
hfsplus_instantiate(dentry, inode, inode->i_ino);
mark_inode_dirty(inode);
goto out;
@@ -450,15 +460,26 @@ static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
init_special_inode(inode, mode, rdev);
res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode);
- if (res) {
- clear_nlink(inode);
- hfsplus_delete_inode(inode);
- iput(inode);
- goto out;
+ if (res)
+ goto failed_mknod;
+
+ res = hfsplus_init_inode_security(inode, dir, &dentry->d_name);
+ if (res == -EOPNOTSUPP)
+ res = 0; /* Operation is not supported. */
+ else if (res) {
+ /* Try to delete anyway without error analysis. */
+ hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name);
+ goto failed_mknod;
}
hfsplus_instantiate(dentry, inode, inode->i_ino);
mark_inode_dirty(inode);
+ goto out;
+
+failed_mknod:
+ clear_nlink(inode);
+ hfsplus_delete_inode(inode);
+ iput(inode);
out:
mutex_unlock(&sbi->vh_mutex);
return res;
@@ -499,15 +520,19 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry,
}
const struct inode_operations hfsplus_dir_inode_operations = {
- .lookup = hfsplus_lookup,
- .create = hfsplus_create,
- .link = hfsplus_link,
- .unlink = hfsplus_unlink,
- .mkdir = hfsplus_mkdir,
- .rmdir = hfsplus_rmdir,
- .symlink = hfsplus_symlink,
- .mknod = hfsplus_mknod,
- .rename = hfsplus_rename,
+ .lookup = hfsplus_lookup,
+ .create = hfsplus_create,
+ .link = hfsplus_link,
+ .unlink = hfsplus_unlink,
+ .mkdir = hfsplus_mkdir,
+ .rmdir = hfsplus_rmdir,
+ .symlink = hfsplus_symlink,
+ .mknod = hfsplus_mknod,
+ .rename = hfsplus_rename,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .listxattr = hfsplus_listxattr,
+ .removexattr = hfsplus_removexattr,
};
const struct file_operations hfsplus_dir_operations = {
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index eba76eab6d62..a94f0f779d5e 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -95,7 +95,7 @@ static void __hfsplus_ext_write_extent(struct inode *inode,
HFSPLUS_IS_RSRC(inode) ?
HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA);
- res = hfs_brec_find(fd);
+ res = hfs_brec_find(fd, hfs_find_rec_by_key);
if (hip->extent_state & HFSPLUS_EXT_NEW) {
if (res != -ENOENT)
return;
@@ -154,7 +154,7 @@ static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd,
hfsplus_ext_build_key(fd->search_key, cnid, block, type);
fd->key->ext.cnid = 0;
- res = hfs_brec_find(fd);
+ res = hfs_brec_find(fd, hfs_find_rec_by_key);
if (res && res != -ENOENT)
return res;
if (fd->key->ext.cnid != fd->search_key->ext.cnid ||
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index a6da86b1b4c1..05b11f36024c 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -23,6 +23,7 @@
#define DBG_SUPER 0x00000010
#define DBG_EXTENT 0x00000020
#define DBG_BITMAP 0x00000040
+#define DBG_ATTR_MOD 0x00000080
#if 0
#define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD)
@@ -46,6 +47,13 @@ typedef int (*btree_keycmp)(const hfsplus_btree_key *,
#define NODE_HASH_SIZE 256
+/* B-tree mutex nested subclasses */
+enum hfsplus_btree_mutex_classes {
+ CATALOG_BTREE_MUTEX,
+ EXTENTS_BTREE_MUTEX,
+ ATTR_BTREE_MUTEX,
+};
+
/* An HFS+ BTree held in memory */
struct hfs_btree {
struct super_block *sb;
@@ -223,6 +231,7 @@ struct hfsplus_inode_info {
#define HFSPLUS_I_CAT_DIRTY 1 /* has changes in the catalog tree */
#define HFSPLUS_I_EXT_DIRTY 2 /* has changes in the extent tree */
#define HFSPLUS_I_ALLOC_DIRTY 3 /* has changes in the allocation file */
+#define HFSPLUS_I_ATTR_DIRTY 4 /* has changes in the attributes tree */
#define HFSPLUS_IS_RSRC(inode) \
test_bit(HFSPLUS_I_RSRC, &HFSPLUS_I(inode)->flags)
@@ -302,7 +311,7 @@ static inline unsigned short hfsplus_min_io_size(struct super_block *sb)
#define hfs_brec_remove hfsplus_brec_remove
#define hfs_find_init hfsplus_find_init
#define hfs_find_exit hfsplus_find_exit
-#define __hfs_brec_find __hplusfs_brec_find
+#define __hfs_brec_find __hfsplus_brec_find
#define hfs_brec_find hfsplus_brec_find
#define hfs_brec_read hfsplus_brec_read
#define hfs_brec_goto hfsplus_brec_goto
@@ -324,10 +333,33 @@ static inline unsigned short hfsplus_min_io_size(struct super_block *sb)
*/
#define HFSPLUS_IOC_BLESS _IO('h', 0x80)
+typedef int (*search_strategy_t)(struct hfs_bnode *,
+ struct hfs_find_data *,
+ int *, int *, int *);
+
/*
* Functions in any *.c used in other files
*/
+/* attributes.c */
+int hfsplus_create_attr_tree_cache(void);
+void hfsplus_destroy_attr_tree_cache(void);
+hfsplus_attr_entry *hfsplus_alloc_attr_entry(void);
+void hfsplus_destroy_attr_entry(hfsplus_attr_entry *entry_p);
+int hfsplus_attr_bin_cmp_key(const hfsplus_btree_key *,
+ const hfsplus_btree_key *);
+int hfsplus_attr_build_key(struct super_block *, hfsplus_btree_key *,
+ u32, const char *);
+void hfsplus_attr_build_key_uni(hfsplus_btree_key *key,
+ u32 cnid,
+ struct hfsplus_attr_unistr *name);
+int hfsplus_find_attr(struct super_block *, u32,
+ const char *, struct hfs_find_data *);
+int hfsplus_attr_exists(struct inode *inode, const char *name);
+int hfsplus_create_attr(struct inode *, const char *, const void *, size_t);
+int hfsplus_delete_attr(struct inode *, const char *);
+int hfsplus_delete_all_attrs(struct inode *dir, u32 cnid);
+
/* bitmap.c */
int hfsplus_block_allocate(struct super_block *, u32, u32, u32 *);
int hfsplus_block_free(struct super_block *, u32, u32);
@@ -369,8 +401,15 @@ int hfs_brec_remove(struct hfs_find_data *);
/* bfind.c */
int hfs_find_init(struct hfs_btree *, struct hfs_find_data *);
void hfs_find_exit(struct hfs_find_data *);
-int __hfs_brec_find(struct hfs_bnode *, struct hfs_find_data *);
-int hfs_brec_find(struct hfs_find_data *);
+int hfs_find_1st_rec_by_cnid(struct hfs_bnode *,
+ struct hfs_find_data *,
+ int *, int *, int *);
+int hfs_find_rec_by_key(struct hfs_bnode *,
+ struct hfs_find_data *,
+ int *, int *, int *);
+int __hfs_brec_find(struct hfs_bnode *, struct hfs_find_data *,
+ search_strategy_t);
+int hfs_brec_find(struct hfs_find_data *, search_strategy_t);
int hfs_brec_read(struct hfs_find_data *, void *, int);
int hfs_brec_goto(struct hfs_find_data *, int);
@@ -417,11 +456,6 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
/* ioctl.c */
long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
-int hfsplus_setxattr(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags);
-ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
- void *value, size_t size);
-ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
/* options.c */
int hfsplus_parse_options(char *, struct hfsplus_sb_info *);
@@ -446,7 +480,7 @@ int hfsplus_strcmp(const struct hfsplus_unistr *,
int hfsplus_uni2asc(struct super_block *,
const struct hfsplus_unistr *, char *, int *);
int hfsplus_asc2uni(struct super_block *,
- struct hfsplus_unistr *, const char *, int);
+ struct hfsplus_unistr *, int, const char *, int);
int hfsplus_hash_dentry(const struct dentry *dentry,
const struct inode *inode, struct qstr *str);
int hfsplus_compare_dentry(const struct dentry *parent,
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h
index 921967e5abb1..452ede01b036 100644
--- a/fs/hfsplus/hfsplus_raw.h
+++ b/fs/hfsplus/hfsplus_raw.h
@@ -52,13 +52,23 @@
typedef __be32 hfsplus_cnid;
typedef __be16 hfsplus_unichr;
+#define HFSPLUS_MAX_STRLEN 255
+#define HFSPLUS_ATTR_MAX_STRLEN 127
+
/* A "string" as used in filenames, etc. */
struct hfsplus_unistr {
__be16 length;
- hfsplus_unichr unicode[255];
+ hfsplus_unichr unicode[HFSPLUS_MAX_STRLEN];
} __packed;
-#define HFSPLUS_MAX_STRLEN 255
+/*
+ * A "string" is used in attributes file
+ * for name of extended attribute
+ */
+struct hfsplus_attr_unistr {
+ __be16 length;
+ hfsplus_unichr unicode[HFSPLUS_ATTR_MAX_STRLEN];
+} __packed;
/* POSIX permissions */
struct hfsplus_perm {
@@ -291,6 +301,8 @@ struct hfsplus_cat_file {
/* File attribute bits */
#define HFSPLUS_FILE_LOCKED 0x0001
#define HFSPLUS_FILE_THREAD_EXISTS 0x0002
+#define HFSPLUS_XATTR_EXISTS 0x0004
+#define HFSPLUS_ACL_EXISTS 0x0008
/* HFS+ catalog thread (part of a cat_entry) */
struct hfsplus_cat_thread {
@@ -327,11 +339,63 @@ struct hfsplus_ext_key {
#define HFSPLUS_EXT_KEYLEN sizeof(struct hfsplus_ext_key)
+#define HFSPLUS_XATTR_FINDER_INFO_NAME "com.apple.FinderInfo"
+#define HFSPLUS_XATTR_ACL_NAME "com.apple.system.Security"
+
+#define HFSPLUS_ATTR_INLINE_DATA 0x10
+#define HFSPLUS_ATTR_FORK_DATA 0x20
+#define HFSPLUS_ATTR_EXTENTS 0x30
+
+/* HFS+ attributes tree key */
+struct hfsplus_attr_key {
+ __be16 key_len;
+ __be16 pad;
+ hfsplus_cnid cnid;
+ __be32 start_block;
+ struct hfsplus_attr_unistr key_name;
+} __packed;
+
+#define HFSPLUS_ATTR_KEYLEN sizeof(struct hfsplus_attr_key)
+
+/* HFS+ fork data attribute */
+struct hfsplus_attr_fork_data {
+ __be32 record_type;
+ __be32 reserved;
+ struct hfsplus_fork_raw the_fork;
+} __packed;
+
+/* HFS+ extension attribute */
+struct hfsplus_attr_extents {
+ __be32 record_type;
+ __be32 reserved;
+ struct hfsplus_extent extents;
+} __packed;
+
+#define HFSPLUS_MAX_INLINE_DATA_SIZE 3802
+
+/* HFS+ attribute inline data */
+struct hfsplus_attr_inline_data {
+ __be32 record_type;
+ __be32 reserved1;
+ u8 reserved2[6];
+ __be16 length;
+ u8 raw_bytes[HFSPLUS_MAX_INLINE_DATA_SIZE];
+} __packed;
+
+/* A data record in the attributes tree */
+typedef union {
+ __be32 record_type;
+ struct hfsplus_attr_fork_data fork_data;
+ struct hfsplus_attr_extents extents;
+ struct hfsplus_attr_inline_data inline_data;
+} __packed hfsplus_attr_entry;
+
/* HFS+ generic BTree key */
typedef union {
__be16 key_len;
struct hfsplus_cat_key cat;
struct hfsplus_ext_key ext;
+ struct hfsplus_attr_key attr;
} __packed hfsplus_btree_key;
#endif
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 799b336b59f9..0aff99fdeb16 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -14,9 +14,11 @@
#include <linux/pagemap.h>
#include <linux/mpage.h>
#include <linux/sched.h>
+#include <linux/aio.h>
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
+#include "xattr.h"
static int hfsplus_readpage(struct file *file, struct page *page)
{
@@ -348,6 +350,18 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
error = error2;
}
+ if (test_and_clear_bit(HFSPLUS_I_ATTR_DIRTY, &hip->flags)) {
+ if (sbi->attr_tree) {
+ error2 =
+ filemap_write_and_wait(
+ sbi->attr_tree->inode->i_mapping);
+ if (!error)
+ error = error2;
+ } else {
+ printk(KERN_ERR "hfs: sync non-existent attributes tree\n");
+ }
+ }
+
if (test_and_clear_bit(HFSPLUS_I_ALLOC_DIRTY, &hip->flags)) {
error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping);
if (!error)
@@ -365,9 +379,10 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
static const struct inode_operations hfsplus_file_inode_operations = {
.lookup = hfsplus_file_lookup,
.setattr = hfsplus_setattr,
- .setxattr = hfsplus_setxattr,
- .getxattr = hfsplus_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = hfsplus_listxattr,
+ .removexattr = hfsplus_removexattr,
};
static const struct file_operations hfsplus_file_operations = {
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 09addc8615fa..83a434d0c2e7 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -16,7 +16,6 @@
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/sched.h>
-#include <linux/xattr.h>
#include <asm/uaccess.h>
#include "hfsplus_fs.h"
@@ -151,110 +150,3 @@ long hfsplus_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return -ENOTTY;
}
}
-
-int hfsplus_setxattr(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags)
-{
- struct inode *inode = dentry->d_inode;
- struct hfs_find_data fd;
- hfsplus_cat_entry entry;
- struct hfsplus_cat_file *file;
- int res;
-
- if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode))
- return -EOPNOTSUPP;
-
- res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
- if (res)
- return res;
- res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
- if (res)
- goto out;
- hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
- sizeof(struct hfsplus_cat_file));
- file = &entry.file;
-
- if (!strcmp(name, "hfs.type")) {
- if (size == 4)
- memcpy(&file->user_info.fdType, value, 4);
- else
- res = -ERANGE;
- } else if (!strcmp(name, "hfs.creator")) {
- if (size == 4)
- memcpy(&file->user_info.fdCreator, value, 4);
- else
- res = -ERANGE;
- } else
- res = -EOPNOTSUPP;
- if (!res) {
- hfs_bnode_write(fd.bnode, &entry, fd.entryoffset,
- sizeof(struct hfsplus_cat_file));
- hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY);
- }
-out:
- hfs_find_exit(&fd);
- return res;
-}
-
-ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
- void *value, size_t size)
-{
- struct inode *inode = dentry->d_inode;
- struct hfs_find_data fd;
- hfsplus_cat_entry entry;
- struct hfsplus_cat_file *file;
- ssize_t res = 0;
-
- if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode))
- return -EOPNOTSUPP;
-
- if (size) {
- res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
- if (res)
- return res;
- res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
- if (res)
- goto out;
- hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
- sizeof(struct hfsplus_cat_file));
- }
- file = &entry.file;
-
- if (!strcmp(name, "hfs.type")) {
- if (size >= 4) {
- memcpy(value, &file->user_info.fdType, 4);
- res = 4;
- } else
- res = size ? -ERANGE : 4;
- } else if (!strcmp(name, "hfs.creator")) {
- if (size >= 4) {
- memcpy(value, &file->user_info.fdCreator, 4);
- res = 4;
- } else
- res = size ? -ERANGE : 4;
- } else
- res = -EOPNOTSUPP;
-out:
- if (size)
- hfs_find_exit(&fd);
- return res;
-}
-
-#define HFSPLUS_ATTRLIST_SIZE (sizeof("hfs.creator")+sizeof("hfs.type"))
-
-ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
-{
- struct inode *inode = dentry->d_inode;
-
- if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode))
- return -EOPNOTSUPP;
-
- if (!buffer || !size)
- return HFSPLUS_ATTRLIST_SIZE;
- if (size < HFSPLUS_ATTRLIST_SIZE)
- return -ERANGE;
- strcpy(buffer, "hfs.type");
- strcpy(buffer + sizeof("hfs.type"), "hfs.creator");
-
- return HFSPLUS_ATTRLIST_SIZE;
-}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 796198d26553..974c26f96fae 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -20,6 +20,7 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb);
static void hfsplus_destroy_inode(struct inode *inode);
#include "hfsplus_fs.h"
+#include "xattr.h"
static int hfsplus_system_read_inode(struct inode *inode)
{
@@ -118,6 +119,7 @@ static int hfsplus_system_write_inode(struct inode *inode)
case HFSPLUS_ATTR_CNID:
fork = &vhdr->attr_file;
tree = sbi->attr_tree;
+ break;
default:
return -EIO;
}
@@ -191,6 +193,12 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait)
error2 = filemap_write_and_wait(sbi->ext_tree->inode->i_mapping);
if (!error)
error = error2;
+ if (sbi->attr_tree) {
+ error2 =
+ filemap_write_and_wait(sbi->attr_tree->inode->i_mapping);
+ if (!error)
+ error = error2;
+ }
error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping);
if (!error)
error = error2;
@@ -281,6 +289,7 @@ static void hfsplus_put_super(struct super_block *sb)
hfsplus_sync_fs(sb, 1);
}
+ hfs_btree_close(sbi->attr_tree);
hfs_btree_close(sbi->cat_tree);
hfs_btree_close(sbi->ext_tree);
iput(sbi->alloc_file);
@@ -477,12 +486,20 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
printk(KERN_ERR "hfs: failed to load catalog file\n");
goto out_close_ext_tree;
}
+ if (vhdr->attr_file.total_blocks != 0) {
+ sbi->attr_tree = hfs_btree_open(sb, HFSPLUS_ATTR_CNID);
+ if (!sbi->attr_tree) {
+ printk(KERN_ERR "hfs: failed to load attributes file\n");
+ goto out_close_cat_tree;
+ }
+ }
+ sb->s_xattr = hfsplus_xattr_handlers;
inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID);
if (IS_ERR(inode)) {
printk(KERN_ERR "hfs: failed to load allocation file\n");
err = PTR_ERR(inode);
- goto out_close_cat_tree;
+ goto out_close_attr_tree;
}
sbi->alloc_file = inode;
@@ -542,10 +559,27 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
}
err = hfsplus_create_cat(sbi->hidden_dir->i_ino, root,
&str, sbi->hidden_dir);
- mutex_unlock(&sbi->vh_mutex);
- if (err)
+ if (err) {
+ mutex_unlock(&sbi->vh_mutex);
+ goto out_put_hidden_dir;
+ }
+
+ err = hfsplus_init_inode_security(sbi->hidden_dir,
+ root, &str);
+ if (err == -EOPNOTSUPP)
+ err = 0; /* Operation is not supported. */
+ else if (err) {
+ /*
+ * Try to delete anyway without
+ * error analysis.
+ */
+ hfsplus_delete_cat(sbi->hidden_dir->i_ino,
+ root, &str);
+ mutex_unlock(&sbi->vh_mutex);
goto out_put_hidden_dir;
+ }
+ mutex_unlock(&sbi->vh_mutex);
hfsplus_mark_inode_dirty(sbi->hidden_dir,
HFSPLUS_I_CAT_DIRTY);
}
@@ -562,6 +596,8 @@ out_put_root:
sb->s_root = NULL;
out_put_alloc_file:
iput(sbi->alloc_file);
+out_close_attr_tree:
+ hfs_btree_close(sbi->attr_tree);
out_close_cat_tree:
hfs_btree_close(sbi->cat_tree);
out_close_ext_tree:
@@ -635,9 +671,20 @@ static int __init init_hfsplus_fs(void)
hfsplus_init_once);
if (!hfsplus_inode_cachep)
return -ENOMEM;
+ err = hfsplus_create_attr_tree_cache();
+ if (err)
+ goto destroy_inode_cache;
err = register_filesystem(&hfsplus_fs_type);
if (err)
- kmem_cache_destroy(hfsplus_inode_cachep);
+ goto destroy_attr_tree_cache;
+ return 0;
+
+destroy_attr_tree_cache:
+ hfsplus_destroy_attr_tree_cache();
+
+destroy_inode_cache:
+ kmem_cache_destroy(hfsplus_inode_cachep);
+
return err;
}
@@ -650,6 +697,7 @@ static void __exit exit_hfsplus_fs(void)
* destroy cache.
*/
rcu_barrier();
+ hfsplus_destroy_attr_tree_cache();
kmem_cache_destroy(hfsplus_inode_cachep);
}
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index a32998f29f0b..2c2e47dcfdd8 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -295,7 +295,8 @@ static inline u16 *decompose_unichar(wchar_t uc, int *size)
return hfsplus_decompose_table + (off / 4);
}
-int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
+int hfsplus_asc2uni(struct super_block *sb,
+ struct hfsplus_unistr *ustr, int max_unistr_len,
const char *astr, int len)
{
int size, dsize, decompose;
@@ -303,7 +304,7 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
wchar_t c;
decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
- while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
+ while (outlen < max_unistr_len && len > 0) {
size = asc2unichar(sb, astr, len, &c);
if (decompose)
@@ -311,7 +312,7 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
else
dstr = NULL;
if (dstr) {
- if (outlen + dsize > HFSPLUS_MAX_STRLEN)
+ if (outlen + dsize > max_unistr_len)
break;
do {
ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
new file mode 100644
index 000000000000..e8a4b0815c61
--- /dev/null
+++ b/fs/hfsplus/xattr.c
@@ -0,0 +1,709 @@
+/*
+ * linux/fs/hfsplus/xattr.c
+ *
+ * Vyacheslav Dubeyko <slava@dubeyko.com>
+ *
+ * Logic of processing extended attributes
+ */
+
+#include "hfsplus_fs.h"
+#include "xattr.h"
+
+const struct xattr_handler *hfsplus_xattr_handlers[] = {
+ &hfsplus_xattr_osx_handler,
+ &hfsplus_xattr_user_handler,
+ &hfsplus_xattr_trusted_handler,
+ &hfsplus_xattr_security_handler,
+ NULL
+};
+
+static int strcmp_xattr_finder_info(const char *name)
+{
+ if (name) {
+ return strncmp(name, HFSPLUS_XATTR_FINDER_INFO_NAME,
+ sizeof(HFSPLUS_XATTR_FINDER_INFO_NAME));
+ }
+ return -1;
+}
+
+static int strcmp_xattr_acl(const char *name)
+{
+ if (name) {
+ return strncmp(name, HFSPLUS_XATTR_ACL_NAME,
+ sizeof(HFSPLUS_XATTR_ACL_NAME));
+ }
+ return -1;
+}
+
+static inline int is_known_namespace(const char *name)
+{
+ if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) &&
+ strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
+ strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
+ strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
+ return false;
+
+ return true;
+}
+
+static int can_set_xattr(struct inode *inode, const char *name,
+ const void *value, size_t value_len)
+{
+ if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+ return -EOPNOTSUPP; /* TODO: implement ACL support */
+
+ if (!strncmp(name, XATTR_MAC_OSX_PREFIX, XATTR_MAC_OSX_PREFIX_LEN)) {
+ /*
+ * This makes sure that we aren't trying to set an
+ * attribute in a different namespace by prefixing it
+ * with "osx."
+ */
+ if (is_known_namespace(name + XATTR_MAC_OSX_PREFIX_LEN))
+ return -EOPNOTSUPP;
+
+ return 0;
+ }
+
+ /*
+ * Don't allow setting an attribute in an unknown namespace.
+ */
+ if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) &&
+ strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
+ strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+int __hfsplus_setxattr(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ int err = 0;
+ struct hfs_find_data cat_fd;
+ hfsplus_cat_entry entry;
+ u16 cat_entry_flags, cat_entry_type;
+ u16 folder_finderinfo_len = sizeof(struct DInfo) +
+ sizeof(struct DXInfo);
+ u16 file_finderinfo_len = sizeof(struct FInfo) +
+ sizeof(struct FXInfo);
+
+ if ((!S_ISREG(inode->i_mode) &&
+ !S_ISDIR(inode->i_mode)) ||
+ HFSPLUS_IS_RSRC(inode))
+ return -EOPNOTSUPP;
+
+ err = can_set_xattr(inode, name, value, size);
+ if (err)
+ return err;
+
+ if (strncmp(name, XATTR_MAC_OSX_PREFIX,
+ XATTR_MAC_OSX_PREFIX_LEN) == 0)
+ name += XATTR_MAC_OSX_PREFIX_LEN;
+
+ if (value == NULL) {
+ value = "";
+ size = 0;
+ }
+
+ err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &cat_fd);
+ if (err) {
+ printk(KERN_ERR "hfs: can't init xattr find struct\n");
+ return err;
+ }
+
+ err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &cat_fd);
+ if (err) {
+ printk(KERN_ERR "hfs: catalog searching failed\n");
+ goto end_setxattr;
+ }
+
+ if (!strcmp_xattr_finder_info(name)) {
+ if (flags & XATTR_CREATE) {
+ printk(KERN_ERR "hfs: xattr exists yet\n");
+ err = -EOPNOTSUPP;
+ goto end_setxattr;
+ }
+ hfs_bnode_read(cat_fd.bnode, &entry, cat_fd.entryoffset,
+ sizeof(hfsplus_cat_entry));
+ if (be16_to_cpu(entry.type) == HFSPLUS_FOLDER) {
+ if (size == folder_finderinfo_len) {
+ memcpy(&entry.folder.user_info, value,
+ folder_finderinfo_len);
+ hfs_bnode_write(cat_fd.bnode, &entry,
+ cat_fd.entryoffset,
+ sizeof(struct hfsplus_cat_folder));
+ hfsplus_mark_inode_dirty(inode,
+ HFSPLUS_I_CAT_DIRTY);
+ } else {
+ err = -ERANGE;
+ goto end_setxattr;
+ }
+ } else if (be16_to_cpu(entry.type) == HFSPLUS_FILE) {
+ if (size == file_finderinfo_len) {
+ memcpy(&entry.file.user_info, value,
+ file_finderinfo_len);
+ hfs_bnode_write(cat_fd.bnode, &entry,
+ cat_fd.entryoffset,
+ sizeof(struct hfsplus_cat_file));
+ hfsplus_mark_inode_dirty(inode,
+ HFSPLUS_I_CAT_DIRTY);
+ } else {
+ err = -ERANGE;
+ goto end_setxattr;
+ }
+ } else {
+ err = -EOPNOTSUPP;
+ goto end_setxattr;
+ }
+ goto end_setxattr;
+ }
+
+ if (!HFSPLUS_SB(inode->i_sb)->attr_tree) {
+ err = -EOPNOTSUPP;
+ goto end_setxattr;
+ }
+
+ if (hfsplus_attr_exists(inode, name)) {
+ if (flags & XATTR_CREATE) {
+ printk(KERN_ERR "hfs: xattr exists yet\n");
+ err = -EOPNOTSUPP;
+ goto end_setxattr;
+ }
+ err = hfsplus_delete_attr(inode, name);
+ if (err)
+ goto end_setxattr;
+ err = hfsplus_create_attr(inode, name, value, size);
+ if (err)
+ goto end_setxattr;
+ } else {
+ if (flags & XATTR_REPLACE) {
+ printk(KERN_ERR "hfs: cannot replace xattr\n");
+ err = -EOPNOTSUPP;
+ goto end_setxattr;
+ }
+ err = hfsplus_create_attr(inode, name, value, size);
+ if (err)
+ goto end_setxattr;
+ }
+
+ cat_entry_type = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset);
+ if (cat_entry_type == HFSPLUS_FOLDER) {
+ cat_entry_flags = hfs_bnode_read_u16(cat_fd.bnode,
+ cat_fd.entryoffset +
+ offsetof(struct hfsplus_cat_folder, flags));
+ cat_entry_flags |= HFSPLUS_XATTR_EXISTS;
+ if (!strcmp_xattr_acl(name))
+ cat_entry_flags |= HFSPLUS_ACL_EXISTS;
+ hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset +
+ offsetof(struct hfsplus_cat_folder, flags),
+ cat_entry_flags);
+ hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY);
+ } else if (cat_entry_type == HFSPLUS_FILE) {
+ cat_entry_flags = hfs_bnode_read_u16(cat_fd.bnode,
+ cat_fd.entryoffset +
+ offsetof(struct hfsplus_cat_file, flags));
+ cat_entry_flags |= HFSPLUS_XATTR_EXISTS;
+ if (!strcmp_xattr_acl(name))
+ cat_entry_flags |= HFSPLUS_ACL_EXISTS;
+ hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset +
+ offsetof(struct hfsplus_cat_file, flags),
+ cat_entry_flags);
+ hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY);
+ } else {
+ printk(KERN_ERR "hfs: invalid catalog entry type\n");
+ err = -EIO;
+ goto end_setxattr;
+ }
+
+end_setxattr:
+ hfs_find_exit(&cat_fd);
+ return err;
+}
+
+static inline int is_osx_xattr(const char *xattr_name)
+{
+ return !is_known_namespace(xattr_name);
+}
+
+static int name_len(const char *xattr_name, int xattr_name_len)
+{
+ int len = xattr_name_len + 1;
+
+ if (is_osx_xattr(xattr_name))
+ len += XATTR_MAC_OSX_PREFIX_LEN;
+
+ return len;
+}
+
+static int copy_name(char *buffer, const char *xattr_name, int name_len)
+{
+ int len = name_len;
+ int offset = 0;
+
+ if (is_osx_xattr(xattr_name)) {
+ strncpy(buffer, XATTR_MAC_OSX_PREFIX, XATTR_MAC_OSX_PREFIX_LEN);
+ offset += XATTR_MAC_OSX_PREFIX_LEN;
+ len += XATTR_MAC_OSX_PREFIX_LEN;
+ }
+
+ strncpy(buffer + offset, xattr_name, name_len);
+ memset(buffer + offset + name_len, 0, 1);
+ len += 1;
+
+ return len;
+}
+
+static ssize_t hfsplus_getxattr_finder_info(struct dentry *dentry,
+ void *value, size_t size)
+{
+ ssize_t res = 0;
+ struct inode *inode = dentry->d_inode;
+ struct hfs_find_data fd;
+ u16 entry_type;
+ u16 folder_rec_len = sizeof(struct DInfo) + sizeof(struct DXInfo);
+ u16 file_rec_len = sizeof(struct FInfo) + sizeof(struct FXInfo);
+ u16 record_len = max(folder_rec_len, file_rec_len);
+ u8 folder_finder_info[sizeof(struct DInfo) + sizeof(struct DXInfo)];
+ u8 file_finder_info[sizeof(struct FInfo) + sizeof(struct FXInfo)];
+
+ if (size >= record_len) {
+ res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
+ if (res) {
+ printk(KERN_ERR "hfs: can't init xattr find struct\n");
+ return res;
+ }
+ res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
+ if (res)
+ goto end_getxattr_finder_info;
+ entry_type = hfs_bnode_read_u16(fd.bnode, fd.entryoffset);
+
+ if (entry_type == HFSPLUS_FOLDER) {
+ hfs_bnode_read(fd.bnode, folder_finder_info,
+ fd.entryoffset +
+ offsetof(struct hfsplus_cat_folder, user_info),
+ folder_rec_len);
+ memcpy(value, folder_finder_info, folder_rec_len);
+ res = folder_rec_len;
+ } else if (entry_type == HFSPLUS_FILE) {
+ hfs_bnode_read(fd.bnode, file_finder_info,
+ fd.entryoffset +
+ offsetof(struct hfsplus_cat_file, user_info),
+ file_rec_len);
+ memcpy(value, file_finder_info, file_rec_len);
+ res = file_rec_len;
+ } else {
+ res = -EOPNOTSUPP;
+ goto end_getxattr_finder_info;
+ }
+ } else
+ res = size ? -ERANGE : record_len;
+
+end_getxattr_finder_info:
+ if (size >= record_len)
+ hfs_find_exit(&fd);
+ return res;
+}
+
+ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
+ void *value, size_t size)
+{
+ struct inode *inode = dentry->d_inode;
+ struct hfs_find_data fd;
+ hfsplus_attr_entry *entry;
+ __be32 xattr_record_type;
+ u32 record_type;
+ u16 record_length = 0;
+ ssize_t res = 0;
+
+ if ((!S_ISREG(inode->i_mode) &&
+ !S_ISDIR(inode->i_mode)) ||
+ HFSPLUS_IS_RSRC(inode))
+ return -EOPNOTSUPP;
+
+ if (strncmp(name, XATTR_MAC_OSX_PREFIX,
+ XATTR_MAC_OSX_PREFIX_LEN) == 0) {
+ /* skip "osx." prefix */
+ name += XATTR_MAC_OSX_PREFIX_LEN;
+ /*
+ * Don't allow retrieving properly prefixed attributes
+ * by prepending them with "osx."
+ */
+ if (is_known_namespace(name))
+ return -EOPNOTSUPP;
+ }
+
+ if (!strcmp_xattr_finder_info(name))
+ return hfsplus_getxattr_finder_info(dentry, value, size);
+
+ if (!HFSPLUS_SB(inode->i_sb)->attr_tree)
+ return -EOPNOTSUPP;
+
+ entry = hfsplus_alloc_attr_entry();
+ if (!entry) {
+ printk(KERN_ERR "hfs: can't allocate xattr entry\n");
+ return -ENOMEM;
+ }
+
+ res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->attr_tree, &fd);
+ if (res) {
+ printk(KERN_ERR "hfs: can't init xattr find struct\n");
+ goto failed_getxattr_init;
+ }
+
+ res = hfsplus_find_attr(inode->i_sb, inode->i_ino, name, &fd);
+ if (res) {
+ if (res == -ENOENT)
+ res = -ENODATA;
+ else
+ printk(KERN_ERR "hfs: xattr searching failed\n");
+ goto out;
+ }
+
+ hfs_bnode_read(fd.bnode, &xattr_record_type,
+ fd.entryoffset, sizeof(xattr_record_type));
+ record_type = be32_to_cpu(xattr_record_type);
+ if (record_type == HFSPLUS_ATTR_INLINE_DATA) {
+ record_length = hfs_bnode_read_u16(fd.bnode,
+ fd.entryoffset +
+ offsetof(struct hfsplus_attr_inline_data,
+ length));
+ if (record_length > HFSPLUS_MAX_INLINE_DATA_SIZE) {
+ printk(KERN_ERR "hfs: invalid xattr record size\n");
+ res = -EIO;
+ goto out;
+ }
+ } else if (record_type == HFSPLUS_ATTR_FORK_DATA ||
+ record_type == HFSPLUS_ATTR_EXTENTS) {
+ printk(KERN_ERR "hfs: only inline data xattr are supported\n");
+ res = -EOPNOTSUPP;
+ goto out;
+ } else {
+ printk(KERN_ERR "hfs: invalid xattr record\n");
+ res = -EIO;
+ goto out;
+ }
+
+ if (size) {
+ hfs_bnode_read(fd.bnode, entry, fd.entryoffset,
+ offsetof(struct hfsplus_attr_inline_data,
+ raw_bytes) + record_length);
+ }
+
+ if (size >= record_length) {
+ memcpy(value, entry->inline_data.raw_bytes, record_length);
+ res = record_length;
+ } else
+ res = size ? -ERANGE : record_length;
+
+out:
+ hfs_find_exit(&fd);
+
+failed_getxattr_init:
+ hfsplus_destroy_attr_entry(entry);
+ return res;
+}
+
+static inline int can_list(const char *xattr_name)
+{
+ if (!xattr_name)
+ return 0;
+
+ return strncmp(xattr_name, XATTR_TRUSTED_PREFIX,
+ XATTR_TRUSTED_PREFIX_LEN) ||
+ capable(CAP_SYS_ADMIN);
+}
+
+static ssize_t hfsplus_listxattr_finder_info(struct dentry *dentry,
+ char *buffer, size_t size)
+{
+ ssize_t res = 0;
+ struct inode *inode = dentry->d_inode;
+ struct hfs_find_data fd;
+ u16 entry_type;
+ u8 folder_finder_info[sizeof(struct DInfo) + sizeof(struct DXInfo)];
+ u8 file_finder_info[sizeof(struct FInfo) + sizeof(struct FXInfo)];
+ unsigned long len, found_bit;
+ int xattr_name_len, symbols_count;
+
+ res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
+ if (res) {
+ printk(KERN_ERR "hfs: can't init xattr find struct\n");
+ return res;
+ }
+
+ res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
+ if (res)
+ goto end_listxattr_finder_info;
+
+ entry_type = hfs_bnode_read_u16(fd.bnode, fd.entryoffset);
+ if (entry_type == HFSPLUS_FOLDER) {
+ len = sizeof(struct DInfo) + sizeof(struct DXInfo);
+ hfs_bnode_read(fd.bnode, folder_finder_info,
+ fd.entryoffset +
+ offsetof(struct hfsplus_cat_folder, user_info),
+ len);
+ found_bit = find_first_bit((void *)folder_finder_info, len*8);
+ } else if (entry_type == HFSPLUS_FILE) {
+ len = sizeof(struct FInfo) + sizeof(struct FXInfo);
+ hfs_bnode_read(fd.bnode, file_finder_info,
+ fd.entryoffset +
+ offsetof(struct hfsplus_cat_file, user_info),
+ len);
+ found_bit = find_first_bit((void *)file_finder_info, len*8);
+ } else {
+ res = -EOPNOTSUPP;
+ goto end_listxattr_finder_info;
+ }
+
+ if (found_bit >= (len*8))
+ res = 0;
+ else {
+ symbols_count = sizeof(HFSPLUS_XATTR_FINDER_INFO_NAME) - 1;
+ xattr_name_len =
+ name_len(HFSPLUS_XATTR_FINDER_INFO_NAME, symbols_count);
+ if (!buffer || !size) {
+ if (can_list(HFSPLUS_XATTR_FINDER_INFO_NAME))
+ res = xattr_name_len;
+ } else if (can_list(HFSPLUS_XATTR_FINDER_INFO_NAME)) {
+ if (size < xattr_name_len)
+ res = -ERANGE;
+ else {
+ res = copy_name(buffer,
+ HFSPLUS_XATTR_FINDER_INFO_NAME,
+ symbols_count);
+ }
+ }
+ }
+
+end_listxattr_finder_info:
+ hfs_find_exit(&fd);
+
+ return res;
+}
+
+ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+ ssize_t err;
+ ssize_t res = 0;
+ struct inode *inode = dentry->d_inode;
+ struct hfs_find_data fd;
+ u16 key_len = 0;
+ struct hfsplus_attr_key attr_key;
+ char strbuf[HFSPLUS_ATTR_MAX_STRLEN +
+ XATTR_MAC_OSX_PREFIX_LEN + 1] = {0};
+ int xattr_name_len;
+
+ if ((!S_ISREG(inode->i_mode) &&
+ !S_ISDIR(inode->i_mode)) ||
+ HFSPLUS_IS_RSRC(inode))
+ return -EOPNOTSUPP;
+
+ res = hfsplus_listxattr_finder_info(dentry, buffer, size);
+ if (res < 0)
+ return res;
+ else if (!HFSPLUS_SB(inode->i_sb)->attr_tree)
+ return (res == 0) ? -EOPNOTSUPP : res;
+
+ err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->attr_tree, &fd);
+ if (err) {
+ printk(KERN_ERR "hfs: can't init xattr find struct\n");
+ return err;
+ }
+
+ err = hfsplus_find_attr(inode->i_sb, inode->i_ino, NULL, &fd);
+ if (err) {
+ if (err == -ENOENT) {
+ if (res == 0)
+ res = -ENODATA;
+ goto end_listxattr;
+ } else {
+ res = err;
+ goto end_listxattr;
+ }
+ }
+
+ for (;;) {
+ key_len = hfs_bnode_read_u16(fd.bnode, fd.keyoffset);
+ if (key_len == 0 || key_len > fd.tree->max_key_len) {
+ printk(KERN_ERR "hfs: invalid xattr key length: %d\n",
+ key_len);
+ res = -EIO;
+ goto end_listxattr;
+ }
+
+ hfs_bnode_read(fd.bnode, &attr_key,
+ fd.keyoffset, key_len + sizeof(key_len));
+
+ if (be32_to_cpu(attr_key.cnid) != inode->i_ino)
+ goto end_listxattr;
+
+ xattr_name_len = HFSPLUS_ATTR_MAX_STRLEN;
+ if (hfsplus_uni2asc(inode->i_sb,
+ (const struct hfsplus_unistr *)&fd.key->attr.key_name,
+ strbuf, &xattr_name_len)) {
+ printk(KERN_ERR "hfs: unicode conversion failed\n");
+ res = -EIO;
+ goto end_listxattr;
+ }
+
+ if (!buffer || !size) {
+ if (can_list(strbuf))
+ res += name_len(strbuf, xattr_name_len);
+ } else if (can_list(strbuf)) {
+ if (size < (res + name_len(strbuf, xattr_name_len))) {
+ res = -ERANGE;
+ goto end_listxattr;
+ } else
+ res += copy_name(buffer + res,
+ strbuf, xattr_name_len);
+ }
+
+ if (hfs_brec_goto(&fd, 1))
+ goto end_listxattr;
+ }
+
+end_listxattr:
+ hfs_find_exit(&fd);
+ return res;
+}
+
+int hfsplus_removexattr(struct dentry *dentry, const char *name)
+{
+ int err = 0;
+ struct inode *inode = dentry->d_inode;
+ struct hfs_find_data cat_fd;
+ u16 flags;
+ u16 cat_entry_type;
+ int is_xattr_acl_deleted = 0;
+ int is_all_xattrs_deleted = 0;
+
+ if ((!S_ISREG(inode->i_mode) &&
+ !S_ISDIR(inode->i_mode)) ||
+ HFSPLUS_IS_RSRC(inode))
+ return -EOPNOTSUPP;
+
+ if (!HFSPLUS_SB(inode->i_sb)->attr_tree)
+ return -EOPNOTSUPP;
+
+ err = can_set_xattr(inode, name, NULL, 0);
+ if (err)
+ return err;
+
+ if (strncmp(name, XATTR_MAC_OSX_PREFIX,
+ XATTR_MAC_OSX_PREFIX_LEN) == 0)
+ name += XATTR_MAC_OSX_PREFIX_LEN;
+
+ if (!strcmp_xattr_finder_info(name))
+ return -EOPNOTSUPP;
+
+ err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &cat_fd);
+ if (err) {
+ printk(KERN_ERR "hfs: can't init xattr find struct\n");
+ return err;
+ }
+
+ err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &cat_fd);
+ if (err) {
+ printk(KERN_ERR "hfs: catalog searching failed\n");
+ goto end_removexattr;
+ }
+
+ err = hfsplus_delete_attr(inode, name);
+ if (err)
+ goto end_removexattr;
+
+ is_xattr_acl_deleted = !strcmp_xattr_acl(name);
+ is_all_xattrs_deleted = !hfsplus_attr_exists(inode, NULL);
+
+ if (!is_xattr_acl_deleted && !is_all_xattrs_deleted)
+ goto end_removexattr;
+
+ cat_entry_type = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset);
+
+ if (cat_entry_type == HFSPLUS_FOLDER) {
+ flags = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset +
+ offsetof(struct hfsplus_cat_folder, flags));
+ if (is_xattr_acl_deleted)
+ flags &= ~HFSPLUS_ACL_EXISTS;
+ if (is_all_xattrs_deleted)
+ flags &= ~HFSPLUS_XATTR_EXISTS;
+ hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset +
+ offsetof(struct hfsplus_cat_folder, flags),
+ flags);
+ hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY);
+ } else if (cat_entry_type == HFSPLUS_FILE) {
+ flags = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset +
+ offsetof(struct hfsplus_cat_file, flags));
+ if (is_xattr_acl_deleted)
+ flags &= ~HFSPLUS_ACL_EXISTS;
+ if (is_all_xattrs_deleted)
+ flags &= ~HFSPLUS_XATTR_EXISTS;
+ hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset +
+ offsetof(struct hfsplus_cat_file, flags),
+ flags);
+ hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY);
+ } else {
+ printk(KERN_ERR "hfs: invalid catalog entry type\n");
+ err = -EIO;
+ goto end_removexattr;
+ }
+
+end_removexattr:
+ hfs_find_exit(&cat_fd);
+ return err;
+}
+
+static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
+{
+ char xattr_name[HFSPLUS_ATTR_MAX_STRLEN +
+ XATTR_MAC_OSX_PREFIX_LEN + 1] = {0};
+ size_t len = strlen(name);
+
+ if (!strcmp(name, ""))
+ return -EINVAL;
+
+ if (len > HFSPLUS_ATTR_MAX_STRLEN)
+ return -EOPNOTSUPP;
+
+ strcpy(xattr_name, XATTR_MAC_OSX_PREFIX);
+ strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name);
+
+ return hfsplus_getxattr(dentry, xattr_name, buffer, size);
+}
+
+static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name,
+ const void *buffer, size_t size, int flags, int type)
+{
+ char xattr_name[HFSPLUS_ATTR_MAX_STRLEN +
+ XATTR_MAC_OSX_PREFIX_LEN + 1] = {0};
+ size_t len = strlen(name);
+
+ if (!strcmp(name, ""))
+ return -EINVAL;
+
+ if (len > HFSPLUS_ATTR_MAX_STRLEN)
+ return -EOPNOTSUPP;
+
+ strcpy(xattr_name, XATTR_MAC_OSX_PREFIX);
+ strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name);
+
+ return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
+}
+
+static size_t hfsplus_osx_listxattr(struct dentry *dentry, char *list,
+ size_t list_size, const char *name, size_t name_len, int type)
+{
+ /*
+ * This method is not used.
+ * It is used hfsplus_listxattr() instead of generic_listxattr().
+ */
+ return -EOPNOTSUPP;
+}
+
+const struct xattr_handler hfsplus_xattr_osx_handler = {
+ .prefix = XATTR_MAC_OSX_PREFIX,
+ .list = hfsplus_osx_listxattr,
+ .get = hfsplus_osx_getxattr,
+ .set = hfsplus_osx_setxattr,
+};
diff --git a/fs/hfsplus/xattr.h b/fs/hfsplus/xattr.h
new file mode 100644
index 000000000000..847b695b984d
--- /dev/null
+++ b/fs/hfsplus/xattr.h
@@ -0,0 +1,60 @@
+/*
+ * linux/fs/hfsplus/xattr.h
+ *
+ * Vyacheslav Dubeyko <slava@dubeyko.com>
+ *
+ * Logic of processing extended attributes
+ */
+
+#ifndef _LINUX_HFSPLUS_XATTR_H
+#define _LINUX_HFSPLUS_XATTR_H
+
+#include <linux/xattr.h>
+
+extern const struct xattr_handler hfsplus_xattr_osx_handler;
+extern const struct xattr_handler hfsplus_xattr_user_handler;
+extern const struct xattr_handler hfsplus_xattr_trusted_handler;
+/*extern const struct xattr_handler hfsplus_xattr_acl_access_handler;*/
+/*extern const struct xattr_handler hfsplus_xattr_acl_default_handler;*/
+extern const struct xattr_handler hfsplus_xattr_security_handler;
+
+extern const struct xattr_handler *hfsplus_xattr_handlers[];
+
+int __hfsplus_setxattr(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags);
+
+static inline int hfsplus_setxattr(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return __hfsplus_setxattr(dentry->d_inode, name, value, size, flags);
+}
+
+ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
+ void *value, size_t size);
+
+ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
+
+int hfsplus_removexattr(struct dentry *dentry, const char *name);
+
+int hfsplus_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr);
+
+static inline int hfsplus_init_acl(struct inode *inode, struct inode *dir)
+{
+ /*TODO: implement*/
+ return 0;
+}
+
+static inline int hfsplus_init_inode_security(struct inode *inode,
+ struct inode *dir,
+ const struct qstr *qstr)
+{
+ int err;
+
+ err = hfsplus_init_acl(inode, dir);
+ if (!err)
+ err = hfsplus_init_security(inode, dir, qstr);
+ return err;
+}
+
+#endif
diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c
new file mode 100644
index 000000000000..83b842f113c5
--- /dev/null
+++ b/fs/hfsplus/xattr_security.c
@@ -0,0 +1,104 @@
+/*
+ * linux/fs/hfsplus/xattr_trusted.c
+ *
+ * Vyacheslav Dubeyko <slava@dubeyko.com>
+ *
+ * Handler for storing security labels as extended attributes.
+ */
+
+#include <linux/security.h>
+#include "hfsplus_fs.h"
+#include "xattr.h"
+
+static int hfsplus_security_getxattr(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
+{
+ char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
+ size_t len = strlen(name);
+
+ if (!strcmp(name, ""))
+ return -EINVAL;
+
+ if (len + XATTR_SECURITY_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
+ return -EOPNOTSUPP;
+
+ strcpy(xattr_name, XATTR_SECURITY_PREFIX);
+ strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name);
+
+ return hfsplus_getxattr(dentry, xattr_name, buffer, size);
+}
+
+static int hfsplus_security_setxattr(struct dentry *dentry, const char *name,
+ const void *buffer, size_t size, int flags, int type)
+{
+ char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
+ size_t len = strlen(name);
+
+ if (!strcmp(name, ""))
+ return -EINVAL;
+
+ if (len + XATTR_SECURITY_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
+ return -EOPNOTSUPP;
+
+ strcpy(xattr_name, XATTR_SECURITY_PREFIX);
+ strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name);
+
+ return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
+}
+
+static size_t hfsplus_security_listxattr(struct dentry *dentry, char *list,
+ size_t list_size, const char *name, size_t name_len, int type)
+{
+ /*
+ * This method is not used.
+ * It is used hfsplus_listxattr() instead of generic_listxattr().
+ */
+ return -EOPNOTSUPP;
+}
+
+static int hfsplus_initxattrs(struct inode *inode,
+ const struct xattr *xattr_array,
+ void *fs_info)
+{
+ const struct xattr *xattr;
+ char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
+ size_t xattr_name_len;
+ int err = 0;
+
+ for (xattr = xattr_array; xattr->name != NULL; xattr++) {
+ xattr_name_len = strlen(xattr->name);
+
+ if (xattr_name_len == 0)
+ continue;
+
+ if (xattr_name_len + XATTR_SECURITY_PREFIX_LEN >
+ HFSPLUS_ATTR_MAX_STRLEN)
+ return -EOPNOTSUPP;
+
+ strcpy(xattr_name, XATTR_SECURITY_PREFIX);
+ strcpy(xattr_name +
+ XATTR_SECURITY_PREFIX_LEN, xattr->name);
+ memset(xattr_name +
+ XATTR_SECURITY_PREFIX_LEN + xattr_name_len, 0, 1);
+
+ err = __hfsplus_setxattr(inode, xattr_name,
+ xattr->value, xattr->value_len, 0);
+ if (err)
+ break;
+ }
+ return err;
+}
+
+int hfsplus_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
+{
+ return security_inode_init_security(inode, dir, qstr,
+ &hfsplus_initxattrs, NULL);
+}
+
+const struct xattr_handler hfsplus_xattr_security_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .list = hfsplus_security_listxattr,
+ .get = hfsplus_security_getxattr,
+ .set = hfsplus_security_setxattr,
+};
diff --git a/fs/hfsplus/xattr_trusted.c b/fs/hfsplus/xattr_trusted.c
new file mode 100644
index 000000000000..426cee277542
--- /dev/null
+++ b/fs/hfsplus/xattr_trusted.c
@@ -0,0 +1,63 @@
+/*
+ * linux/fs/hfsplus/xattr_trusted.c
+ *
+ * Vyacheslav Dubeyko <slava@dubeyko.com>
+ *
+ * Handler for trusted extended attributes.
+ */
+
+#include "hfsplus_fs.h"
+#include "xattr.h"
+
+static int hfsplus_trusted_getxattr(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
+{
+ char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
+ size_t len = strlen(name);
+
+ if (!strcmp(name, ""))
+ return -EINVAL;
+
+ if (len + XATTR_TRUSTED_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
+ return -EOPNOTSUPP;
+
+ strcpy(xattr_name, XATTR_TRUSTED_PREFIX);
+ strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name);
+
+ return hfsplus_getxattr(dentry, xattr_name, buffer, size);
+}
+
+static int hfsplus_trusted_setxattr(struct dentry *dentry, const char *name,
+ const void *buffer, size_t size, int flags, int type)
+{
+ char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
+ size_t len = strlen(name);
+
+ if (!strcmp(name, ""))
+ return -EINVAL;
+
+ if (len + XATTR_TRUSTED_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
+ return -EOPNOTSUPP;
+
+ strcpy(xattr_name, XATTR_TRUSTED_PREFIX);
+ strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name);
+
+ return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
+}
+
+static size_t hfsplus_trusted_listxattr(struct dentry *dentry, char *list,
+ size_t list_size, const char *name, size_t name_len, int type)
+{
+ /*
+ * This method is not used.
+ * It is used hfsplus_listxattr() instead of generic_listxattr().
+ */
+ return -EOPNOTSUPP;
+}
+
+const struct xattr_handler hfsplus_xattr_trusted_handler = {
+ .prefix = XATTR_TRUSTED_PREFIX,
+ .list = hfsplus_trusted_listxattr,
+ .get = hfsplus_trusted_getxattr,
+ .set = hfsplus_trusted_setxattr,
+};
diff --git a/fs/hfsplus/xattr_user.c b/fs/hfsplus/xattr_user.c
new file mode 100644
index 000000000000..e34016561ae0
--- /dev/null
+++ b/fs/hfsplus/xattr_user.c
@@ -0,0 +1,63 @@
+/*
+ * linux/fs/hfsplus/xattr_user.c
+ *
+ * Vyacheslav Dubeyko <slava@dubeyko.com>
+ *
+ * Handler for user extended attributes.
+ */
+
+#include "hfsplus_fs.h"
+#include "xattr.h"
+
+static int hfsplus_user_getxattr(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
+{
+ char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
+ size_t len = strlen(name);
+
+ if (!strcmp(name, ""))
+ return -EINVAL;
+
+ if (len + XATTR_USER_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
+ return -EOPNOTSUPP;
+
+ strcpy(xattr_name, XATTR_USER_PREFIX);
+ strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name);
+
+ return hfsplus_getxattr(dentry, xattr_name, buffer, size);
+}
+
+static int hfsplus_user_setxattr(struct dentry *dentry, const char *name,
+ const void *buffer, size_t size, int flags, int type)
+{
+ char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
+ size_t len = strlen(name);
+
+ if (!strcmp(name, ""))
+ return -EINVAL;
+
+ if (len + XATTR_USER_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
+ return -EOPNOTSUPP;
+
+ strcpy(xattr_name, XATTR_USER_PREFIX);
+ strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name);
+
+ return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
+}
+
+static size_t hfsplus_user_listxattr(struct dentry *dentry, char *list,
+ size_t list_size, const char *name, size_t name_len, int type)
+{
+ /*
+ * This method is not used.
+ * It is used hfsplus_listxattr() instead of generic_listxattr().
+ */
+ return -EOPNOTSUPP;
+}
+
+const struct xattr_handler hfsplus_xattr_user_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .list = hfsplus_user_listxattr,
+ .get = hfsplus_user_getxattr,
+ .set = hfsplus_user_setxattr,
+};
diff --git a/fs/inode.c b/fs/inode.c
index 14084b72b259..83a601c2ad55 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -798,11 +798,10 @@ static struct inode *find_inode(struct super_block *sb,
int (*test)(struct inode *, void *),
void *data)
{
- struct hlist_node *node;
struct inode *inode = NULL;
repeat:
- hlist_for_each_entry(inode, node, head, i_hash) {
+ hlist_for_each_entry(inode, head, i_hash) {
spin_lock(&inode->i_lock);
if (inode->i_sb != sb) {
spin_unlock(&inode->i_lock);
@@ -830,11 +829,10 @@ repeat:
static struct inode *find_inode_fast(struct super_block *sb,
struct hlist_head *head, unsigned long ino)
{
- struct hlist_node *node;
struct inode *inode = NULL;
repeat:
- hlist_for_each_entry(inode, node, head, i_hash) {
+ hlist_for_each_entry(inode, head, i_hash) {
spin_lock(&inode->i_lock);
if (inode->i_ino != ino) {
spin_unlock(&inode->i_lock);
@@ -1132,11 +1130,10 @@ EXPORT_SYMBOL(iget_locked);
static int test_inode_iunique(struct super_block *sb, unsigned long ino)
{
struct hlist_head *b = inode_hashtable + hash(sb, ino);
- struct hlist_node *node;
struct inode *inode;
spin_lock(&inode_hash_lock);
- hlist_for_each_entry(inode, node, b, i_hash) {
+ hlist_for_each_entry(inode, b, i_hash) {
if (inode->i_ino == ino && inode->i_sb == sb) {
spin_unlock(&inode_hash_lock);
return 0;
@@ -1291,10 +1288,9 @@ int insert_inode_locked(struct inode *inode)
struct hlist_head *head = inode_hashtable + hash(sb, ino);
while (1) {
- struct hlist_node *node;
struct inode *old = NULL;
spin_lock(&inode_hash_lock);
- hlist_for_each_entry(old, node, head, i_hash) {
+ hlist_for_each_entry(old, head, i_hash) {
if (old->i_ino != ino)
continue;
if (old->i_sb != sb)
@@ -1306,7 +1302,7 @@ int insert_inode_locked(struct inode *inode)
}
break;
}
- if (likely(!node)) {
+ if (likely(!old)) {
spin_lock(&inode->i_lock);
inode->i_state |= I_NEW;
hlist_add_head(&inode->i_hash, head);
@@ -1334,11 +1330,10 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
struct hlist_head *head = inode_hashtable + hash(sb, hashval);
while (1) {
- struct hlist_node *node;
struct inode *old = NULL;
spin_lock(&inode_hash_lock);
- hlist_for_each_entry(old, node, head, i_hash) {
+ hlist_for_each_entry(old, head, i_hash) {
if (old->i_sb != sb)
continue;
if (!test(old, data))
@@ -1350,7 +1345,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
}
break;
}
- if (likely(!node)) {
+ if (likely(!old)) {
spin_lock(&inode->i_lock);
inode->i_state |= I_NEW;
hlist_add_head(&inode->i_hash, head);
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index b7dc47ba675e..1781f06aa1c1 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -23,6 +23,7 @@
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/writeback.h>
+#include <linux/aio.h>
#include "jfs_incore.h"
#include "jfs_inode.h"
#include "jfs_filsys.h"
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 0e17090c310f..abdd75d44dd4 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -32,15 +32,15 @@
static struct hlist_head nlm_server_hosts[NLM_HOST_NRHASH];
static struct hlist_head nlm_client_hosts[NLM_HOST_NRHASH];
-#define for_each_host(host, pos, chain, table) \
+#define for_each_host(host, chain, table) \
for ((chain) = (table); \
(chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \
- hlist_for_each_entry((host), (pos), (chain), h_hash)
+ hlist_for_each_entry((host), (chain), h_hash)
-#define for_each_host_safe(host, pos, next, chain, table) \
+#define for_each_host_safe(host, next, chain, table) \
for ((chain) = (table); \
(chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \
- hlist_for_each_entry_safe((host), (pos), (next), \
+ hlist_for_each_entry_safe((host), (next), \
(chain), h_hash)
static unsigned long nrhosts;
@@ -225,7 +225,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
.net = net,
};
struct hlist_head *chain;
- struct hlist_node *pos;
struct nlm_host *host;
struct nsm_handle *nsm = NULL;
struct lockd_net *ln = net_generic(net, lockd_net_id);
@@ -237,7 +236,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
mutex_lock(&nlm_host_mutex);
chain = &nlm_client_hosts[nlm_hash_address(sap)];
- hlist_for_each_entry(host, pos, chain, h_hash) {
+ hlist_for_each_entry(host, chain, h_hash) {
if (host->net != net)
continue;
if (!rpc_cmp_addr(nlm_addr(host), sap))
@@ -322,7 +321,6 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
const size_t hostname_len)
{
struct hlist_head *chain;
- struct hlist_node *pos;
struct nlm_host *host = NULL;
struct nsm_handle *nsm = NULL;
struct sockaddr *src_sap = svc_daddr(rqstp);
@@ -350,7 +348,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
nlm_gc_hosts(net);
chain = &nlm_server_hosts[nlm_hash_address(ni.sap)];
- hlist_for_each_entry(host, pos, chain, h_hash) {
+ hlist_for_each_entry(host, chain, h_hash) {
if (host->net != net)
continue;
if (!rpc_cmp_addr(nlm_addr(host), ni.sap))
@@ -515,10 +513,9 @@ static struct nlm_host *next_host_state(struct hlist_head *cache,
{
struct nlm_host *host;
struct hlist_head *chain;
- struct hlist_node *pos;
mutex_lock(&nlm_host_mutex);
- for_each_host(host, pos, chain, cache) {
+ for_each_host(host, chain, cache) {
if (host->h_nsmhandle == nsm
&& host->h_nsmstate != info->state) {
host->h_nsmstate = info->state;
@@ -570,7 +567,6 @@ void nlm_host_rebooted(const struct nlm_reboot *info)
static void nlm_complain_hosts(struct net *net)
{
struct hlist_head *chain;
- struct hlist_node *pos;
struct nlm_host *host;
if (net) {
@@ -587,7 +583,7 @@ static void nlm_complain_hosts(struct net *net)
dprintk("lockd: %lu hosts left:\n", nrhosts);
}
- for_each_host(host, pos, chain, nlm_server_hosts) {
+ for_each_host(host, chain, nlm_server_hosts) {
if (net && host->net != net)
continue;
dprintk(" %s (cnt %d use %d exp %ld net %p)\n",
@@ -600,14 +596,13 @@ void
nlm_shutdown_hosts_net(struct net *net)
{
struct hlist_head *chain;
- struct hlist_node *pos;
struct nlm_host *host;
mutex_lock(&nlm_host_mutex);
/* First, make all hosts eligible for gc */
dprintk("lockd: nuking all hosts in net %p...\n", net);
- for_each_host(host, pos, chain, nlm_server_hosts) {
+ for_each_host(host, chain, nlm_server_hosts) {
if (net && host->net != net)
continue;
host->h_expires = jiffies - 1;
@@ -644,11 +639,11 @@ static void
nlm_gc_hosts(struct net *net)
{
struct hlist_head *chain;
- struct hlist_node *pos, *next;
+ struct hlist_node *next;
struct nlm_host *host;
dprintk("lockd: host garbage collection for net %p\n", net);
- for_each_host(host, pos, chain, nlm_server_hosts) {
+ for_each_host(host, chain, nlm_server_hosts) {
if (net && host->net != net)
continue;
host->h_inuse = 0;
@@ -657,7 +652,7 @@ nlm_gc_hosts(struct net *net)
/* Mark all hosts that hold locks, blocks or shares */
nlmsvc_mark_resources(net);
- for_each_host_safe(host, pos, next, chain, nlm_server_hosts) {
+ for_each_host_safe(host, next, chain, nlm_server_hosts) {
if (net && host->net != net)
continue;
if (atomic_read(&host->h_count) || host->h_inuse
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 0deb5f6c9dd4..747d40591675 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -84,7 +84,6 @@ __be32
nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
struct nfs_fh *f)
{
- struct hlist_node *pos;
struct nlm_file *file;
unsigned int hash;
__be32 nfserr;
@@ -96,7 +95,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
/* Lock file table */
mutex_lock(&nlm_file_mutex);
- hlist_for_each_entry(file, pos, &nlm_files[hash], f_list)
+ hlist_for_each_entry(file, &nlm_files[hash], f_list)
if (!nfs_compare_fh(&file->f_handle, f))
goto found;
@@ -248,13 +247,13 @@ static int
nlm_traverse_files(void *data, nlm_host_match_fn_t match,
int (*is_failover_file)(void *data, struct nlm_file *file))
{
- struct hlist_node *pos, *next;
+ struct hlist_node *next;
struct nlm_file *file;
int i, ret = 0;
mutex_lock(&nlm_file_mutex);
for (i = 0; i < FILE_NRHASH; i++) {
- hlist_for_each_entry_safe(file, pos, next, &nlm_files[i], f_list) {
+ hlist_for_each_entry_safe(file, next, &nlm_files[i], f_list) {
if (is_failover_file && !is_failover_file(data, file))
continue;
file->f_count++;
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 6958adfaff08..24a05ba3de26 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -819,7 +819,7 @@ long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case NCP_IOC_CONN_LOGGED_IN:
case NCP_IOC_SETROOT:
if (!capable(CAP_SYS_ADMIN)) {
- ret = -EACCES;
+ ret = -EPERM;
goto out;
}
break;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 9f3c66438d0e..84d8eae203a7 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -197,7 +197,6 @@ error_0:
EXPORT_SYMBOL_GPL(nfs_alloc_client);
#if IS_ENABLED(CONFIG_NFS_V4)
-/* idr_remove_all is not needed as all id's are removed by nfs_put_client */
void nfs_cleanup_cb_ident_idr(struct net *net)
{
struct nfs_net *nn = net_generic(net, nfs_net_id);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 2e9779b58b7a..47d100872390 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -29,15 +29,14 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
if (clp->rpc_ops->version != 4 || minorversion != 0)
return ret;
-retry:
- if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL))
- return -ENOMEM;
+ idr_preload(GFP_KERNEL);
spin_lock(&nn->nfs_client_lock);
- ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident);
+ ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT);
+ if (ret >= 0)
+ clp->cl_cb_ident = ret;
spin_unlock(&nn->nfs_client_lock);
- if (ret == -EAGAIN)
- goto retry;
- return ret;
+ idr_preload_end();
+ return ret < 0 ? ret : 0;
}
#ifdef CONFIG_NFS_V4_1
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index d35b62e83ea6..6da209bd9408 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -77,9 +77,8 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld,
long hash)
{
struct nfs4_deviceid_node *d;
- struct hlist_node *n;
- hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
+ hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node)
if (d->ld == ld && d->nfs_client == clp &&
!memcmp(&d->deviceid, id, sizeof(*id))) {
if (atomic_read(&d->ref))
@@ -248,12 +247,11 @@ static void
_deviceid_purge_client(const struct nfs_client *clp, long hash)
{
struct nfs4_deviceid_node *d;
- struct hlist_node *n;
HLIST_HEAD(tmp);
spin_lock(&nfs4_deviceid_lock);
rcu_read_lock();
- hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
+ hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node)
if (d->nfs_client == clp && atomic_read(&d->ref)) {
hlist_del_init_rcu(&d->node);
hlist_add_head(&d->tmpnode, &tmp);
@@ -291,12 +289,11 @@ void
nfs4_deviceid_mark_client_invalid(struct nfs_client *clp)
{
struct nfs4_deviceid_node *d;
- struct hlist_node *n;
int i;
rcu_read_lock();
for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){
- hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node)
+ hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[i], node)
if (d->nfs_client == clp)
set_bit(NFS_DEVICEID_INVALID, &d->flags);
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a6637de7ae13..b99ccde31d06 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -151,7 +151,7 @@ get_nfs4_file(struct nfs4_file *fi)
}
static int num_delegations;
-unsigned int max_delegations;
+unsigned long max_delegations;
/*
* Open owner state (share locks)
@@ -700,8 +700,8 @@ static int nfsd4_get_drc_mem(int slotsize, u32 num)
num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION);
spin_lock(&nfsd_drc_lock);
- avail = min_t(int, NFSD_MAX_MEM_PER_SESSION,
- nfsd_drc_max_mem - nfsd_drc_mem_used);
+ avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION,
+ nfsd_drc_max_mem - nfsd_drc_mem_used);
num = min_t(int, num, avail / slotsize);
nfsd_drc_mem_used += num * slotsize;
spin_unlock(&nfsd_drc_lock);
@@ -1060,7 +1060,6 @@ free_client(struct nfs4_client *clp)
}
free_svc_cred(&clp->cl_cred);
kfree(clp->cl_name.data);
- idr_remove_all(&clp->cl_stateids);
idr_destroy(&clp->cl_stateids);
kfree(clp);
}
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index d16a5d6435f2..35cdb934c663 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -246,7 +246,6 @@ static struct svc_cacherep *
nfsd_cache_search(struct svc_rqst *rqstp)
{
struct svc_cacherep *rp;
- struct hlist_node *hn;
struct hlist_head *rh;
__be32 xid = rqstp->rq_xid;
u32 proto = rqstp->rq_prot,
@@ -254,7 +253,7 @@ nfsd_cache_search(struct svc_rqst *rqstp)
proc = rqstp->rq_proc;
rh = &cache_hash[request_hash(xid)];
- hlist_for_each_entry(rp, hn, rh, c_hash) {
+ hlist_for_each_entry(rp, rh, c_hash) {
if (xid == rp->c_xid && proc == rp->c_proc &&
proto == rp->c_prot && vers == rp->c_vers &&
rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) &&
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index de23db255c69..07a473fd49bc 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -56,8 +56,8 @@ extern struct svc_version nfsd_version2, nfsd_version3,
extern u32 nfsd_supported_minorversion;
extern struct mutex nfsd_mutex;
extern spinlock_t nfsd_drc_lock;
-extern unsigned int nfsd_drc_max_mem;
-extern unsigned int nfsd_drc_mem_used;
+extern unsigned long nfsd_drc_max_mem;
+extern unsigned long nfsd_drc_mem_used;
extern const struct seq_operations nfs_exports_op;
@@ -106,7 +106,7 @@ static inline int nfsd_v4client(struct svc_rqst *rq)
* NFSv4 State
*/
#ifdef CONFIG_NFSD_V4
-extern unsigned int max_delegations;
+extern unsigned long max_delegations;
void nfs4_state_init(void);
int nfsd4_init_slabs(void);
void nfsd4_free_slabs(void);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 40cb1cbaba4e..53e7f4585dc4 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -59,8 +59,8 @@ DEFINE_MUTEX(nfsd_mutex);
* nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
*/
spinlock_t nfsd_drc_lock;
-unsigned int nfsd_drc_max_mem;
-unsigned int nfsd_drc_mem_used;
+unsigned long nfsd_drc_max_mem;
+unsigned long nfsd_drc_mem_used;
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
static struct svc_stat nfsd_acl_svcstats;
@@ -342,7 +342,7 @@ static void set_max_drc(void)
>> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
nfsd_drc_mem_used = 0;
spin_lock_init(&nfsd_drc_lock);
- dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem);
+ dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem);
}
static int nfsd_get_default_max_blksize(void)
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 61946883025c..bec4af6eab13 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -126,7 +126,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
nilfs_transaction_commit(inode->i_sb);
mapped:
- wait_on_page_writeback(page);
+ wait_for_stable_page(page);
out:
sb_end_pagefault(inode->i_sb);
return block_page_mkwrite_return(ret);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 6b49f14eac8c..1e92930d59c3 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -25,7 +25,7 @@
#include <linux/gfp.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
-#include <linux/uio.h>
+#include <linux/aio.h>
#include "nilfs.h"
#include "btnode.h"
#include "segment.h"
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 6baadb5a8430..4bb21d67d9b1 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -52,7 +52,6 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
void __fsnotify_update_child_dentry_flags(struct inode *inode)
{
struct dentry *alias;
- struct hlist_node *p;
int watched;
if (!S_ISDIR(inode->i_mode))
@@ -64,7 +63,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
spin_lock(&inode->i_lock);
/* run all of the dentries associated with this inode. Since this is a
* directory, there damn well better only be one item on this list */
- hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
struct dentry *child;
/* run all of the children of the original inode and fix their
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index f31e90fc050d..74825be65b7b 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -36,12 +36,11 @@
static void fsnotify_recalc_inode_mask_locked(struct inode *inode)
{
struct fsnotify_mark *mark;
- struct hlist_node *pos;
__u32 new_mask = 0;
assert_spin_locked(&inode->i_lock);
- hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list)
+ hlist_for_each_entry(mark, &inode->i_fsnotify_marks, i.i_list)
new_mask |= mark->mask;
inode->i_fsnotify_mask = new_mask;
}
@@ -87,11 +86,11 @@ void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark)
void fsnotify_clear_marks_by_inode(struct inode *inode)
{
struct fsnotify_mark *mark, *lmark;
- struct hlist_node *pos, *n;
+ struct hlist_node *n;
LIST_HEAD(free_list);
spin_lock(&inode->i_lock);
- hlist_for_each_entry_safe(mark, pos, n, &inode->i_fsnotify_marks, i.i_list) {
+ hlist_for_each_entry_safe(mark, n, &inode->i_fsnotify_marks, i.i_list) {
list_add(&mark->i.free_i_list, &free_list);
hlist_del_init_rcu(&mark->i.i_list);
fsnotify_get_mark(mark);
@@ -129,11 +128,10 @@ static struct fsnotify_mark *fsnotify_find_inode_mark_locked(
struct inode *inode)
{
struct fsnotify_mark *mark;
- struct hlist_node *pos;
assert_spin_locked(&inode->i_lock);
- hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list) {
+ hlist_for_each_entry(mark, &inode->i_fsnotify_marks, i.i_list) {
if (mark->group == group) {
fsnotify_get_mark(mark);
return mark;
@@ -194,8 +192,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
struct fsnotify_group *group, struct inode *inode,
int allow_dups)
{
- struct fsnotify_mark *lmark;
- struct hlist_node *node, *last = NULL;
+ struct fsnotify_mark *lmark, *last = NULL;
int ret = 0;
mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
@@ -214,8 +211,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
}
/* should mark be in the middle of the current list? */
- hlist_for_each_entry(lmark, node, &inode->i_fsnotify_marks, i.i_list) {
- last = node;
+ hlist_for_each_entry(lmark, &inode->i_fsnotify_marks, i.i_list) {
+ last = lmark;
if ((lmark->group == group) && !allow_dups) {
ret = -EEXIST;
@@ -235,7 +232,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
BUG_ON(last == NULL);
/* mark should be the last entry. last is the current last entry */
- hlist_add_after_rcu(last, &mark->i.i_list);
+ hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list);
out:
fsnotify_recalc_inode_mask_locked(inode);
spin_unlock(&inode->i_lock);
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 871569c7d609..4216308b81b4 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -197,7 +197,6 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
{
/* ideally the idr is empty and we won't hit the BUG in the callback */
idr_for_each(&group->inotify_data.idr, idr_callback, group);
- idr_remove_all(&group->inotify_data.idr);
idr_destroy(&group->inotify_data.idr);
atomic_dec(&group->inotify_data.user->inotify_devs);
free_uid(group->inotify_data.user);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 228a2c2ad8d7..e0f7c1241a6a 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -364,22 +364,20 @@ static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock,
{
int ret;
- do {
- if (unlikely(!idr_pre_get(idr, GFP_KERNEL)))
- return -ENOMEM;
+ idr_preload(GFP_KERNEL);
+ spin_lock(idr_lock);
- spin_lock(idr_lock);
- ret = idr_get_new_above(idr, i_mark, *last_wd + 1,
- &i_mark->wd);
+ ret = idr_alloc(idr, i_mark, *last_wd + 1, 0, GFP_NOWAIT);
+ if (ret >= 0) {
/* we added the mark to the idr, take a reference */
- if (!ret) {
- *last_wd = i_mark->wd;
- fsnotify_get_mark(&i_mark->fsn_mark);
- }
- spin_unlock(idr_lock);
- } while (ret == -EAGAIN);
+ i_mark->wd = ret;
+ *last_wd = i_mark->wd;
+ fsnotify_get_mark(&i_mark->fsn_mark);
+ }
- return ret;
+ spin_unlock(idr_lock);
+ idr_preload_end();
+ return ret < 0 ? ret : 0;
}
static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group *group,
@@ -576,8 +574,6 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
/* don't allow invalid bits: we don't want flags set */
mask = inotify_arg_to_mask(arg);
- if (unlikely(!(mask & IN_ALL_EVENTS)))
- return -EINVAL;
fsn_mark = fsnotify_find_inode_mark(group, inode);
if (!fsn_mark)
@@ -629,8 +625,6 @@ static int inotify_new_watch(struct fsnotify_group *group,
/* don't allow invalid bits: we don't want flags set */
mask = inotify_arg_to_mask(arg);
- if (unlikely(!(mask & IN_ALL_EVENTS)))
- return -EINVAL;
tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
if (unlikely(!tmp_i_mark))
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 4df58b8ea64a..68ca5a8704b5 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -33,12 +33,12 @@
void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
{
struct fsnotify_mark *mark, *lmark;
- struct hlist_node *pos, *n;
+ struct hlist_node *n;
struct mount *m = real_mount(mnt);
LIST_HEAD(free_list);
spin_lock(&mnt->mnt_root->d_lock);
- hlist_for_each_entry_safe(mark, pos, n, &m->mnt_fsnotify_marks, m.m_list) {
+ hlist_for_each_entry_safe(mark, n, &m->mnt_fsnotify_marks, m.m_list) {
list_add(&mark->m.free_m_list, &free_list);
hlist_del_init_rcu(&mark->m.m_list);
fsnotify_get_mark(mark);
@@ -71,12 +71,11 @@ static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt)
{
struct mount *m = real_mount(mnt);
struct fsnotify_mark *mark;
- struct hlist_node *pos;
__u32 new_mask = 0;
assert_spin_locked(&mnt->mnt_root->d_lock);
- hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list)
+ hlist_for_each_entry(mark, &m->mnt_fsnotify_marks, m.m_list)
new_mask |= mark->mask;
m->mnt_fsnotify_mask = new_mask;
}
@@ -114,11 +113,10 @@ static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_
{
struct mount *m = real_mount(mnt);
struct fsnotify_mark *mark;
- struct hlist_node *pos;
assert_spin_locked(&mnt->mnt_root->d_lock);
- hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list) {
+ hlist_for_each_entry(mark, &m->mnt_fsnotify_marks, m.m_list) {
if (mark->group == group) {
fsnotify_get_mark(mark);
return mark;
@@ -153,8 +151,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
int allow_dups)
{
struct mount *m = real_mount(mnt);
- struct fsnotify_mark *lmark;
- struct hlist_node *node, *last = NULL;
+ struct fsnotify_mark *lmark, *last = NULL;
int ret = 0;
mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
@@ -173,8 +170,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
}
/* should mark be in the middle of the current list? */
- hlist_for_each_entry(lmark, node, &m->mnt_fsnotify_marks, m.m_list) {
- last = node;
+ hlist_for_each_entry(lmark, &m->mnt_fsnotify_marks, m.m_list) {
+ last = lmark;
if ((lmark->group == group) && !allow_dups) {
ret = -EEXIST;
@@ -194,7 +191,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
BUG_ON(last == NULL);
/* mark should be the last entry. last is the current last entry */
- hlist_add_after_rcu(last, &mark->m.m_list);
+ hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list);
out:
fsnotify_recalc_vfsmount_mask_locked(mnt);
spin_unlock(&mnt->mnt_root->d_lock);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 5b2d4f0853ac..600af8fbae56 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -27,6 +27,7 @@
#include <linux/swap.h>
#include <linux/uio.h>
#include <linux/writeback.h>
+#include <linux/aio.h>
#include <asm/page.h>
#include <asm/uaccess.h>
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index d3e118cc6ffa..2778b0255dc6 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -28,6 +28,7 @@
#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/log2.h>
+#include <linux/aio.h>
#include "aops.h"
#include "attrib.h"
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 657743254eb9..f6fff671bb34 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -593,9 +593,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
level = ocfs2_iocb_rw_locked_level(iocb);
ocfs2_rw_unlock(inode, level);
+ inode_dio_done(inode);
if (is_async)
aio_complete(iocb, ret, 0);
- inode_dio_done(inode);
}
/*
@@ -1194,6 +1194,7 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
goto out;
}
}
+ wait_for_stable_page(wc->w_pages[i]);
if (index == target_index)
wc->w_target_page = wc->w_pages[i];
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index ffb2da370a99..f671e49beb34 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -22,6 +22,8 @@
#ifndef OCFS2_AOPS_H
#define OCFS2_AOPS_H
+#include <linux/aio.h>
+
handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
struct page *page,
unsigned from,
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index a1d83c58b296..33b892464d14 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -304,28 +304,22 @@ static u8 o2net_num_from_nn(struct o2net_node *nn)
static int o2net_prep_nsw(struct o2net_node *nn, struct o2net_status_wait *nsw)
{
- int ret = 0;
-
- do {
- if (!idr_pre_get(&nn->nn_status_idr, GFP_ATOMIC)) {
- ret = -EAGAIN;
- break;
- }
- spin_lock(&nn->nn_lock);
- ret = idr_get_new(&nn->nn_status_idr, nsw, &nsw->ns_id);
- if (ret == 0)
- list_add_tail(&nsw->ns_node_item,
- &nn->nn_status_list);
- spin_unlock(&nn->nn_lock);
- } while (ret == -EAGAIN);
+ int ret;
- if (ret == 0) {
- init_waitqueue_head(&nsw->ns_wq);
- nsw->ns_sys_status = O2NET_ERR_NONE;
- nsw->ns_status = 0;
+ spin_lock(&nn->nn_lock);
+ ret = idr_alloc(&nn->nn_status_idr, nsw, 0, 0, GFP_ATOMIC);
+ if (ret >= 0) {
+ nsw->ns_id = ret;
+ list_add_tail(&nsw->ns_node_item, &nn->nn_status_list);
}
+ spin_unlock(&nn->nn_lock);
+ if (ret < 0)
+ return ret;
- return ret;
+ init_waitqueue_head(&nsw->ns_wq);
+ nsw->ns_sys_status = O2NET_ERR_NONE;
+ nsw->ns_status = 0;
+ return 0;
}
static void o2net_complete_nsw_locked(struct o2net_node *nn,
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 8db4b58b2e4b..ef999729e274 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -169,11 +169,10 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
u64 parent_blkno,
int skip_unhashed)
{
- struct hlist_node *p;
struct dentry *dentry;
spin_lock(&inode->i_lock);
- hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
spin_lock(&dentry->d_lock);
if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
trace_ocfs2_find_local_alias(dentry->d_name.len,
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 15d81adb8d77..ba1e68923b21 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2090,7 +2090,6 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
u8 dead_node, u8 new_master)
{
int i;
- struct hlist_node *hash_iter;
struct hlist_head *bucket;
struct dlm_lock_resource *res, *next;
@@ -2121,7 +2120,7 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
* if necessary */
for (i = 0; i < DLM_HASH_BUCKETS; i++) {
bucket = dlm_lockres_hash(dlm, i);
- hlist_for_each_entry(res, hash_iter, bucket, hash_node) {
+ hlist_for_each_entry(res, bucket, hash_node) {
if (!(res->state & DLM_LOCK_RES_RECOVERING))
continue;
@@ -2280,7 +2279,6 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
{
- struct hlist_node *iter;
struct dlm_lock_resource *res;
int i;
struct hlist_head *bucket;
@@ -2306,7 +2304,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
*/
for (i = 0; i < DLM_HASH_BUCKETS; i++) {
bucket = dlm_lockres_hash(dlm, i);
- hlist_for_each_entry(res, iter, bucket, hash_node) {
+ hlist_for_each_entry(res, bucket, hash_node) {
/* always prune any $RECOVERY entries for dead nodes,
* otherwise hangs can occur during later recovery */
if (dlm_is_recovery_lock(res->lockname.name,
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 4f7795fb5fc0..bad2583881c8 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2322,7 +2322,7 @@ int ocfs2_inode_lock_full_nested(struct inode *inode,
status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
arg_flags, subclass, _RET_IP_);
if (status < 0) {
- if (status != -EAGAIN && status != -EIOCBRETRY)
+ if (status != -EAGAIN)
mlog_errno(status);
goto bail;
}
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 88924a3133fa..c765bdf6d60e 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -28,6 +28,8 @@
#include "extent_map.h"
+struct iocb;
+
/* OCFS2 Inode Private Data */
struct ocfs2_inode_info
{
diff --git a/fs/pipe.c b/fs/pipe.c
index bd3479db4b62..dc86533457b2 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -21,6 +21,7 @@
#include <linux/audit.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
+#include <linux/aio.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9b43ff77a51e..c6528c663df4 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -73,6 +73,7 @@
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/tracehook.h>
+#include <linux/printk.h>
#include <linux/cgroup.h>
#include <linux/cpuset.h>
#include <linux/audit.h>
@@ -952,7 +953,7 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf,
* /proc/pid/oom_adj is provided for legacy purposes, ask users to use
* /proc/pid/oom_score_adj instead.
*/
- printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
+ pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
current->comm, task_pid_nr(current), task_pid_nr(task),
task_pid_nr(task));
@@ -1711,7 +1712,7 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
return -ECHILD;
if (!capable(CAP_SYS_ADMIN)) {
- status = -EACCES;
+ status = -EPERM;
goto out_notask;
}
@@ -1844,7 +1845,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
struct dentry *result;
struct mm_struct *mm;
- result = ERR_PTR(-EACCES);
+ result = ERR_PTR(-EPERM);
if (!capable(CAP_SYS_ADMIN))
goto out;
@@ -1900,7 +1901,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
ino_t ino;
int ret;
- ret = -EACCES;
+ ret = -EPERM;
if (!capable(CAP_SYS_ADMIN))
goto out;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 76ddae83daa5..d00072351cd5 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -15,6 +15,7 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/printk.h>
#include <linux/mount.h>
#include <linux/init.h>
#include <linux/idr.h>
@@ -132,11 +133,8 @@ __proc_file_read(struct file *file, char __user *buf, size_t nbytes,
}
if (start == NULL) {
- if (n > PAGE_SIZE) {
- printk(KERN_ERR
- "proc_file_read: Apparent buffer overflow!\n");
+ if (n > PAGE_SIZE) /* Apparent buffer overflow */
n = PAGE_SIZE;
- }
n -= *ppos;
if (n <= 0)
break;
@@ -144,26 +142,19 @@ __proc_file_read(struct file *file, char __user *buf, size_t nbytes,
n = count;
start = page + *ppos;
} else if (start < page) {
- if (n > PAGE_SIZE) {
- printk(KERN_ERR
- "proc_file_read: Apparent buffer overflow!\n");
+ if (n > PAGE_SIZE) /* Apparent buffer overflow */
n = PAGE_SIZE;
- }
if (n > count) {
/*
* Don't reduce n because doing so might
* cut off part of a data block.
*/
- printk(KERN_WARNING
- "proc_file_read: Read count exceeded\n");
+ pr_warn("proc_file_read: count exceeded\n");
}
} else /* start >= page */ {
unsigned long startoff = (unsigned long)(start - page);
- if (n > (PAGE_SIZE - startoff)) {
- printk(KERN_ERR
- "proc_file_read: Apparent buffer overflow!\n");
+ if (n > (PAGE_SIZE - startoff)) /* buffer overflow? */
n = PAGE_SIZE - startoff;
- }
if (n > count)
n = count;
}
@@ -576,7 +567,7 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
for (tmp = dir->subdir; tmp; tmp = tmp->next)
if (strcmp(tmp->name, dp->name) == 0) {
- WARN(1, KERN_WARNING "proc_dir_entry '%s/%s' already registered\n",
+ WARN(1, "proc_dir_entry '%s/%s' already registered\n",
dir->name, dp->name);
break;
}
@@ -837,7 +828,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
if (S_ISDIR(de->mode))
parent->nlink--;
de->nlink = 0;
- WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory "
+ WARN(de->subdir, "%s: removing non-empty directory "
"'%s/%s', leaking at least '%s'\n", __func__,
de->parent->name, de->name, de->subdir->name);
pde_put(de);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 439ae6886507..7a59f50f64d6 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -13,6 +13,7 @@
#include <linux/stat.h>
#include <linux/completion.h>
#include <linux/poll.h>
+#include <linux/printk.h>
#include <linux/file.h>
#include <linux/limits.h>
#include <linux/init.h>
@@ -486,6 +487,8 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
int proc_fill_super(struct super_block *s)
{
+ struct inode *root_inode;
+
s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
s->s_blocksize = 1024;
s->s_blocksize_bits = 10;
@@ -494,11 +497,18 @@ int proc_fill_super(struct super_block *s)
s->s_time_gran = 1;
pde_get(&proc_root);
- s->s_root = d_make_root(proc_get_inode(s, &proc_root));
- if (s->s_root)
- return 0;
+ root_inode = proc_get_inode(s, &proc_root);
+ if (!root_inode) {
+ pr_warn("proc_fill_super: get root inode failed\n");
+ pde_put(&proc_root);
+ return -ENOMEM;
+ }
- printk("proc_read_super: get root inode failed\n");
- pde_put(&proc_root);
- return -ENOMEM;
+ s->s_root = d_make_root(root_inode);
+ if (!s->s_root) {
+ pr_err("proc_fill_super: allocate dentry failed\n");
+ return -ENOMEM;
+ }
+
+ return 0;
}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 252544c05207..85ff3a4598b3 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -11,6 +11,7 @@
#include <linux/sched.h>
#include <linux/proc_fs.h>
+#include <linux/binfmts.h>
struct ctl_table_header;
struct mempolicy;
@@ -108,7 +109,7 @@ static inline int task_dumpable(struct task_struct *task)
if (mm)
dumpable = get_dumpable(mm);
task_unlock(task);
- if (dumpable == SUID_DUMPABLE_ENABLED)
+ if (dumpable == SUID_DUMP_USER)
return 1;
return 0;
}
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e96d4f18ca3a..eda6f017f272 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -17,6 +17,7 @@
#include <linux/elfcore.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
+#include <linux/printk.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/slab.h>
@@ -619,7 +620,7 @@ static int __init proc_kcore_init(void)
proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
&proc_kcore_operations);
if (!proc_root_kcore) {
- printk(KERN_ERR "couldn't create /proc/kcore\n");
+ pr_err("couldn't create /proc/kcore\n");
return 0; /* Always returns 0. */
}
/* Store text area if it's special */
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 80e4645f7990..1efaaa19c4f3 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -40,7 +40,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
* sysctl_overcommit_ratio / 100) + total_swap_pages;
cached = global_page_state(NR_FILE_PAGES) -
- total_swapcache_pages - i.bufferram;
+ total_swapcache_pages() - i.bufferram;
if (cached < 0)
cached = 0;
@@ -109,7 +109,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
K(i.freeram),
K(i.bufferram),
K(cached),
- K(total_swapcache_pages),
+ K(total_swapcache_pages()),
K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]),
K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
K(pages[LRU_ACTIVE_ANON]),
@@ -158,7 +158,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
vmi.used >> 10,
vmi.largest_chunk >> 10
#ifdef CONFIG_MEMORY_FAILURE
- ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10)
+ ,atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index de20ec480fa0..30b590f5bd35 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -8,6 +8,7 @@
#include <linux/time.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/printk.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/of.h>
@@ -110,8 +111,8 @@ void proc_device_tree_update_prop(struct proc_dir_entry *pde,
if (ent->data == oldprop)
break;
if (ent == NULL) {
- printk(KERN_WARNING "device-tree: property \"%s\" "
- " does not exist\n", oldprop->name);
+ pr_warn("device-tree: property \"%s\" does not exist\n",
+ oldprop->name);
} else {
ent->data = newprop;
ent->size = newprop->length;
@@ -153,8 +154,8 @@ static const char *fixup_name(struct device_node *np, struct proc_dir_entry *de,
realloc:
fixed_name = kmalloc(fixup_len, GFP_KERNEL);
if (fixed_name == NULL) {
- printk(KERN_ERR "device-tree: Out of memory trying to fixup "
- "name \"%s\"\n", name);
+ pr_err("device-tree: Out of memory trying to fixup "
+ "name \"%s\"\n", name);
return name;
}
@@ -175,8 +176,8 @@ retry:
goto retry;
}
- printk(KERN_WARNING "device-tree: Duplicate name in %s, "
- "renamed to \"%s\"\n", np->full_name, fixed_name);
+ pr_warn("device-tree: Duplicate name in %s, renamed to \"%s\"\n",
+ np->full_name, fixed_name);
return fixed_name;
}
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 1827d88ad58b..6384b04175af 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -5,6 +5,7 @@
#include <linux/sysctl.h>
#include <linux/poll.h>
#include <linux/proc_fs.h>
+#include <linux/printk.h>
#include <linux/security.h>
#include <linux/sched.h>
#include <linux/namei.h>
@@ -57,7 +58,7 @@ static void sysctl_print_dir(struct ctl_dir *dir)
{
if (dir->header.parent)
sysctl_print_dir(dir->header.parent);
- printk(KERN_CONT "%s/", dir->header.ctl_table[0].procname);
+ pr_cont("%s/", dir->header.ctl_table[0].procname);
}
static int namecmp(const char *name1, int len1, const char *name2, int len2)
@@ -134,9 +135,9 @@ static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry)
else if (cmp > 0)
p = &(*p)->rb_right;
else {
- printk(KERN_ERR "sysctl duplicate entry: ");
+ pr_err("sysctl duplicate entry: ");
sysctl_print_dir(head->parent);
- printk(KERN_CONT "/%s\n", entry->procname);
+ pr_cont("/%s\n", entry->procname);
return -EEXIST;
}
}
@@ -927,9 +928,9 @@ found:
subdir->header.nreg++;
failed:
if (unlikely(IS_ERR(subdir))) {
- printk(KERN_ERR "sysctl could not get directory: ");
+ pr_err("sysctl could not get directory: ");
sysctl_print_dir(dir);
- printk(KERN_CONT "/%*.*s %ld\n",
+ pr_cont("/%*.*s %ld\n",
namelen, namelen, name, PTR_ERR(subdir));
}
drop_sysctl_table(&dir->header);
@@ -995,8 +996,8 @@ static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
vaf.fmt = fmt;
vaf.va = &args;
- printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n",
- path, table->procname, &vaf);
+ pr_err("sysctl table check failed: %s/%s %pV\n",
+ path, table->procname, &vaf);
va_end(args);
return -EINVAL;
@@ -1510,9 +1511,9 @@ static void put_links(struct ctl_table_header *header)
drop_sysctl_table(link_head);
}
else {
- printk(KERN_ERR "sysctl link missing during unregister: ");
+ pr_err("sysctl link missing during unregister: ");
sysctl_print_dir(parent);
- printk(KERN_CONT "/%s\n", name);
+ pr_cont("/%s\n", name);
}
}
}
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 0d5071d29985..41dd018f72dd 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -15,6 +15,7 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/highmem.h>
+#include <linux/printk.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/crash_dump.h>
@@ -553,8 +554,7 @@ static int __init parse_crash_elf64_headers(void)
ehdr.e_ehsize != sizeof(Elf64_Ehdr) ||
ehdr.e_phentsize != sizeof(Elf64_Phdr) ||
ehdr.e_phnum == 0) {
- printk(KERN_WARNING "Warning: Core image elf header is not"
- "sane\n");
+ pr_warn("Warning: Core image elf header is not sane\n");
return -EINVAL;
}
@@ -609,8 +609,7 @@ static int __init parse_crash_elf32_headers(void)
ehdr.e_ehsize != sizeof(Elf32_Ehdr) ||
ehdr.e_phentsize != sizeof(Elf32_Phdr) ||
ehdr.e_phnum == 0) {
- printk(KERN_WARNING "Warning: Core image elf header is not"
- "sane\n");
+ pr_warn("Warning: Core image elf header is not sane\n");
return -EINVAL;
}
@@ -653,8 +652,7 @@ static int __init parse_crash_elf_headers(void)
if (rc < 0)
return rc;
if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) {
- printk(KERN_WARNING "Warning: Core image elf header"
- " not found\n");
+ pr_warn("Warning: Core image elf header not found\n");
return -EINVAL;
}
@@ -673,8 +671,7 @@ static int __init parse_crash_elf_headers(void)
/* Determine vmcore size. */
vmcore_size = get_vmcore_size_elf32(elfcorebuf);
} else {
- printk(KERN_WARNING "Warning: Core image elf header is not"
- " sane\n");
+ pr_warn("Warning: Core image elf header is not sane\n");
return -EINVAL;
}
return 0;
@@ -690,7 +687,7 @@ static int __init vmcore_init(void)
return rc;
rc = parse_crash_elf_headers();
if (rc) {
- printk(KERN_WARNING "Kdump: vmcore not initialized\n");
+ pr_warn("Kdump: vmcore not initialized\n");
return rc;
}
diff --git a/fs/read_write.c b/fs/read_write.c
index bb34af315280..616bc3962bb0 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -15,6 +15,7 @@
#include <linux/syscalls.h>
#include <linux/pagemap.h>
#include <linux/splice.h>
+#include <linux/aio.h>
#include "read_write.h"
#include <asm/uaccess.h>
@@ -318,16 +319,6 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
}
-static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
-{
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (!kiocbIsKicked(iocb))
- schedule();
- else
- kiocbClearKicked(iocb);
- __set_current_state(TASK_RUNNING);
-}
-
ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
struct iovec iov = { .iov_base = buf, .iov_len = len };
@@ -339,13 +330,7 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
kiocb.ki_left = len;
kiocb.ki_nbytes = len;
- for (;;) {
- ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
- if (ret != -EIOCBRETRY)
- break;
- wait_on_retry_sync_kiocb(&kiocb);
- }
-
+ ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
*ppos = kiocb.ki_pos;
@@ -395,13 +380,7 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
kiocb.ki_left = len;
kiocb.ki_nbytes = len;
- for (;;) {
- ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
- if (ret != -EIOCBRETRY)
- break;
- wait_on_retry_sync_kiocb(&kiocb);
- }
-
+ ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
*ppos = kiocb.ki_pos;
@@ -568,13 +547,7 @@ ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
kiocb.ki_left = len;
kiocb.ki_nbytes = len;
- for (;;) {
- ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
- if (ret != -EIOCBRETRY)
- break;
- wait_on_retry_sync_kiocb(&kiocb);
- }
-
+ ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
if (ret == -EIOCBQUEUED)
ret = wait_on_sync_kiocb(&kiocb);
*ppos = kiocb.ki_pos;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 95d7680ead47..30195bcbbfc3 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -18,6 +18,7 @@
#include <linux/writeback.h>
#include <linux/quotaops.h>
#include <linux/swap.h>
+#include <linux/aio.h>
int reiserfs_commit_write(struct file *f, struct page *page,
unsigned from, unsigned to);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index f2bc3dfd0b88..15c6304bab71 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -308,27 +308,27 @@ loff_t seq_lseek(struct file *file, loff_t offset, int whence)
mutex_lock(&m->lock);
m->version = file->f_version;
switch (whence) {
- case 1:
- offset += file->f_pos;
- case 0:
- if (offset < 0)
- break;
- retval = offset;
- if (offset != m->read_pos) {
- while ((retval=traverse(m, offset)) == -EAGAIN)
- ;
- if (retval) {
- /* with extreme prejudice... */
- file->f_pos = 0;
- m->read_pos = 0;
- m->version = 0;
- m->index = 0;
- m->count = 0;
- } else {
- m->read_pos = offset;
- retval = file->f_pos = offset;
- }
+ case SEEK_CUR:
+ offset += file->f_pos;
+ case SEEK_SET:
+ if (offset < 0)
+ break;
+ retval = offset;
+ if (offset != m->read_pos) {
+ while ((retval = traverse(m, offset)) == -EAGAIN)
+ ;
+ if (retval) {
+ /* with extreme prejudice... */
+ file->f_pos = 0;
+ m->read_pos = 0;
+ m->version = 0;
+ m->index = 0;
+ m->count = 0;
+ } else {
+ m->read_pos = offset;
+ retval = file->f_pos = offset;
}
+ }
}
file->f_version = m->version;
mutex_unlock(&m->lock);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index b53486961735..c723b5d56c8f 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -30,6 +30,7 @@
#include <linux/signalfd.h>
#include <linux/syscalls.h>
#include <linux/proc_fs.h>
+#include <linux/compat.h>
void signalfd_cleanup(struct sighand_struct *sighand)
{
@@ -74,6 +75,39 @@ static unsigned int signalfd_poll(struct file *file, poll_table *wait)
}
/*
+ * Copy a whole siginfo into userspace.
+ * The main idea of this format is that it should be enough
+ * for restoring siginfo back into the kernel.
+ */
+static int signalfd_copy_raw_info(struct signalfd_siginfo __user *siginfo,
+ siginfo_t *kinfo)
+{
+ siginfo_t __user *uinfo = (siginfo_t __user *)siginfo;
+ int err;
+
+ BUILD_BUG_ON(sizeof(siginfo_t) != sizeof(struct signalfd_siginfo));
+
+ err = __clear_user(uinfo, sizeof(*uinfo));
+
+#ifdef CONFIG_COMPAT
+ if (unlikely(is_compat_task())) {
+ compat_siginfo_t __user *compat_uinfo;
+
+ compat_uinfo = (compat_siginfo_t __user *)siginfo;
+ err |= copy_siginfo_to_user32(compat_uinfo, kinfo);
+ err |= put_user(kinfo->si_code, &compat_uinfo->si_code);
+
+ return err ? -EFAULT : sizeof(*compat_uinfo);
+ }
+#endif
+
+ err |= copy_siginfo_to_user(uinfo, kinfo);
+ err |= put_user(kinfo->si_code, &uinfo->si_code);
+
+ return err ? -EFAULT : sizeof(*uinfo);
+}
+
+/*
* Copied from copy_siginfo_to_user() in kernel/signal.c
*/
static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
@@ -205,6 +239,7 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
struct signalfd_ctx *ctx = file->private_data;
struct signalfd_siginfo __user *siginfo;
int nonblock = file->f_flags & O_NONBLOCK;
+ bool raw = file->f_flags & SFD_RAW;
ssize_t ret, total = 0;
siginfo_t info;
@@ -217,7 +252,12 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
ret = signalfd_dequeue(ctx, &info, nonblock);
if (unlikely(ret <= 0))
break;
- ret = signalfd_copyinfo(siginfo, &info);
+
+ if (raw)
+ ret = signalfd_copy_raw_info(siginfo, &info);
+ else
+ ret = signalfd_copyinfo(siginfo, &info);
+
if (ret < 0)
break;
siginfo++;
@@ -262,7 +302,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC);
BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK);
- if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK))
+ if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK | SFD_RAW))
return -EINVAL;
if (sizemask != sizeof(sigset_t) ||
@@ -272,20 +312,35 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
signotset(&sigmask);
if (ufd == -1) {
+ struct file *file;
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
ctx->sigmask = sigmask;
+ ufd = get_unused_fd_flags(flags);
+ if (ufd < 0) {
+ kfree(ctx);
+ goto out;
+ }
+
/*
* When we call this, the initialization must be complete, since
* anon_inode_getfd() will install the fd.
*/
- ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
+ file = anon_inode_getfile("[signalfd]", &signalfd_fops, ctx,
O_RDWR | (flags & (O_CLOEXEC | O_NONBLOCK)));
- if (ufd < 0)
+ if (IS_ERR(file)) {
+ put_unused_fd(ufd);
+ ufd = PTR_ERR(file);
kfree(ctx);
+ goto out;
+ }
+
+ file->f_flags |= flags & SFD_RAW;
+
+ fd_install(ufd, file);
} else {
struct fd f = fdget(ufd);
if (!f.file)
@@ -302,7 +357,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
wake_up(&current->sighand->signalfd_wqh);
fdput(f);
}
-
+out:
return ufd;
}
diff --git a/fs/super.c b/fs/super.c
index 12f123712161..c290cca1069a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -447,14 +447,13 @@ struct super_block *sget(struct file_system_type *type,
void *data)
{
struct super_block *s = NULL;
- struct hlist_node *node;
struct super_block *old;
int err;
retry:
spin_lock(&sb_lock);
if (test) {
- hlist_for_each_entry(old, node, &type->fs_supers, s_instances) {
+ hlist_for_each_entry(old, &type->fs_supers, s_instances) {
if (!test(old, data))
continue;
if (!grab_super(old))
@@ -554,10 +553,9 @@ void iterate_supers_type(struct file_system_type *type,
void (*f)(struct super_block *, void *), void *arg)
{
struct super_block *sb, *p = NULL;
- struct hlist_node *node;
spin_lock(&sb_lock);
- hlist_for_each_entry(sb, node, &type->fs_supers, s_instances) {
+ hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
sb->s_count++;
spin_unlock(&sb_lock);
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 614b2b544880..f186e277a127 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -461,14 +461,13 @@ const struct file_operations bin_fops = {
void unmap_bin_file(struct sysfs_dirent *attr_sd)
{
struct bin_buffer *bb;
- struct hlist_node *tmp;
if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR)
return;
mutex_lock(&sysfs_bin_lock);
- hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) {
+ hlist_for_each_entry(bb, &attr_sd->s_bin_attr.buffers, list) {
struct inode *inode = bb->file->f_path.dentry->d_inode;
unmap_mapping_range(inode->i_mapping, 0, 0, 1);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 5bc77817f382..deaf954368ad 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -50,6 +50,7 @@
*/
#include "ubifs.h"
+#include <linux/aio.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/slab.h>
@@ -1522,6 +1523,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
ubifs_release_dirty_inode_budget(c, ui);
}
+ wait_for_stable_page(page);
unlock_page(page);
return 0;
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 77b5953eaac8..63e25fc83328 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -204,7 +204,7 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
goto out;
case UDF_RELOCATE_BLOCKS:
if (!capable(CAP_SYS_ADMIN)) {
- result = -EACCES;
+ result = -EPERM;
goto out;
}
if (get_user(old_block, (long __user *)arg)) {
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 7a12e48ad819..b6d15d349810 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -38,6 +38,7 @@
#include <linux/slab.h>
#include <linux/crc-itu-t.h>
#include <linux/mpage.h>
+#include <linux/aio.h>
#include "udf_i.h"
#include "udf_sb.h"
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 5f707e537171..c24ce0e9c67c 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -31,6 +31,7 @@
#include "xfs_vnodeops.h"
#include "xfs_trace.h"
#include "xfs_bmap.h"
+#include <linux/aio.h>
#include <linux/gfp.h>
#include <linux/mpage.h>
#include <linux/pagevec.h>
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 67284edb84d7..a4d7ef78fd56 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -36,6 +36,7 @@
#include "xfs_ioctl.h"
#include "xfs_trace.h"
+#include <linux/aio.h>
#include <linux/dcache.h>
#include <linux/falloc.h>
#include <linux/pagevec.h>
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 96fcbb85ff83..d1dba7ce75ae 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1442,9 +1442,8 @@ xlog_recover_find_tid(
xlog_tid_t tid)
{
xlog_recover_t *trans;
- struct hlist_node *n;
- hlist_for_each_entry(trans, n, head, r_list) {
+ hlist_for_each_entry(trans, head, r_list) {
if (trans->r_log_tid == tid)
return trans;
}