From a0eb3a05a8cbe9cd1a41dde3d1b2e5bcc10634f2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Feb 2012 16:19:25 -0800 Subject: userns: Convert hugetlbfs to use kuid/kgid where appropriate Note sysctl_hugetlb_shm_group can only be written in the root user in the initial user namespace, so we can assume sysctl_hugetlb_shm_group is in the initial user namespace. Cc: William Irwin Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/hugetlbfs/inode.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'fs/hugetlbfs/inode.c') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8349a899912e..6e572c4fbf68 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -42,8 +42,8 @@ static const struct inode_operations hugetlbfs_dir_inode_operations; static const struct inode_operations hugetlbfs_inode_operations; struct hugetlbfs_config { - uid_t uid; - gid_t gid; + kuid_t uid; + kgid_t gid; umode_t mode; long nr_blocks; long nr_inodes; @@ -785,13 +785,17 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) case Opt_uid: if (match_int(&args[0], &option)) goto bad_val; - pconfig->uid = option; + pconfig->uid = make_kuid(current_user_ns(), option); + if (!uid_valid(pconfig->uid)) + goto bad_val; break; case Opt_gid: if (match_int(&args[0], &option)) goto bad_val; - pconfig->gid = option; + pconfig->gid = make_kgid(current_user_ns(), option); + if (!gid_valid(pconfig->gid)) + goto bad_val; break; case Opt_mode: @@ -924,7 +928,9 @@ static struct vfsmount *hugetlbfs_vfsmount; static int can_do_hugetlb_shm(void) { - return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); + kgid_t shm_group; + shm_group = make_kgid(&init_user_ns, sysctl_hugetlb_shm_group); + return capable(CAP_IPC_LOCK) || in_group_p(shm_group); } struct file *hugetlb_file_setup(const char *name, unsigned long addr, -- cgit v1.2.3 From 8c0a85377048b64c880e76ec7368904fe46d0b94 Mon Sep 17 00:00:00 2001 From: 
"Kirill A. Shutemov" Date: Wed, 26 Sep 2012 11:33:07 +1000 Subject: fs: push rcu_barrier() from deactivate_locked_super() to filesystems There's no reason to call rcu_barrier() on every deactivate_locked_super(). We only need to make sure that all delayed rcu free inodes are flushed before we destroy related cache. Removing rcu_barrier() from deactivate_locked_super() affects some fast paths. E.g. on my machine exit_group() of a last process in IPC namespace takes 0.07538s. rcu_barrier() takes 0.05188s of that time. Signed-off-by: Kirill A. Shutemov Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- fs/9p/v9fs.c | 5 +++++ fs/adfs/super.c | 5 +++++ fs/affs/super.c | 5 +++++ fs/afs/super.c | 5 +++++ fs/befs/linuxvfs.c | 5 +++++ fs/bfs/inode.c | 5 +++++ fs/btrfs/extent_io.c | 6 ++++++ fs/btrfs/inode.c | 5 +++++ fs/ceph/super.c | 5 +++++ fs/cifs/cifsfs.c | 5 +++++ fs/coda/inode.c | 5 +++++ fs/ecryptfs/main.c | 6 ++++++ fs/efs/super.c | 5 +++++ fs/exofs/super.c | 5 +++++ fs/ext2/super.c | 5 +++++ fs/ext3/super.c | 5 +++++ fs/ext4/super.c | 5 +++++ fs/fat/inode.c | 5 +++++ fs/freevxfs/vxfs_super.c | 5 +++++ fs/fuse/inode.c | 6 ++++++ fs/hfs/super.c | 6 ++++++ fs/hfsplus/super.c | 6 ++++++ fs/hpfs/super.c | 5 +++++ fs/hugetlbfs/inode.c | 5 +++++ fs/isofs/inode.c | 5 +++++ fs/jffs2/super.c | 6 ++++++ fs/jfs/super.c | 6 ++++++ fs/logfs/inode.c | 5 +++++ fs/minix/inode.c | 5 +++++ fs/ncpfs/inode.c | 5 +++++ fs/nfs/inode.c | 5 +++++ fs/nilfs2/super.c | 6 ++++++ fs/ntfs/super.c | 6 ++++++ fs/ocfs2/dlmfs/dlmfs.c | 5 +++++ fs/ocfs2/super.c | 5 +++++ fs/openpromfs/inode.c | 5 +++++ fs/qnx4/inode.c | 5 +++++ fs/qnx6/inode.c | 5 +++++ fs/reiserfs/super.c | 5 +++++ fs/romfs/super.c | 5 +++++ fs/squashfs/super.c | 5 +++++ fs/super.c | 6 ------ fs/sysv/inode.c | 5 +++++ fs/ubifs/super.c | 6 ++++++ fs/udf/super.c | 5 +++++ fs/ufs/super.c | 5 +++++ fs/xfs/xfs_super.c | 5 +++++ 47 files changed, 240 insertions(+), 6 deletions(-) (limited to 'fs/hugetlbfs/inode.c') diff 
--git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index b85efa773949..392c5dac1981 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -560,6 +560,11 @@ static int v9fs_init_inode_cache(void) */ static void v9fs_destroy_inode_cache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(v9fs_inode_cache); } diff --git a/fs/adfs/super.c b/fs/adfs/super.c index bdaec92353c2..c830c857c663 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -275,6 +275,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(adfs_inode_cachep); } diff --git a/fs/affs/super.c b/fs/affs/super.c index c70f1e5fc024..2f57053bf26c 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -147,6 +147,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(affs_inode_cachep); } diff --git a/fs/afs/super.c b/fs/afs/super.c index df8c6047c2a1..43165009428d 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -123,6 +123,11 @@ void __exit afs_fs_exit(void) BUG(); } + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(afs_inode_cachep); _leave(""); } diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index cf7f3c67c8b7..962b4f8f7994 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -454,6 +454,11 @@ befs_init_inodecache(void) static void befs_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. 
+ */ + rcu_barrier(); kmem_cache_destroy(befs_inode_cachep); } diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 9870417c26e7..d5fc598d6e4a 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -280,6 +280,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(bfs_inode_cachep); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4c878476bb91..b08ea4717e9d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -107,6 +107,12 @@ void extent_io_exit(void) list_del(&eb->leak_list); kmem_cache_free(extent_buffer_cache, eb); } + + /* + * Make sure all delayed rcu free are flushed before we + * destroy caches. + */ + rcu_barrier(); if (extent_state_cache) kmem_cache_destroy(extent_state_cache); if (extent_buffer_cache) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ec154f954646..cf03a91d806f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7076,6 +7076,11 @@ static void init_once(void *foo) void btrfs_destroy_cachep(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); if (btrfs_inode_cachep) kmem_cache_destroy(btrfs_inode_cachep); if (btrfs_trans_handle_cachep) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index b982239f38f9..3a42d9326378 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -603,6 +603,11 @@ bad_cap: static void destroy_caches(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. 
+ */ + rcu_barrier(); kmem_cache_destroy(ceph_inode_cachep); kmem_cache_destroy(ceph_cap_cachep); kmem_cache_destroy(ceph_dentry_cachep); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index db8a404a51dd..d4ce77a02327 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -977,6 +977,11 @@ cifs_init_inodecache(void) static void cifs_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(cifs_inode_cachep); } diff --git a/fs/coda/inode.c b/fs/coda/inode.c index d315c6c5891a..be2aa4909487 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -85,6 +85,11 @@ int coda_init_inodecache(void) void coda_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(coda_inode_cachep); } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 9b627c15010a..34fcde765d24 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -710,6 +710,12 @@ static void ecryptfs_free_kmem_caches(void) { int i; + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) { struct ecryptfs_cache_info *info; diff --git a/fs/efs/super.c b/fs/efs/super.c index e755ec746c69..2002431ef9a0 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -96,6 +96,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(efs_inode_cachep); } diff --git a/fs/exofs/super.c b/fs/exofs/super.c index dde41a75c7c8..59e3bbfac0b1 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -206,6 +206,11 @@ static int init_inodecache(void) */ static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. 
+ */ + rcu_barrier(); kmem_cache_destroy(exofs_inode_cachep); } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index af74d9e27b71..6c205d0c565b 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -206,6 +206,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ext2_inode_cachep); } diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 8c892e93d8e7..8d41c8889eee 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -532,6 +532,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ext3_inode_cachep); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c6e0cb3d1f4a..455b7d8c6d62 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1019,6 +1019,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ext4_inode_cachep); } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 05e897fe9866..fd8e47cd898b 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -521,6 +521,11 @@ static int __init fat_init_inodecache(void) static void __exit fat_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(fat_inode_cachep); } diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index d4fabd26084e..fed2c8afb3a9 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -279,6 +279,11 @@ static void __exit vxfs_cleanup(void) { unregister_filesystem(&vxfs_fs_type); + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. 
+ */ + rcu_barrier(); kmem_cache_destroy(vxfs_inode_cachep); } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index fca222dabe3c..f0eda124cffb 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1197,6 +1197,12 @@ static void fuse_fs_cleanup(void) { unregister_filesystem(&fuse_fs_type); unregister_fuseblk(); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(fuse_inode_cachep); } diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4eb873e0c07b..941d7a8c2197 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -482,6 +482,12 @@ static int __init init_hfs_fs(void) static void __exit exit_hfs_fs(void) { unregister_filesystem(&hfs_fs_type); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(hfs_inode_cachep); } diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index fdafb2d71654..811a84d2d964 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -635,6 +635,12 @@ static int __init init_hfsplus_fs(void) static void __exit exit_hfsplus_fs(void) { unregister_filesystem(&hfsplus_fs_type); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(hfsplus_inode_cachep); } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 706a12c083ea..3cb1da56eb73 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -210,6 +210,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. 
+ */ + rcu_barrier(); kmem_cache_destroy(hpfs_inode_cachep); } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8349a899912e..c4b85d064e6b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1042,6 +1042,11 @@ static int __init init_hugetlbfs_fs(void) static void __exit exit_hugetlbfs_fs(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(hugetlbfs_inode_cachep); kern_unmount(hugetlbfs_vfsmount); unregister_filesystem(&hugetlbfs_fs_type); diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 29037c365ba4..f94cde4527e8 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -114,6 +114,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(isofs_inode_cachep); } diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 61ea41389f90..ff487954cd96 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -418,6 +418,12 @@ static void __exit exit_jffs2_fs(void) unregister_filesystem(&jffs2_fs_type); jffs2_destroy_slab_caches(); jffs2_compressors_exit(); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(jffs2_inode_cachep); } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index c55c7452d285..3735347fd5f6 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -903,6 +903,12 @@ static void __exit exit_jfs_fs(void) jfs_proc_clean(); #endif unregister_filesystem(&jfs_fs_type); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. 
+ */ + rcu_barrier(); kmem_cache_destroy(jfs_inode_cachep); } diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 6984562738d3..121bba2cf6f2 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -417,5 +417,10 @@ int logfs_init_inode_cache(void) void logfs_destroy_inode_cache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(logfs_inode_cache); } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 2a503ad020d5..dc8d3629c20a 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -100,6 +100,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(minix_inode_cachep); } diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 333df07ae3bd..0c62c55b25d7 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -89,6 +89,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ncp_inode_cachep); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9b47610338f5..e4c716d374a8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1571,6 +1571,11 @@ static int __init nfs_init_inodecache(void) static void nfs_destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(nfs_inode_cachep); } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 6a10812711c1..3c991dc84f2f 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1382,6 +1382,12 @@ static void nilfs_segbuf_init_once(void *obj) static void nilfs_destroy_cachep(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. 
+ */ + rcu_barrier(); + if (nilfs_inode_cachep) kmem_cache_destroy(nilfs_inode_cachep); if (nilfs_transaction_cachep) diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 2bc149d6a784..fe08d4afa106 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3168,6 +3168,12 @@ static void __exit exit_ntfs_fs(void) ntfs_debug("Unregistering NTFS driver."); unregister_filesystem(&ntfs_fs_type); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ntfs_big_inode_cache); kmem_cache_destroy(ntfs_inode_cache); kmem_cache_destroy(ntfs_name_cache); diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 83b6f98e0665..16b712d260d4 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -691,6 +691,11 @@ static void __exit exit_dlmfs_fs(void) flush_workqueue(user_dlm_worker); destroy_workqueue(user_dlm_worker); + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(dlmfs_inode_cache); bdi_destroy(&dlmfs_backing_dev_info); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 68f4541c2db9..0e91ec22a940 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1818,6 +1818,11 @@ static int ocfs2_initialize_mem_caches(void) static void ocfs2_free_mem_caches(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); if (ocfs2_inode_cachep) kmem_cache_destroy(ocfs2_inode_cachep); ocfs2_inode_cachep = NULL; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 4a3477949bca..2ad080faca34 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -463,6 +463,11 @@ static int __init init_openprom_fs(void) static void __exit exit_openprom_fs(void) { unregister_filesystem(&openprom_fs_type); + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. 
+ */ + rcu_barrier(); kmem_cache_destroy(op_inode_cachep); } diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 552e994e3aa1..9534b4f76579 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -391,6 +391,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(qnx4_inode_cachep); } diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 2049c814bda4..1b37fff7b5ff 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -651,6 +651,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(qnx6_inode_cachep); } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 7a37dabf5a96..1078ae179993 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -608,6 +608,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(reiserfs_inode_cachep); } diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 77c5f2173983..fd7c5f60b46b 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -648,6 +648,11 @@ error_register: static void __exit exit_romfs_fs(void) { unregister_filesystem(&romfs_fs_type); + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(romfs_inode_cachep); } diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 29cd014ed3a1..260e3928d4f5 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -425,6 +425,11 @@ static int __init init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. 
+ */ + rcu_barrier(); kmem_cache_destroy(squashfs_inode_cachep); } diff --git a/fs/super.c b/fs/super.c index 0902cfa6a12e..5fdf7ff32c4e 100644 --- a/fs/super.c +++ b/fs/super.c @@ -307,12 +307,6 @@ void deactivate_locked_super(struct super_block *s) /* caches are now gone, we can safely kill the shrinker now */ unregister_shrinker(&s->s_shrink); - - /* - * We need to call rcu_barrier so all the delayed rcu free - * inodes are flushed before we release the fs module. - */ - rcu_barrier(); put_filesystem(fs); put_super(s); } else { diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 80e1e2b18df1..0d0c50bd3321 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -360,5 +360,10 @@ int __init sysv_init_icache(void) void sysv_destroy_icache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(sysv_inode_cachep); } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 71a197f0f93d..36e09ca9130b 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2298,6 +2298,12 @@ static void __exit ubifs_exit(void) dbg_debugfs_exit(); ubifs_compressors_exit(); unregister_shrinker(&ubifs_shrinker_info); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ubifs_inode_slab); unregister_filesystem(&ubifs_fs_type); } diff --git a/fs/udf/super.c b/fs/udf/super.c index 18fc038a438d..b8d27642ab06 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -171,6 +171,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. 
+ */ + rcu_barrier(); kmem_cache_destroy(udf_inode_cachep); } diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 444927e5706b..f7cfecfe1cab 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1466,6 +1466,11 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ufs_inode_cachep); } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 19e2380fb867..83d36e473d2f 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1506,6 +1506,11 @@ xfs_init_zones(void) STATIC void xfs_destroy_zones(void) { + /* + * Make sure all delayed rcu free are flushed before we + * destroy caches. + */ + rcu_barrier(); kmem_zone_destroy(xfs_ili_zone); kmem_zone_destroy(xfs_inode_zone); kmem_zone_destroy(xfs_efi_zone); -- cgit v1.2.3 From 314e51b9851b4f4e8ab302243ff5a6fc6147f379 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 8 Oct 2012 16:29:02 -0700 Subject: mm: kill vma flag VM_RESERVED and mm->reserved_vm counter A long time ago, in v2.4, VM_RESERVED kept swapout process off VMA, currently it lost original meaning but still has some effects: | effect | alternative flags -+------------------------+--------------------------------------------- 1| account as reserved_vm | VM_IO 2| skip in core dump | VM_IO, VM_DONTDUMP 3| do not merge or expand | VM_IO, VM_DONTEXPAND, VM_HUGETLB, VM_PFNMAP 4| do not mlock | VM_IO, VM_DONTEXPAND, VM_HUGETLB, VM_PFNMAP This patch removes reserved_vm counter from mm_struct. Seems like nobody cares about it, it does not exported into userspace directly, it only reduces total_vm showed in proc. Thus VM_RESERVED can be replaced with VM_IO or pair VM_DONTEXPAND | VM_DONTDUMP. remap_pfn_range() and io_remap_pfn_range() set VM_IO|VM_DONTEXPAND|VM_DONTDUMP. remap_vmalloc_range() set VM_DONTEXPAND | VM_DONTDUMP. 
[akpm@linux-foundation.org: drivers/vfio/pci/vfio_pci.c fixup] Signed-off-by: Konstantin Khlebnikov Cc: Alexander Viro Cc: Carsten Otte Cc: Chris Metcalf Cc: Cyrill Gorcunov Cc: Eric Paris Cc: H. Peter Anvin Cc: Hugh Dickins Cc: Ingo Molnar Cc: James Morris Cc: Jason Baron Cc: Kentaro Takeda Cc: Matt Helsley Cc: Nick Piggin Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Robert Richter Cc: Suresh Siddha Cc: Tetsuo Handa Cc: Venkatesh Pallipadi Acked-by: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/unevictable-lru.txt | 4 ++-- arch/alpha/kernel/pci-sysfs.c | 2 +- arch/ia64/kernel/perfmon.c | 2 +- arch/ia64/mm/init.c | 3 ++- arch/powerpc/kvm/book3s_hv.c | 2 +- arch/sparc/kernel/pci.c | 2 +- arch/unicore32/kernel/process.c | 2 +- arch/x86/xen/mmu.c | 3 +-- drivers/char/mbcs.c | 2 +- drivers/char/mem.c | 2 +- drivers/char/mspec.c | 2 +- drivers/gpu/drm/drm_gem.c | 2 +- drivers/gpu/drm/drm_vm.c | 10 ++-------- drivers/gpu/drm/exynos/exynos_drm_gem.c | 2 +- drivers/gpu/drm/gma500/framebuffer.c | 3 +-- drivers/gpu/drm/ttm/ttm_bo_vm.c | 4 ++-- drivers/gpu/drm/udl/udl_fb.c | 2 +- drivers/infiniband/hw/ehca/ehca_uverbs.c | 4 ++-- drivers/infiniband/hw/ipath/ipath_file_ops.c | 2 +- drivers/infiniband/hw/qib/qib_file_ops.c | 2 +- drivers/media/pci/meye/meye.c | 2 +- drivers/media/platform/omap/omap_vout.c | 2 +- drivers/media/platform/vino.c | 2 +- drivers/media/usb/sn9c102/sn9c102_core.c | 3 +-- drivers/media/usb/usbvision/usbvision-video.c | 3 +-- drivers/media/v4l2-core/videobuf-dma-sg.c | 2 +- drivers/media/v4l2-core/videobuf-vmalloc.c | 2 +- drivers/media/v4l2-core/videobuf2-memops.c | 2 +- drivers/misc/carma/carma-fpga.c | 2 -- drivers/misc/sgi-gru/grufile.c | 5 ++--- drivers/mtd/mtdchar.c | 2 +- drivers/scsi/sg.c | 2 +- drivers/staging/omapdrm/omap_gem_dmabuf.c | 2 +- drivers/staging/tidspbridge/rmgr/drv_interface.c | 2 +- drivers/uio/uio.c | 4 +--- drivers/usb/mon/mon_bin.c | 2 +- drivers/video/68328fb.c | 2 +- 
drivers/video/aty/atyfb_base.c | 3 +-- drivers/video/fb-puv3.c | 3 +-- drivers/video/fb_defio.c | 2 +- drivers/video/fbmem.c | 3 +-- drivers/video/gbefb.c | 2 +- drivers/video/omap2/omapfb/omapfb-main.c | 2 +- drivers/video/sbuslib.c | 5 ++--- drivers/video/smscufx.c | 1 - drivers/video/udlfb.c | 1 - drivers/video/vermilion/vermilion.c | 1 - drivers/video/vfb.c | 1 - drivers/xen/gntalloc.c | 2 +- drivers/xen/gntdev.c | 2 +- drivers/xen/privcmd.c | 3 ++- fs/binfmt_elf.c | 2 +- fs/binfmt_elf_fdpic.c | 2 +- fs/hugetlbfs/inode.c | 2 +- fs/proc/task_mmu.c | 2 +- include/linux/mempolicy.h | 2 +- include/linux/mm.h | 3 +-- include/linux/mm_types.h | 1 - kernel/events/core.c | 2 +- mm/ksm.c | 3 +-- mm/memory.c | 11 +++++------ mm/mlock.c | 2 +- mm/mmap.c | 2 -- mm/nommu.c | 2 +- mm/vmalloc.c | 3 +-- security/selinux/selinuxfs.c | 2 +- sound/core/pcm_native.c | 6 +++--- sound/usb/usx2y/us122l.c | 2 +- sound/usb/usx2y/usX2Yhwdep.c | 2 +- sound/usb/usx2y/usx2yhwdeppcm.c | 2 +- 70 files changed, 77 insertions(+), 105 deletions(-) (limited to 'fs/hugetlbfs/inode.c') diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt index fa206cccf89f..323ff5dba1cc 100644 --- a/Documentation/vm/unevictable-lru.txt +++ b/Documentation/vm/unevictable-lru.txt @@ -371,8 +371,8 @@ mlock_fixup() filters several classes of "special" VMAs: mlock_fixup() will call make_pages_present() in the hugetlbfs VMA range to allocate the huge pages and populate the ptes. -3) VMAs with VM_DONTEXPAND or VM_RESERVED are generally userspace mappings of - kernel pages, such as the VDSO page, relay channel pages, etc. These pages +3) VMAs with VM_DONTEXPAND are generally userspace mappings of kernel pages, + such as the VDSO page, relay channel pages, etc. These pages are inherently unevictable and are not managed on the LRU lists. mlock_fixup() treats these VMAs the same as hugetlbfs VMAs. It calls make_pages_present() to populate the ptes. 
diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c index 53649c7d0068..b51f7b4818cd 100644 --- a/arch/alpha/kernel/pci-sysfs.c +++ b/arch/alpha/kernel/pci-sysfs.c @@ -26,7 +26,7 @@ static int hose_mmap_page_range(struct pci_controller *hose, base = sparse ? hose->sparse_io_base : hose->dense_io_base; vma->vm_pgoff += base >> PAGE_SHIFT; - vma->vm_flags |= (VM_IO | VM_RESERVED); + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, vma->vm_end - vma->vm_start, diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index f388b4e18a37..ea39eba61ef5 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2307,7 +2307,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t */ vma->vm_mm = mm; vma->vm_file = get_file(filp); - vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED; + vma->vm_flags = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP; vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ /* diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 0eab454867a2..082e383c1b6f 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -138,7 +138,8 @@ ia64_init_addr_space (void) vma->vm_mm = current->mm; vma->vm_end = PAGE_SIZE; vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); - vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED; + vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | + VM_DONTEXPAND | VM_DONTDUMP; down_write(&current->mm->mmap_sem); if (insert_vm_struct(current->mm, vma)) { up_write(&current->mm->mmap_sem); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 83e929e66f9d..721d4603a235 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1183,7 +1183,7 @@ static const struct vm_operations_struct kvm_rma_vm_ops = { static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma) { - vma->vm_flags |= VM_RESERVED; +
vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = &kvm_rma_vm_ops; return 0; } diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index acc8c838ff72..75b31bcdeadf 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -779,7 +779,7 @@ static int __pci_mmap_make_offset(struct pci_dev *pdev, static void __pci_mmap_set_flags(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state) { - vma->vm_flags |= (VM_IO | VM_RESERVED); + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; } /* Set vm_page_prot of VMA, as appropriate for this architecture, for a pci diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index b6f0458c3143..b008586dad75 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -380,7 +380,7 @@ int vectors_user_mapping(void) return install_special_mapping(mm, 0xffff0000, PAGE_SIZE, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC | - VM_RESERVED, + VM_DONTEXPAND | VM_DONTDUMP, NULL); } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 5a16824cc2b3..fd28d86fe3d2 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2451,8 +2451,7 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); - BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == - (VM_PFNMAP | VM_RESERVED | VM_IO))); + BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); rmd.mfn = mfn; rmd.prot = prot; diff --git a/drivers/char/mbcs.c b/drivers/char/mbcs.c index 0c7d340b9ab9..f74e892711dd 100644 --- a/drivers/char/mbcs.c +++ b/drivers/char/mbcs.c @@ -507,7 +507,7 @@ static int mbcs_gscr_mmap(struct file *fp, struct vm_area_struct *vma) vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */ + /* Remap-pfn-range will mark the range VM_IO */ if (remap_pfn_range(vma, vma->vm_start, __pa(soft->gscr_addr) >> PAGE_SHIFT, 
diff --git a/drivers/char/mem.c b/drivers/char/mem.c index e5eedfa24c91..0537903c985b 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -322,7 +322,7 @@ static int mmap_mem(struct file *file, struct vm_area_struct *vma) vma->vm_ops = &mmap_mem_ops; - /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */ + /* Remap-pfn-range will mark the range VM_IO */ if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c index 845f97fd1832..e1f60f968fdd 100644 --- a/drivers/char/mspec.c +++ b/drivers/char/mspec.c @@ -286,7 +286,7 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma, atomic_set(&vdata->refcnt, 1); vma->vm_private_data = vdata; - vma->vm_flags |= (VM_IO | VM_RESERVED | VM_PFNMAP | VM_DONTEXPAND); + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; if (vdata->type == MSPEC_FETCHOP || vdata->type == MSPEC_UNCACHED) vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); vma->vm_ops = &mspec_vm_ops; diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 92177d5aedee..24efae464e2c 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -706,7 +706,7 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) goto out_unlock; } - vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND; + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = obj->dev->driver->gem_vm_ops; vma->vm_private_data = map->handle; vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c index 23a824e6a22a..db7bd292410b 100644 --- a/drivers/gpu/drm/drm_vm.c +++ b/drivers/gpu/drm/drm_vm.c @@ -514,8 +514,7 @@ static int drm_mmap_dma(struct file *filp, struct vm_area_struct *vma) vma->vm_ops = &drm_vm_dma_ops; - vma->vm_flags |= VM_RESERVED; /* Don't swap */ - vma->vm_flags |= VM_DONTEXPAND; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; 
drm_vm_open_locked(dev, vma); return 0; @@ -643,21 +642,16 @@ int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma) case _DRM_SHM: vma->vm_ops = &drm_vm_shm_ops; vma->vm_private_data = (void *)map; - /* Don't let this area swap. Change when - DRM_KERNEL advisory is supported. */ - vma->vm_flags |= VM_RESERVED; break; case _DRM_SCATTER_GATHER: vma->vm_ops = &drm_vm_sg_ops; vma->vm_private_data = (void *)map; - vma->vm_flags |= VM_RESERVED; vma->vm_page_prot = drm_dma_prot(map->type, vma); break; default: return -EINVAL; /* This should never happen. */ } - vma->vm_flags |= VM_RESERVED; /* Don't swap */ - vma->vm_flags |= VM_DONTEXPAND; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; drm_vm_open_locked(dev, vma); return 0; diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index fcdbe46914f7..d2545560664f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -500,7 +500,7 @@ static int exynos_drm_gem_mmap_buffer(struct file *filp, DRM_DEBUG_KMS("%s\n", __FILE__); - vma->vm_flags |= (VM_IO | VM_RESERVED); + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; update_vm_cache_attr(exynos_gem_obj, vma); diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c index 884ba73ac6ce..afded54dbb10 100644 --- a/drivers/gpu/drm/gma500/framebuffer.c +++ b/drivers/gpu/drm/gma500/framebuffer.c @@ -178,8 +178,7 @@ static int psbfb_mmap(struct fb_info *info, struct vm_area_struct *vma) */ vma->vm_ops = &psbfb_vm_ops; vma->vm_private_data = (void *)psbfb; - vma->vm_flags |= VM_RESERVED | VM_IO | - VM_MIXEDMAP | VM_DONTEXPAND; + vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTDUMP; return 0; } diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index a877813571a4..3ba72dbdc4bd 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -285,7 +285,7 @@ int ttm_bo_mmap(struct file 
*filp, struct vm_area_struct *vma, */ vma->vm_private_data = bo; - vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND; + vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTDUMP; return 0; out_unref: ttm_bo_unref(&bo); @@ -300,7 +300,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo) vma->vm_ops = &ttm_bo_vm_ops; vma->vm_private_data = ttm_bo_reference(bo); - vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND; + vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND; return 0; } EXPORT_SYMBOL(ttm_fbdev_mmap); diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index 67df842fbb33..69a2b16f42a6 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -243,7 +243,7 @@ static int udl_fb_mmap(struct fb_info *info, struct vm_area_struct *vma) size = 0; } - vma->vm_flags |= VM_RESERVED; /* avoid to swap out this VMA */ + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ return 0; } diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c index 45ee89b65c23..1a1d5d99fcf9 100644 --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -117,7 +117,7 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas, physical = galpas->user.fw_handle; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical); - /* VM_IO | VM_RESERVED are set by remap_pfn_range() */ + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT, vma->vm_page_prot); if (unlikely(ret)) { @@ -139,7 +139,7 @@ static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue, u64 start, ofs; struct page *page; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; start = vma->vm_start; for (ofs = 0; ofs < 
queue->queue_length; ofs += PAGE_SIZE) { u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs); diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 736d9edbdbe7..3eb7e454849b 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -1225,7 +1225,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; vma->vm_ops = &ipath_file_vm_ops; - vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; ret = 1; bail: diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index faa44cb08071..959a5c4ff812 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -971,7 +971,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; vma->vm_ops = &qib_file_vm_ops; - vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; ret = 1; bail: diff --git a/drivers/media/pci/meye/meye.c b/drivers/media/pci/meye/meye.c index 7bc775219f97..e5a76da86081 100644 --- a/drivers/media/pci/meye/meye.c +++ b/drivers/media/pci/meye/meye.c @@ -1647,7 +1647,7 @@ static int meye_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_ops = &meye_vm_ops; vma->vm_flags &= ~VM_IO; /* not I/O memory */ - vma->vm_flags |= VM_RESERVED; /* avoid to swap out this VMA */ + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_private_data = (void *) (offset / gbufsize); meye_vm_open(vma); diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c index 66ac21d466af..134016f0e660 100644 --- a/drivers/media/platform/omap/omap_vout.c +++ b/drivers/media/platform/omap/omap_vout.c @@ -911,7 +911,7 @@ static int omap_vout_mmap(struct file *file, struct vm_area_struct *vma) 
q->bufs[i]->baddr = vma->vm_start; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); vma->vm_ops = &omap_vout_vm_ops; vma->vm_private_data = (void *) vout; diff --git a/drivers/media/platform/vino.c b/drivers/media/platform/vino.c index 790d96cffeea..70b0bf4b2900 100644 --- a/drivers/media/platform/vino.c +++ b/drivers/media/platform/vino.c @@ -3950,7 +3950,7 @@ found: fb->map_count = 1; - vma->vm_flags |= VM_DONTEXPAND | VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_flags &= ~VM_IO; vma->vm_private_data = fb; vma->vm_file = file; diff --git a/drivers/media/usb/sn9c102/sn9c102_core.c b/drivers/media/usb/sn9c102/sn9c102_core.c index 19ea780b16ff..5bfc8e2f018f 100644 --- a/drivers/media/usb/sn9c102/sn9c102_core.c +++ b/drivers/media/usb/sn9c102/sn9c102_core.c @@ -2126,8 +2126,7 @@ static int sn9c102_mmap(struct file* filp, struct vm_area_struct *vma) return -EINVAL; } - vma->vm_flags |= VM_IO; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; pos = cam->frame[i].bufmem; while (size > 0) { /* size is page-aligned */ diff --git a/drivers/media/usb/usbvision/usbvision-video.c b/drivers/media/usb/usbvision/usbvision-video.c index f67018ed3795..5c36a57e6590 100644 --- a/drivers/media/usb/usbvision/usbvision-video.c +++ b/drivers/media/usb/usbvision/usbvision-video.c @@ -1108,8 +1108,7 @@ static int usbvision_mmap(struct file *file, struct vm_area_struct *vma) } /* VM_IO is eventually going to replace PageReserved altogether */ - vma->vm_flags |= VM_IO; - vma->vm_flags |= VM_RESERVED; /* avoid to swap out this VMA */ + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; pos = usbvision->frame[i].data; while (size > 0) { diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index f300deafd268..828e7c10bd70 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ 
b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -582,7 +582,7 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q, map->count = 1; map->q = q; vma->vm_ops = &videobuf_vm_ops; - vma->vm_flags |= VM_DONTEXPAND | VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_flags &= ~VM_IO; /* using shared anonymous pages */ vma->vm_private_data = map; dprintk(1, "mmap %p: q=%p %08lx-%08lx pgoff %08lx bufs %d-%d\n", diff --git a/drivers/media/v4l2-core/videobuf-vmalloc.c b/drivers/media/v4l2-core/videobuf-vmalloc.c index df142580e44c..2ff7fcc77b11 100644 --- a/drivers/media/v4l2-core/videobuf-vmalloc.c +++ b/drivers/media/v4l2-core/videobuf-vmalloc.c @@ -270,7 +270,7 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q, } vma->vm_ops = &videobuf_vm_ops; - vma->vm_flags |= VM_DONTEXPAND | VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_private_data = map; dprintk(1, "mmap %p: q=%p %08lx-%08lx (%lx) pgoff %08lx buf %d\n", diff --git a/drivers/media/v4l2-core/videobuf2-memops.c b/drivers/media/v4l2-core/videobuf2-memops.c index 504cd4cbe29e..051ea3571b20 100644 --- a/drivers/media/v4l2-core/videobuf2-memops.c +++ b/drivers/media/v4l2-core/videobuf2-memops.c @@ -163,7 +163,7 @@ int vb2_mmap_pfn_range(struct vm_area_struct *vma, unsigned long paddr, return ret; } - vma->vm_flags |= VM_DONTEXPAND | VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_private_data = priv; vma->vm_ops = vm_ops; diff --git a/drivers/misc/carma/carma-fpga.c b/drivers/misc/carma/carma-fpga.c index 0c43297ed9ac..8835eabb3b87 100644 --- a/drivers/misc/carma/carma-fpga.c +++ b/drivers/misc/carma/carma-fpga.c @@ -1243,8 +1243,6 @@ static int data_mmap(struct file *filp, struct vm_area_struct *vma) return -EINVAL; } - /* IO memory (stop cacheing) */ - vma->vm_flags |= VM_IO | VM_RESERVED; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); return io_remap_pfn_range(vma, vma->vm_start, addr, vsize, diff --git 
a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index ecafa4ba238b..492c8cac69ac 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -108,9 +108,8 @@ static int gru_file_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_end & (GRU_GSEG_PAGESIZE - 1)) return -EINVAL; - vma->vm_flags |= - (VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP | - VM_RESERVED); + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_LOCKED | + VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_page_prot = PAGE_SHARED; vma->vm_ops = &gru_vm_ops; diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index a6e74514e662..73ae81a629f2 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1182,7 +1182,7 @@ static int mtdchar_mmap(struct file *file, struct vm_area_struct *vma) return -EINVAL; if (set_vm_offset(vma, off) < 0) return -EINVAL; - vma->vm_flags |= VM_IO | VM_RESERVED; + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; #ifdef pgprot_noncached if (file->f_flags & O_DSYNC || off >= __pa(high_memory)) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 9c5c5f2b3962..be2c9a6561ff 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1257,7 +1257,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) } sfp->mmap_called = 1; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_private_data = sfp; vma->vm_ops = &sg_mmap_vm_ops; return 0; diff --git a/drivers/staging/omapdrm/omap_gem_dmabuf.c b/drivers/staging/omapdrm/omap_gem_dmabuf.c index 42728e0cc194..c6f3ef6f57b9 100644 --- a/drivers/staging/omapdrm/omap_gem_dmabuf.c +++ b/drivers/staging/omapdrm/omap_gem_dmabuf.c @@ -160,7 +160,7 @@ static int omap_gem_dmabuf_mmap(struct dma_buf *buffer, goto out_unlock; } - vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND; + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = obj->dev->driver->gem_vm_ops; vma->vm_private_data = obj; 
vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); diff --git a/drivers/staging/tidspbridge/rmgr/drv_interface.c b/drivers/staging/tidspbridge/rmgr/drv_interface.c index bddea1d3b2c3..701a11ac676d 100644 --- a/drivers/staging/tidspbridge/rmgr/drv_interface.c +++ b/drivers/staging/tidspbridge/rmgr/drv_interface.c @@ -261,7 +261,7 @@ static int bridge_mmap(struct file *filp, struct vm_area_struct *vma) { u32 status; - vma->vm_flags |= VM_RESERVED | VM_IO; + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); dev_dbg(bridge, "%s: vm filp %p start %lx end %lx page_prot %ulx " diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index a783d533a1a6..5110f367f1f1 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -653,8 +653,6 @@ static int uio_mmap_physical(struct vm_area_struct *vma) if (mi < 0) return -EINVAL; - vma->vm_flags |= VM_IO | VM_RESERVED; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); return remap_pfn_range(vma, @@ -666,7 +664,7 @@ static int uio_mmap_physical(struct vm_area_struct *vma) static int uio_mmap_logical(struct vm_area_struct *vma) { - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = &uio_vm_ops; uio_vma_open(vma); return 0; diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c index 91cd85076a44..9a62e89d6dc0 100644 --- a/drivers/usb/mon/mon_bin.c +++ b/drivers/usb/mon/mon_bin.c @@ -1247,7 +1247,7 @@ static int mon_bin_mmap(struct file *filp, struct vm_area_struct *vma) { /* don't do anything here: "fault" will set up page table entries */ vma->vm_ops = &mon_bin_vm_ops; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_private_data = filp->private_data; mon_bin_vma_open(vma); return 0; diff --git a/drivers/video/68328fb.c b/drivers/video/68328fb.c index a425d65d5ba2..fa44fbed397d 100644 --- a/drivers/video/68328fb.c +++ 
b/drivers/video/68328fb.c @@ -400,7 +400,7 @@ static int mc68x328fb_mmap(struct fb_info *info, struct vm_area_struct *vma) #ifndef MMU /* this is uClinux (no MMU) specific code */ - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_start = videomemory; return 0; diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c index 3f2e8c13f1ca..868932f904ef 100644 --- a/drivers/video/aty/atyfb_base.c +++ b/drivers/video/aty/atyfb_base.c @@ -1942,8 +1942,7 @@ static int atyfb_mmap(struct fb_info *info, struct vm_area_struct *vma) off = vma->vm_pgoff << PAGE_SHIFT; size = vma->vm_end - vma->vm_start; - /* To stop the swapper from even considering these pages. */ - vma->vm_flags |= (VM_IO | VM_RESERVED); + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ if (((vma->vm_pgoff == 0) && (size == info->fix.smem_len)) || ((off == info->fix.smem_len) && (size == PAGE_SIZE))) diff --git a/drivers/video/fb-puv3.c b/drivers/video/fb-puv3.c index 60a787fa32cf..7d106f1f4906 100644 --- a/drivers/video/fb-puv3.c +++ b/drivers/video/fb-puv3.c @@ -653,9 +653,8 @@ int unifb_mmap(struct fb_info *info, vma->vm_page_prot)) return -EAGAIN; - vma->vm_flags |= VM_RESERVED; /* avoid to swap out this VMA */ + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ return 0; - } static struct fb_ops unifb_ops = { diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c index 64cda560c488..88cad6b8b479 100644 --- a/drivers/video/fb_defio.c +++ b/drivers/video/fb_defio.c @@ -166,7 +166,7 @@ static const struct address_space_operations fb_deferred_io_aops = { static int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma) { vma->vm_ops = &fb_deferred_io_vm_ops; - vma->vm_flags |= ( VM_RESERVED | VM_DONTEXPAND ); + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; if (!(info->flags & FBINFO_VIRTFB)) vma->vm_flags |= VM_IO; vma->vm_private_data = info; diff --git a/drivers/video/fbmem.c 
b/drivers/video/fbmem.c index 0dff12a1daef..3ff0105a496a 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -1410,8 +1410,7 @@ fb_mmap(struct file *file, struct vm_area_struct * vma) return -EINVAL; off += start; vma->vm_pgoff = off >> PAGE_SHIFT; - /* This is an IO map - tell maydump to skip this VMA */ - vma->vm_flags |= VM_IO | VM_RESERVED; + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by io_remap_pfn_range()*/ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); fb_pgprotect(file, vma, off); if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT, diff --git a/drivers/video/gbefb.c b/drivers/video/gbefb.c index 7e7b7a9ba274..05e2a8a99d8f 100644 --- a/drivers/video/gbefb.c +++ b/drivers/video/gbefb.c @@ -1024,7 +1024,7 @@ static int gbefb_mmap(struct fb_info *info, pgprot_val(vma->vm_page_prot) = pgprot_fb(pgprot_val(vma->vm_page_prot)); - vma->vm_flags |= VM_IO | VM_RESERVED; + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ /* look for the starting tile */ tile = &gbe_tiles.cpu[offset >> TILE_SHIFT]; diff --git a/drivers/video/omap2/omapfb/omapfb-main.c b/drivers/video/omap2/omapfb/omapfb-main.c index 3c39aa8de928..15373f4aee19 100644 --- a/drivers/video/omap2/omapfb/omapfb-main.c +++ b/drivers/video/omap2/omapfb/omapfb-main.c @@ -1128,7 +1128,7 @@ static int omapfb_mmap(struct fb_info *fbi, struct vm_area_struct *vma) DBG("user mmap region start %lx, len %d, off %lx\n", start, len, off); vma->vm_pgoff = off >> PAGE_SHIFT; - vma->vm_flags |= VM_IO | VM_RESERVED; + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); vma->vm_ops = &mmap_user_ops; vma->vm_private_data = rg; diff --git a/drivers/video/sbuslib.c b/drivers/video/sbuslib.c index 3c1de981a18c..296afae442f4 100644 --- a/drivers/video/sbuslib.c +++ b/drivers/video/sbuslib.c @@ -57,9 +57,8 @@ int sbusfb_mmap_helper(struct sbus_mmap_map *map, off = vma->vm_pgoff << PAGE_SHIFT; - /* 
To stop the swapper from even considering these pages */ - vma->vm_flags |= (VM_IO | VM_RESERVED); - + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); /* Each page, see which map applies */ diff --git a/drivers/video/smscufx.c b/drivers/video/smscufx.c index 5533a32c6ca1..97bd6620c364 100644 --- a/drivers/video/smscufx.c +++ b/drivers/video/smscufx.c @@ -803,7 +803,6 @@ static int ufx_ops_mmap(struct fb_info *info, struct vm_area_struct *vma) size = 0; } - vma->vm_flags |= VM_RESERVED; /* avoid to swap out this VMA */ return 0; } diff --git a/drivers/video/udlfb.c b/drivers/video/udlfb.c index 8af64148294b..f45eba3d6150 100644 --- a/drivers/video/udlfb.c +++ b/drivers/video/udlfb.c @@ -345,7 +345,6 @@ static int dlfb_ops_mmap(struct fb_info *info, struct vm_area_struct *vma) size = 0; } - vma->vm_flags |= VM_RESERVED; /* avoid to swap out this VMA */ return 0; } diff --git a/drivers/video/vermilion/vermilion.c b/drivers/video/vermilion/vermilion.c index 970e43d13f52..89aef343e295 100644 --- a/drivers/video/vermilion/vermilion.c +++ b/drivers/video/vermilion/vermilion.c @@ -1018,7 +1018,6 @@ static int vmlfb_mmap(struct fb_info *info, struct vm_area_struct *vma) offset += vinfo->vram_start; pgprot_val(vma->vm_page_prot) |= _PAGE_PCD; pgprot_val(vma->vm_page_prot) &= ~_PAGE_PWT; - vma->vm_flags |= VM_RESERVED | VM_IO; if (remap_pfn_range(vma, vma->vm_start, offset >> PAGE_SHIFT, size, vma->vm_page_prot)) return -EAGAIN; diff --git a/drivers/video/vfb.c b/drivers/video/vfb.c index 501a922aa9dc..c7f692525b88 100644 --- a/drivers/video/vfb.c +++ b/drivers/video/vfb.c @@ -439,7 +439,6 @@ static int vfb_mmap(struct fb_info *info, size = 0; } - vma->vm_flags |= VM_RESERVED; /* avoid to swap out this VMA */ return 0; } diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index 934985d14c24..4097987b330e 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -535,7 +535,7 
@@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_private_data = vm_priv; - vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = &gntalloc_vmops; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 5df9fd847b2e..610bfc6be177 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -720,7 +720,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) vma->vm_ops = &gntdev_vmops; - vma->vm_flags |= VM_RESERVED|VM_DONTEXPAND; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; if (use_ptemod) vma->vm_flags |= VM_DONTCOPY; diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index ef6389580b8c..8adb9cc267f9 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -455,7 +455,8 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) { /* DONTCOPY is essential for Xen because copy_page_range doesn't know * how to recreate these mappings */ - vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP; + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY | + VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = &privcmd_vm_ops; vma->vm_private_data = NULL; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 2b72d26e2e4b..e800dec958c3 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1135,7 +1135,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, } /* Do not dump I/O mapped devices or special mappings */ - if (vma->vm_flags & (VM_IO | VM_RESERVED)) + if (vma->vm_flags & VM_IO) return 0; /* By default, dump shared memory if mapped from an anonymous file. 
*/ diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 08d812b32282..262db114ff01 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1205,7 +1205,7 @@ static int maydump(struct vm_area_struct *vma, unsigned long mm_flags) int dump_ok; /* Do not dump I/O mapped devices or special mappings */ - if (vma->vm_flags & (VM_IO | VM_RESERVED)) { + if (vma->vm_flags & VM_IO) { kdcore("%08lx: %08lx: no (IO)", vma->vm_start, vma->vm_flags); return 0; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 9460120a5170..0a0ab8e21b19 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -110,7 +110,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) * way when do_mmap_pgoff unwinds (may be important on powerpc * and ia64). */ - vma->vm_flags |= VM_HUGETLB | VM_RESERVED; + vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = &hugetlb_vm_ops; if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4540b8f76f16..79827ce03e3b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -54,7 +54,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) "VmPTE:\t%8lu kB\n" "VmSwap:\t%8lu kB\n", hiwater_vm << (PAGE_SHIFT-10), - (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), + total_vm << (PAGE_SHIFT-10), mm->locked_vm << (PAGE_SHIFT-10), mm->pinned_vm << (PAGE_SHIFT-10), hiwater_rss << (PAGE_SHIFT-10), diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 95b738c7abff..ba7a0ff19d39 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -239,7 +239,7 @@ extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, /* Check if a vma is migratable */ static inline int vma_migratable(struct vm_area_struct *vma) { - if (vma->vm_flags & (VM_IO|VM_HUGETLB|VM_PFNMAP|VM_RESERVED)) + if (vma->vm_flags & (VM_IO | VM_HUGETLB | VM_PFNMAP)) return 0; /* * Migration allocates pages in the 
highest zone. If we cannot diff --git a/include/linux/mm.h b/include/linux/mm.h index dc08d558e058..0514fe9d3c84 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -96,7 +96,6 @@ extern unsigned int kobjsize(const void *objp); #define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ -#define VM_RESERVED 0x00080000 /* Count as reserved_vm like IO */ #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ #define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ @@ -148,7 +147,7 @@ extern unsigned int kobjsize(const void *objp); * Special vmas that are non-mergable, non-mlock()able. * Note: mm/huge_memory.c VM_NO_THP depends on this definition. */ -#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) +#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP) /* * mapping from the currently active vm_flags protection bits (the diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 58d3173eb365..a57a43f5ca7c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -349,7 +349,6 @@ struct mm_struct { unsigned long shared_vm; /* Shared pages (files) */ unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */ unsigned long stack_vm; /* VM_GROWSUP/DOWN */ - unsigned long reserved_vm; /* VM_RESERVED|VM_IO pages */ unsigned long def_flags; unsigned long nr_ptes; /* Page table pages */ unsigned long start_code, end_code, start_data, end_data; diff --git a/kernel/events/core.c b/kernel/events/core.c index f16f3c58f11a..cda3ebd49e86 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3671,7 +3671,7 @@ unlock: atomic_inc(&event->mmap_count); mutex_unlock(&event->mmap_mutex); - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = &perf_mmap_vmops; return ret; diff --git a/mm/ksm.c b/mm/ksm.c index f9ccb16559ee..9638620a7530 100644 --- 
a/mm/ksm.c +++ b/mm/ksm.c @@ -1469,8 +1469,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start, */ if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE | VM_PFNMAP | VM_IO | VM_DONTEXPAND | - VM_RESERVED | VM_HUGETLB | - VM_NONLINEAR | VM_MIXEDMAP)) + VM_HUGETLB | VM_NONLINEAR | VM_MIXEDMAP)) return 0; /* just ignore the advice */ #ifdef VM_SAO diff --git a/mm/memory.c b/mm/memory.c index 7b1e4feaec06..e09c04813186 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2297,14 +2297,13 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, * rest of the world about it: * VM_IO tells people not to look at these pages * (accesses can have side effects). - * VM_RESERVED is specified all over the place, because - * in 2.4 it kept swapout's vma scan off this vma; but - * in 2.6 the LRU scan won't even find its pages, so this - * flag means no more than count its pages in reserved_vm, - * and omit it from core dump, even when VM_IO turned off. * VM_PFNMAP tells the core MM that the base pages are just * raw PFN mappings, and do not have a "struct page" associated * with them. + * VM_DONTEXPAND + * Disable vma merging and expanding with mremap(). + * VM_DONTDUMP + * Omit vma from core dump, even when VM_IO turned off. * * There's a horrible special case to handle copy-on-write * behaviour that some programs depend on. 
We mark the "original" @@ -2321,7 +2320,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, if (err) return -EINVAL; - vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; BUG_ON(addr >= end); pfn -= addr >> PAGE_SHIFT; diff --git a/mm/mlock.c b/mm/mlock.c index ef726e8aa8e9..a948be4b7ba7 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -227,7 +227,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma, if (vma->vm_flags & (VM_IO | VM_PFNMAP)) goto no_mlock; - if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) || + if (!((vma->vm_flags & VM_DONTEXPAND) || is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm))) { diff --git a/mm/mmap.c b/mm/mmap.c index c1ad2e78ea58..a76042dc806d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -945,8 +945,6 @@ void vm_stat_account(struct mm_struct *mm, unsigned long flags, mm->exec_vm += pages; } else if (flags & stack_flags) mm->stack_vm += pages; - if (flags & (VM_RESERVED|VM_IO)) - mm->reserved_vm += pages; } #endif /* CONFIG_PROC_FS */ diff --git a/mm/nommu.c b/mm/nommu.c index 9c4a7b63a4df..12e84e69dd06 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1811,7 +1811,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, if (addr != (pfn << PAGE_SHIFT)) return -EINVAL; - vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; return 0; } EXPORT_SYMBOL(remap_pfn_range); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 2bb90b1d241c..8de704679bfc 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2163,8 +2163,7 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, usize -= PAGE_SIZE; } while (usize > 0); - /* Prevent "things" like memory migration? VM_flags need a cleanup... 
*/ - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; return 0; } diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 55af8c5b57e6..3a6e8731646c 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -485,7 +485,7 @@ static int sel_mmap_policy(struct file *filp, struct vm_area_struct *vma) return -EACCES; } - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = &sel_mmap_policy_ops; return 0; diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 20554eff5a21..5e12e5bacbba 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3039,7 +3039,7 @@ static int snd_pcm_mmap_status(struct snd_pcm_substream *substream, struct file return -EINVAL; area->vm_ops = &snd_pcm_vm_ops_status; area->vm_private_data = substream; - area->vm_flags |= VM_RESERVED; + area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; return 0; } @@ -3076,7 +3076,7 @@ static int snd_pcm_mmap_control(struct snd_pcm_substream *substream, struct file return -EINVAL; area->vm_ops = &snd_pcm_vm_ops_control; area->vm_private_data = substream; - area->vm_flags |= VM_RESERVED; + area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; return 0; } #else /* ! 
coherent mmap */ @@ -3170,7 +3170,7 @@ static const struct vm_operations_struct snd_pcm_vm_ops_data_fault = { int snd_pcm_lib_default_mmap(struct snd_pcm_substream *substream, struct vm_area_struct *area) { - area->vm_flags |= VM_RESERVED; + area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; #ifdef ARCH_HAS_DMA_MMAP_COHERENT if (!substream->ops->page && substream->dma_buffer.dev.type == SNDRV_DMA_TYPE_DEV) diff --git a/sound/usb/usx2y/us122l.c b/sound/usb/usx2y/us122l.c index c4fd3b1d9592..d0323a693ba2 100644 --- a/sound/usb/usx2y/us122l.c +++ b/sound/usb/usx2y/us122l.c @@ -262,7 +262,7 @@ static int usb_stream_hwdep_mmap(struct snd_hwdep *hw, } area->vm_ops = &usb_stream_hwdep_vm_ops; - area->vm_flags |= VM_RESERVED; + area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; area->vm_private_data = us122l; atomic_inc(&us122l->mmap_count); out: diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c index 04aafb43a13c..0b34dbc8f302 100644 --- a/sound/usb/usx2y/usX2Yhwdep.c +++ b/sound/usb/usx2y/usX2Yhwdep.c @@ -82,7 +82,7 @@ static int snd_us428ctls_mmap(struct snd_hwdep * hw, struct file *filp, struct v us428->us428ctls_sharedmem->CtlSnapShotLast = -2; } area->vm_ops = &us428ctls_vm_ops; - area->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; area->vm_private_data = hw->private_data; return 0; } diff --git a/sound/usb/usx2y/usx2yhwdeppcm.c b/sound/usb/usx2y/usx2yhwdeppcm.c index 8e40b6e67e9e..cc56007791e0 100644 --- a/sound/usb/usx2y/usx2yhwdeppcm.c +++ b/sound/usb/usx2y/usx2yhwdeppcm.c @@ -723,7 +723,7 @@ static int snd_usX2Y_hwdep_pcm_mmap(struct snd_hwdep * hw, struct file *filp, st return -ENODEV; } area->vm_ops = &snd_usX2Y_hwdep_pcm_vm_ops; - area->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; area->vm_private_data = hw->private_data; return 0; } -- cgit v1.2.3 From 6b2dbba8b6ac4df26f72eda1e5ea7bab9f950e08 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Mon, 8 Oct 
2012 16:31:25 -0700 Subject: mm: replace vma prio_tree with an interval tree Implement an interval tree as a replacement for the VMA prio_tree. The algorithms are similar to lib/interval_tree.c; however that code can't be directly reused as the interval endpoints are not explicitly stored in the VMA. So instead, the common algorithm is moved into a template and the details (node type, how to get interval endpoints from the node, etc) are filled in using the C preprocessor. Once the interval tree functions are available, using them as a replacement to the VMA prio tree is a relatively simple, mechanical job. Signed-off-by: Michel Lespinasse Cc: Rik van Riel Cc: Hillf Danton Cc: Peter Zijlstra Cc: Catalin Marinas Cc: Andrea Arcangeli Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/fault-armv.c | 3 +- arch/arm/mm/flush.c | 3 +- arch/parisc/kernel/cache.c | 3 +- arch/x86/mm/hugetlbpage.c | 3 +- fs/hugetlbfs/inode.c | 9 +- fs/inode.c | 2 +- include/linux/fs.h | 6 +- include/linux/interval_tree_tmpl.h | 215 +++++++++++++++++++++++++++++++++++++ include/linux/mm.h | 30 +++--- include/linux/mm_types.h | 14 +-- kernel/events/uprobes.c | 3 +- kernel/fork.c | 2 +- lib/interval_tree.c | 166 ++-------------------------- lib/prio_tree.c | 19 +--- mm/Makefile | 4 +- mm/filemap_xip.c | 3 +- mm/fremap.c | 2 +- mm/hugetlb.c | 3 +- mm/interval_tree.c | 61 +++++++++++ mm/memory-failure.c | 3 +- mm/memory.c | 9 +- mm/mmap.c | 22 ++-- mm/nommu.c | 12 +-- mm/prio_tree.c | 208 ----------------------------------- mm/rmap.c | 18 ++-- 25 files changed, 357 insertions(+), 466 deletions(-) create mode 100644 include/linux/interval_tree_tmpl.h create mode 100644 mm/interval_tree.c delete mode 100644 mm/prio_tree.c (limited to 'fs/hugetlbfs/inode.c') diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index 7599e2625c7d..2a5907b5c8d2 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -134,7 +134,6 @@ 
make_coherent(struct address_space *mapping, struct vm_area_struct *vma, { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *mpnt; - struct prio_tree_iter iter; unsigned long offset; pgoff_t pgoff; int aliases = 0; @@ -147,7 +146,7 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma, * cache coherency. */ flush_dcache_mmap_lock(mapping); - vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { /* * If this VMA is not in our MM, we can ignore it. * Note that we intentionally mask out the VMA diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 40ca11ed6e5f..1c8f7f564175 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -196,7 +196,6 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p { struct mm_struct *mm = current->active_mm; struct vm_area_struct *mpnt; - struct prio_tree_iter iter; pgoff_t pgoff; /* @@ -208,7 +207,7 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); flush_dcache_mmap_lock(mapping); - vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { unsigned long offset; /* diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 9d181890a7e3..48e16dc20102 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -276,7 +276,6 @@ void flush_dcache_page(struct page *page) { struct address_space *mapping = page_mapping(page); struct vm_area_struct *mpnt; - struct prio_tree_iter iter; unsigned long offset; unsigned long addr, old_addr = 0; pgoff_t pgoff; @@ -299,7 +298,7 @@ void flush_dcache_page(struct page *page) * to flush one address here for them all to become coherent */ flush_dcache_mmap_lock(mapping); - vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) { + 
vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; addr = mpnt->vm_start + offset; diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index b91e48512425..937bff5cdaa7 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -71,7 +71,6 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) struct address_space *mapping = vma->vm_file->f_mapping; pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - struct prio_tree_iter iter; struct vm_area_struct *svma; unsigned long saddr; pte_t *spte = NULL; @@ -81,7 +80,7 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) return (pte_t *)pmd_alloc(mm, pud, addr); mutex_lock(&mapping->i_mmap_mutex); - vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { + vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) { if (svma == vma) continue; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 0a0ab8e21b19..c5bc355d8243 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -397,17 +397,16 @@ static void hugetlbfs_evict_inode(struct inode *inode) } static inline void -hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff) +hugetlb_vmtruncate_list(struct rb_root *root, pgoff_t pgoff) { struct vm_area_struct *vma; - struct prio_tree_iter iter; - vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) { + vma_interval_tree_foreach(vma, root, pgoff, ULONG_MAX) { unsigned long v_offset; /* * Can the expression below overflow on 32-bit arches? - * No, because the prio_tree returns us only those vmas + * No, because the interval tree returns us only those vmas * which overlap the truncated area starting at pgoff, * and no vma on a 32-bit arch can span beyond the 4GB. 
*/ @@ -432,7 +431,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) i_size_write(inode, offset); mutex_lock(&mapping->i_mmap_mutex); - if (!prio_tree_empty(&mapping->i_mmap)) + if (!RB_EMPTY_ROOT(&mapping->i_mmap)) hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); mutex_unlock(&mapping->i_mmap_mutex); truncate_hugepages(inode, offset); diff --git a/fs/inode.c b/fs/inode.c index ac8d904b3f16..b03c71957246 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -348,7 +348,7 @@ void address_space_init_once(struct address_space *mapping) mutex_init(&mapping->i_mmap_mutex); INIT_LIST_HEAD(&mapping->private_list); spin_lock_init(&mapping->private_lock); - INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); + mapping->i_mmap = RB_ROOT; INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); } EXPORT_SYMBOL(address_space_init_once); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5a8a273d5b2f..c617ed024df8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -401,7 +401,7 @@ struct inodes_stat_t { #include #include #include -#include +#include #include #include #include @@ -669,7 +669,7 @@ struct address_space { struct radix_tree_root page_tree; /* radix tree of all pages */ spinlock_t tree_lock; /* and lock protecting it */ unsigned int i_mmap_writable;/* count VM_SHARED mappings */ - struct prio_tree_root i_mmap; /* tree of private and shared mappings */ + struct rb_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ struct mutex i_mmap_mutex; /* protect tree, count, list */ /* Protected by tree_lock together with the radix tree */ @@ -741,7 +741,7 @@ int mapping_tagged(struct address_space *mapping, int tag); */ static inline int mapping_mapped(struct address_space *mapping) { - return !prio_tree_empty(&mapping->i_mmap) || + return !RB_EMPTY_ROOT(&mapping->i_mmap) || !list_empty(&mapping->i_mmap_nonlinear); } diff --git a/include/linux/interval_tree_tmpl.h b/include/linux/interval_tree_tmpl.h 
new file mode 100644 index 000000000000..c65deda31413 --- /dev/null +++ b/include/linux/interval_tree_tmpl.h @@ -0,0 +1,215 @@ +/* + Interval Trees + (C) 2012 Michel Lespinasse + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + include/linux/interval_tree_tmpl.h +*/ + +/* + * Template for implementing interval trees + * + * ITSTRUCT: struct type of the interval tree nodes + * ITRB: name of struct rb_node field within ITSTRUCT + * ITTYPE: type of the interval endpoints + * ITSUBTREE: name of ITTYPE field within ITSTRUCT holding last-in-subtree + * ITSTART(n): start endpoint of ITSTRUCT node n + * ITLAST(n): last endpoint of ITSTRUCT node n + * ITSTATIC: 'static' or empty + * ITPREFIX: prefix to use for the inline tree definitions + */ + +/* IT(name) -> ITPREFIX_name */ +#define _ITNAME(prefix, name) prefix ## _ ## name +#define ITNAME(prefix, name) _ITNAME(prefix, name) +#define IT(name) ITNAME(ITPREFIX, name) + +/* Callbacks for augmented rbtree insert and remove */ + +static inline ITTYPE IT(compute_subtree_last)(ITSTRUCT *node) +{ + ITTYPE max = ITLAST(node), subtree_last; + if (node->ITRB.rb_left) { + subtree_last = rb_entry(node->ITRB.rb_left, + ITSTRUCT, ITRB)->ITSUBTREE; + if (max < subtree_last) + max = subtree_last; + } + if (node->ITRB.rb_right) { + subtree_last = rb_entry(node->ITRB.rb_right, + ITSTRUCT,
ITRB)->ITSUBTREE; + if (max < subtree_last) + max = subtree_last; + } + return max; +} + +static void IT(augment_propagate)(struct rb_node *rb, struct rb_node *stop) +{ + while (rb != stop) { + ITSTRUCT *node = rb_entry(rb, ITSTRUCT, ITRB); + ITTYPE subtree_last = IT(compute_subtree_last)(node); + if (node->ITSUBTREE == subtree_last) + break; + node->ITSUBTREE = subtree_last; + rb = rb_parent(&node->ITRB); + } +} + +static void IT(augment_copy)(struct rb_node *rb_old, struct rb_node *rb_new) +{ + ITSTRUCT *old = rb_entry(rb_old, ITSTRUCT, ITRB); + ITSTRUCT *new = rb_entry(rb_new, ITSTRUCT, ITRB); + + new->ITSUBTREE = old->ITSUBTREE; +} + +static void IT(augment_rotate)(struct rb_node *rb_old, struct rb_node *rb_new) +{ + ITSTRUCT *old = rb_entry(rb_old, ITSTRUCT, ITRB); + ITSTRUCT *new = rb_entry(rb_new, ITSTRUCT, ITRB); + + new->ITSUBTREE = old->ITSUBTREE; + old->ITSUBTREE = IT(compute_subtree_last)(old); +} + +static const struct rb_augment_callbacks IT(augment_callbacks) = { + IT(augment_propagate), IT(augment_copy), IT(augment_rotate) +}; + +/* Insert / remove interval nodes from the tree */ + +ITSTATIC void IT(insert)(ITSTRUCT *node, struct rb_root *root) +{ + struct rb_node **link = &root->rb_node, *rb_parent = NULL; + ITTYPE start = ITSTART(node), last = ITLAST(node); + ITSTRUCT *parent; + + while (*link) { + rb_parent = *link; + parent = rb_entry(rb_parent, ITSTRUCT, ITRB); + if (parent->ITSUBTREE < last) + parent->ITSUBTREE = last; + if (start < ITSTART(parent)) + link = &parent->ITRB.rb_left; + else + link = &parent->ITRB.rb_right; + } + + node->ITSUBTREE = last; + rb_link_node(&node->ITRB, rb_parent, link); + rb_insert_augmented(&node->ITRB, root, &IT(augment_callbacks)); +} + +ITSTATIC void IT(remove)(ITSTRUCT *node, struct rb_root *root) +{ + rb_erase_augmented(&node->ITRB, root, &IT(augment_callbacks)); +} + +/* + * Iterate over intervals intersecting [start;last] + * + * Note that a node's interval intersects [start;last] iff: + * Cond1: 
ITSTART(node) <= last + * and + * Cond2: start <= ITLAST(node) + */ + +static ITSTRUCT *IT(subtree_search)(ITSTRUCT *node, ITTYPE start, ITTYPE last) +{ + while (true) { + /* + * Loop invariant: start <= node->ITSUBTREE + * (Cond2 is satisfied by one of the subtree nodes) + */ + if (node->ITRB.rb_left) { + ITSTRUCT *left = rb_entry(node->ITRB.rb_left, + ITSTRUCT, ITRB); + if (start <= left->ITSUBTREE) { + /* + * Some nodes in left subtree satisfy Cond2. + * Iterate to find the leftmost such node N. + * If it also satisfies Cond1, that's the match + * we are looking for. Otherwise, there is no + * matching interval as nodes to the right of N + * can't satisfy Cond1 either. + */ + node = left; + continue; + } + } + if (ITSTART(node) <= last) { /* Cond1 */ + if (start <= ITLAST(node)) /* Cond2 */ + return node; /* node is leftmost match */ + if (node->ITRB.rb_right) { + node = rb_entry(node->ITRB.rb_right, + ITSTRUCT, ITRB); + if (start <= node->ITSUBTREE) + continue; + } + } + return NULL; /* No match */ + } +} + +ITSTATIC ITSTRUCT *IT(iter_first)(struct rb_root *root, + ITTYPE start, ITTYPE last) +{ + ITSTRUCT *node; + + if (!root->rb_node) + return NULL; + node = rb_entry(root->rb_node, ITSTRUCT, ITRB); + if (node->ITSUBTREE < start) + return NULL; + return IT(subtree_search)(node, start, last); +} + +ITSTATIC ITSTRUCT *IT(iter_next)(ITSTRUCT *node, ITTYPE start, ITTYPE last) +{ + struct rb_node *rb = node->ITRB.rb_right, *prev; + + while (true) { + /* + * Loop invariants: + * Cond1: ITSTART(node) <= last + * rb == node->ITRB.rb_right + * + * First, search right subtree if suitable + */ + if (rb) { + ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB); + if (start <= right->ITSUBTREE) + return IT(subtree_search)(right, start, last); + } + + /* Move up the tree until we come from a node's left child */ + do { + rb = rb_parent(&node->ITRB); + if (!rb) + return NULL; + prev = &node->ITRB; + node = rb_entry(rb, ITSTRUCT, ITRB); + rb = node->ITRB.rb_right; + } while (prev == 
rb); + + /* Check if the node intersects [start;last] */ + if (last < ITSTART(node)) /* !Cond1 */ + return NULL; + else if (start <= ITLAST(node)) /* Cond2 */ + return node; + } +} diff --git a/include/linux/mm.h b/include/linux/mm.h index 5ddb11b2b4bb..0f671ef09eba 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -1355,22 +1354,27 @@ extern void zone_pcp_reset(struct zone *zone); extern atomic_long_t mmap_pages_allocated; extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); -/* prio_tree.c */ -void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); -void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *); -void vma_prio_tree_remove(struct vm_area_struct *, struct prio_tree_root *); -struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma, - struct prio_tree_iter *iter); - -#define vma_prio_tree_foreach(vma, iter, root, begin, end) \ - for (prio_tree_iter_init(iter, root, begin, end), vma = NULL; \ - (vma = vma_prio_tree_next(vma, iter)); ) +/* interval_tree.c */ +void vma_interval_tree_add(struct vm_area_struct *vma, + struct vm_area_struct *old, + struct address_space *mapping); +void vma_interval_tree_insert(struct vm_area_struct *node, + struct rb_root *root); +void vma_interval_tree_remove(struct vm_area_struct *node, + struct rb_root *root); +struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root, + unsigned long start, unsigned long last); +struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node, + unsigned long start, unsigned long last); + +#define vma_interval_tree_foreach(vma, root, start, last) \ + for (vma = vma_interval_tree_iter_first(root, start, last); \ + vma; vma = vma_interval_tree_iter_next(vma, start, last)) static inline void vma_nonlinear_insert(struct vm_area_struct *vma, struct list_head *list) { - vma->shared.vm_set.parent = NULL; - 
list_add_tail(&vma->shared.vm_set.list, list); + list_add_tail(&vma->shared.nonlinear, list); } /* mmap.c */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index a57a43f5ca7c..31f8a3af7d94 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -240,18 +239,15 @@ struct vm_area_struct { /* * For areas with an address space and backing store, - * linkage into the address_space->i_mmap prio tree, or - * linkage to the list of like vmas hanging off its node, or + * linkage into the address_space->i_mmap interval tree, or * linkage of vma in the address_space->i_mmap_nonlinear list. */ union { struct { - struct list_head list; - void *parent; /* aligns with prio_tree_node parent */ - struct vm_area_struct *head; - } vm_set; - - struct raw_prio_tree_node prio_tree_node; + struct rb_node rb; + unsigned long rb_subtree_last; + } linear; + struct list_head nonlinear; } shared; /* diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 912ef48d28ab..1d9c0a985960 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -735,7 +735,6 @@ static struct map_info * build_map_info(struct address_space *mapping, loff_t offset, bool is_register) { unsigned long pgoff = offset >> PAGE_SHIFT; - struct prio_tree_iter iter; struct vm_area_struct *vma; struct map_info *curr = NULL; struct map_info *prev = NULL; @@ -744,7 +743,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register) again: mutex_lock(&mapping->i_mmap_mutex); - vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { if (!valid_vma(vma, is_register)) continue; diff --git a/kernel/fork.c b/kernel/fork.c index 972762e01024..90dace52715e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -423,7 +423,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) 
mapping->i_mmap_writable++; flush_dcache_mmap_lock(mapping); /* insert tmp into the share list, just after mpnt */ - vma_prio_tree_add(tmp, mpnt); + vma_interval_tree_add(tmp, mpnt, mapping); flush_dcache_mmap_unlock(mapping); mutex_unlock(&mapping->i_mmap_mutex); } diff --git a/lib/interval_tree.c b/lib/interval_tree.c index 6fd540b1e499..77a793e0644b 100644 --- a/lib/interval_tree.c +++ b/lib/interval_tree.c @@ -1,159 +1,13 @@ #include #include -/* Callbacks for augmented rbtree insert and remove */ - -static inline unsigned long -compute_subtree_last(struct interval_tree_node *node) -{ - unsigned long max = node->last, subtree_last; - if (node->rb.rb_left) { - subtree_last = rb_entry(node->rb.rb_left, - struct interval_tree_node, rb)->__subtree_last; - if (max < subtree_last) - max = subtree_last; - } - if (node->rb.rb_right) { - subtree_last = rb_entry(node->rb.rb_right, - struct interval_tree_node, rb)->__subtree_last; - if (max < subtree_last) - max = subtree_last; - } - return max; -} - -RB_DECLARE_CALLBACKS(static, augment_callbacks, struct interval_tree_node, rb, - unsigned long, __subtree_last, compute_subtree_last) - -/* Insert / remove interval nodes from the tree */ - -void interval_tree_insert(struct interval_tree_node *node, - struct rb_root *root) -{ - struct rb_node **link = &root->rb_node, *rb_parent = NULL; - unsigned long start = node->start, last = node->last; - struct interval_tree_node *parent; - - while (*link) { - rb_parent = *link; - parent = rb_entry(rb_parent, struct interval_tree_node, rb); - if (parent->__subtree_last < last) - parent->__subtree_last = last; - if (start < parent->start) - link = &parent->rb.rb_left; - else - link = &parent->rb.rb_right; - } - - node->__subtree_last = last; - rb_link_node(&node->rb, rb_parent, link); - rb_insert_augmented(&node->rb, root, &augment_callbacks); -} - -void interval_tree_remove(struct interval_tree_node *node, - struct rb_root *root) -{ - rb_erase_augmented(&node->rb, root, 
&augment_callbacks); -} - -/* - * Iterate over intervals intersecting [start;last] - * - * Note that a node's interval intersects [start;last] iff: - * Cond1: node->start <= last - * and - * Cond2: start <= node->last - */ - -static struct interval_tree_node * -subtree_search(struct interval_tree_node *node, - unsigned long start, unsigned long last) -{ - while (true) { - /* - * Loop invariant: start <= node->__subtree_last - * (Cond2 is satisfied by one of the subtree nodes) - */ - if (node->rb.rb_left) { - struct interval_tree_node *left = - rb_entry(node->rb.rb_left, - struct interval_tree_node, rb); - if (start <= left->__subtree_last) { - /* - * Some nodes in left subtree satisfy Cond2. - * Iterate to find the leftmost such node N. - * If it also satisfies Cond1, that's the match - * we are looking for. Otherwise, there is no - * matching interval as nodes to the right of N - * can't satisfy Cond1 either. - */ - node = left; - continue; - } - } - if (node->start <= last) { /* Cond1 */ - if (start <= node->last) /* Cond2 */ - return node; /* node is leftmost match */ - if (node->rb.rb_right) { - node = rb_entry(node->rb.rb_right, - struct interval_tree_node, rb); - if (start <= node->__subtree_last) - continue; - } - } - return NULL; /* No match */ - } -} - -struct interval_tree_node * -interval_tree_iter_first(struct rb_root *root, - unsigned long start, unsigned long last) -{ - struct interval_tree_node *node; - - if (!root->rb_node) - return NULL; - node = rb_entry(root->rb_node, struct interval_tree_node, rb); - if (node->__subtree_last < start) - return NULL; - return subtree_search(node, start, last); -} - -struct interval_tree_node * -interval_tree_iter_next(struct interval_tree_node *node, - unsigned long start, unsigned long last) -{ - struct rb_node *rb = node->rb.rb_right, *prev; - - while (true) { - /* - * Loop invariants: - * Cond1: node->start <= last - * rb == node->rb.rb_right - * - * First, search right subtree if suitable - */ - if (rb) { - 
struct interval_tree_node *right = - rb_entry(rb, struct interval_tree_node, rb); - if (start <= right->__subtree_last) - return subtree_search(right, start, last); - } - - /* Move up the tree until we come from a node's left child */ - do { - rb = rb_parent(&node->rb); - if (!rb) - return NULL; - prev = &node->rb; - node = rb_entry(rb, struct interval_tree_node, rb); - rb = node->rb.rb_right; - } while (prev == rb); - - /* Check if the node intersects [start;last] */ - if (last < node->start) /* !Cond1 */ - return NULL; - else if (start <= node->last) /* Cond2 */ - return node; - } -} +#define ITSTRUCT struct interval_tree_node +#define ITRB rb +#define ITTYPE unsigned long +#define ITSUBTREE __subtree_last +#define ITSTART(n) ((n)->start) +#define ITLAST(n) ((n)->last) +#define ITSTATIC +#define ITPREFIX interval_tree + +#include diff --git a/lib/prio_tree.c b/lib/prio_tree.c index 4e0d2edff2b4..bba37148c15e 100644 --- a/lib/prio_tree.c +++ b/lib/prio_tree.c @@ -44,27 +44,12 @@ * The following macros are used for implementing prio_tree for i_mmap */ -#define RADIX_INDEX(vma) ((vma)->vm_pgoff) -#define VMA_SIZE(vma) (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT) -/* avoid overflow */ -#define HEAP_INDEX(vma) ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1)) - - static void get_index(const struct prio_tree_root *root, const struct prio_tree_node *node, unsigned long *radix, unsigned long *heap) { - if (root->raw) { - struct vm_area_struct *vma = prio_tree_entry( - node, struct vm_area_struct, shared.prio_tree_node); - - *radix = RADIX_INDEX(vma); - *heap = HEAP_INDEX(vma); - } - else { - *radix = node->start; - *heap = node->last; - } + *radix = node->start; + *heap = node->last; } static unsigned long index_bits_to_maxindex[BITS_PER_LONG]; diff --git a/mm/Makefile b/mm/Makefile index 92753e2d82da..6b025f80af34 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -14,9 +14,9 @@ endif obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ maccess.o page_alloc.o page-writeback.o \ 
readahead.o swap.o truncate.o vmscan.o shmem.o \ - prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ + util.o mmzone.o vmstat.o backing-dev.o \ mm_init.o mmu_context.o percpu.o slab_common.o \ - compaction.o $(mmu-y) + compaction.o interval_tree.o $(mmu-y) obj-y += init-mm.o diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 91750227a191..a52daee11d3f 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -167,7 +167,6 @@ __xip_unmap (struct address_space * mapping, { struct vm_area_struct *vma; struct mm_struct *mm; - struct prio_tree_iter iter; unsigned long address; pte_t *pte; pte_t pteval; @@ -184,7 +183,7 @@ __xip_unmap (struct address_space * mapping, retry: mutex_lock(&mapping->i_mmap_mutex); - vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { mm = vma->vm_mm; address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); diff --git a/mm/fremap.c b/mm/fremap.c index 3d731a498788..3899a86851ce 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -214,7 +214,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, mutex_lock(&mapping->i_mmap_mutex); flush_dcache_mmap_lock(mapping); vma->vm_flags |= VM_NONLINEAR; - vma_prio_tree_remove(vma, &mapping->i_mmap); + vma_interval_tree_remove(vma, &mapping->i_mmap); vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear); flush_dcache_mmap_unlock(mapping); mutex_unlock(&mapping->i_mmap_mutex); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f1bb534254f6..c9b40e3a9936 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2474,7 +2474,6 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, struct hstate *h = hstate_vma(vma); struct vm_area_struct *iter_vma; struct address_space *mapping; - struct prio_tree_iter iter; pgoff_t pgoff; /* @@ -2491,7 +2490,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, * __unmap_hugepage_range() is called as the lock is 
already held */ mutex_lock(&mapping->i_mmap_mutex); - vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(iter_vma, &mapping->i_mmap, pgoff, pgoff) { /* Do not unmap the current VMA */ if (iter_vma == vma) continue; diff --git a/mm/interval_tree.c b/mm/interval_tree.c new file mode 100644 index 000000000000..7dc565660e56 --- /dev/null +++ b/mm/interval_tree.c @@ -0,0 +1,61 @@ +/* + * mm/interval_tree.c - interval tree for mapping->i_mmap + * + * Copyright (C) 2012, Michel Lespinasse + * + * This file is released under the GPL v2. + */ + +#include +#include + +#define ITSTRUCT struct vm_area_struct +#define ITRB shared.linear.rb +#define ITTYPE unsigned long +#define ITSUBTREE shared.linear.rb_subtree_last +#define ITSTART(n) ((n)->vm_pgoff) +#define ITLAST(n) ((n)->vm_pgoff + \ + (((n)->vm_end - (n)->vm_start) >> PAGE_SHIFT) - 1) +#define ITSTATIC +#define ITPREFIX vma_interval_tree + +#include + +/* Insert vma immediately after old in the interval tree */ +void vma_interval_tree_add(struct vm_area_struct *vma, + struct vm_area_struct *old, + struct address_space *mapping) +{ + struct rb_node **link; + struct vm_area_struct *parent; + unsigned long last; + + if (unlikely(vma->vm_flags & VM_NONLINEAR)) { + list_add(&vma->shared.nonlinear, &old->shared.nonlinear); + return; + } + + last = ITLAST(vma); + + if (!old->shared.linear.rb.rb_right) { + parent = old; + link = &old->shared.linear.rb.rb_right; + } else { + parent = rb_entry(old->shared.linear.rb.rb_right, + struct vm_area_struct, shared.linear.rb); + if (parent->shared.linear.rb_subtree_last < last) + parent->shared.linear.rb_subtree_last = last; + while (parent->shared.linear.rb.rb_left) { + parent = rb_entry(parent->shared.linear.rb.rb_left, + struct vm_area_struct, shared.linear.rb); + if (parent->shared.linear.rb_subtree_last < last) + parent->shared.linear.rb_subtree_last = last; + } + link = &parent->shared.linear.rb.rb_left; + } + +
vma->shared.linear.rb_subtree_last = last; + rb_link_node(&vma->shared.linear.rb, &parent->shared.linear.rb, link); + rb_insert_augmented(&vma->shared.linear.rb, &mapping->i_mmap, + &vma_interval_tree_augment_callbacks); +} diff --git a/mm/memory-failure.c b/mm/memory-failure.c index a6e2141a6610..c38a6257d082 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -431,7 +431,6 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, { struct vm_area_struct *vma; struct task_struct *tsk; - struct prio_tree_iter iter; struct address_space *mapping = page->mapping; mutex_lock(&mapping->i_mmap_mutex); @@ -442,7 +441,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, if (!task_early_kill(tsk)) continue; - vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, + vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { /* * Send early kill signal to tasks where a vma covers diff --git a/mm/memory.c b/mm/memory.c index e09c04813186..d205e4381a34 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2801,14 +2801,13 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma, zap_page_range_single(vma, start_addr, end_addr - start_addr, details); } -static inline void unmap_mapping_range_tree(struct prio_tree_root *root, +static inline void unmap_mapping_range_tree(struct rb_root *root, struct zap_details *details) { struct vm_area_struct *vma; - struct prio_tree_iter iter; pgoff_t vba, vea, zba, zea; - vma_prio_tree_foreach(vma, &iter, root, + vma_interval_tree_foreach(vma, root, details->first_index, details->last_index) { vba = vma->vm_pgoff; @@ -2839,7 +2838,7 @@ static inline void unmap_mapping_range_list(struct list_head *head, * across *all* the pages in each nonlinear VMA, not just the pages * whose virtual address lies outside the file truncation point. 
*/ - list_for_each_entry(vma, head, shared.vm_set.list) { + list_for_each_entry(vma, head, shared.nonlinear) { details->nonlinear_vma = vma; unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details); } @@ -2883,7 +2882,7 @@ void unmap_mapping_range(struct address_space *mapping, mutex_lock(&mapping->i_mmap_mutex); - if (unlikely(!prio_tree_empty(&mapping->i_mmap))) + if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap))) unmap_mapping_range_tree(&mapping->i_mmap, &details); if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details); diff --git a/mm/mmap.c b/mm/mmap.c index e3c365ff1b6a..5ac533f88e99 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -199,14 +199,14 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma, flush_dcache_mmap_lock(mapping); if (unlikely(vma->vm_flags & VM_NONLINEAR)) - list_del_init(&vma->shared.vm_set.list); + list_del_init(&vma->shared.nonlinear); else - vma_prio_tree_remove(vma, &mapping->i_mmap); + vma_interval_tree_remove(vma, &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); } /* - * Unlink a file-based vm structure from its prio_tree, to hide + * Unlink a file-based vm structure from its interval tree, to hide * vma from rmap and vmtruncate before freeing its page tables. */ void unlink_file_vma(struct vm_area_struct *vma) @@ -411,7 +411,7 @@ static void __vma_link_file(struct vm_area_struct *vma) if (unlikely(vma->vm_flags & VM_NONLINEAR)) vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear); else - vma_prio_tree_insert(vma, &mapping->i_mmap); + vma_interval_tree_insert(vma, &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); } } @@ -449,7 +449,7 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma, /* * Helper for vma_adjust() in the split_vma insert case: insert a vma into the - * mm's list and rbtree. It has already been inserted into the prio_tree. + * mm's list and rbtree. It has already been inserted into the interval tree. 
*/ static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) { @@ -491,7 +491,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start, struct vm_area_struct *next = vma->vm_next; struct vm_area_struct *importer = NULL; struct address_space *mapping = NULL; - struct prio_tree_root *root = NULL; + struct rb_root *root = NULL; struct anon_vma *anon_vma = NULL; struct file *file = vma->vm_file; long adjust_next = 0; @@ -554,7 +554,7 @@ again: remove_next = 1 + (end > next->vm_end); mutex_lock(&mapping->i_mmap_mutex); if (insert) { /* - * Put into prio_tree now, so instantiated pages + * Put into interval tree now, so instantiated pages * are visible to arm/parisc __flush_dcache_page * throughout; but we cannot insert into address * space until vma start or end is updated. @@ -582,9 +582,9 @@ again: remove_next = 1 + (end > next->vm_end); if (root) { flush_dcache_mmap_lock(mapping); - vma_prio_tree_remove(vma, root); + vma_interval_tree_remove(vma, root); if (adjust_next) - vma_prio_tree_remove(next, root); + vma_interval_tree_remove(next, root); } vma->vm_start = start; @@ -597,8 +597,8 @@ again: remove_next = 1 + (end > next->vm_end); if (root) { if (adjust_next) - vma_prio_tree_insert(next, root); - vma_prio_tree_insert(vma, root); + vma_interval_tree_insert(next, root); + vma_interval_tree_insert(vma, root); flush_dcache_mmap_unlock(mapping); } diff --git a/mm/nommu.c b/mm/nommu.c index 12e84e69dd06..45131b41bcdb 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -698,7 +698,7 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) mutex_lock(&mapping->i_mmap_mutex); flush_dcache_mmap_lock(mapping); - vma_prio_tree_insert(vma, &mapping->i_mmap); + vma_interval_tree_insert(vma, &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); mutex_unlock(&mapping->i_mmap_mutex); } @@ -764,7 +764,7 @@ static void delete_vma_from_mm(struct vm_area_struct *vma) mutex_lock(&mapping->i_mmap_mutex); flush_dcache_mmap_lock(mapping); - 
vma_prio_tree_remove(vma, &mapping->i_mmap); + vma_interval_tree_remove(vma, &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); mutex_unlock(&mapping->i_mmap_mutex); } @@ -2044,7 +2044,6 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size, size_t newsize) { struct vm_area_struct *vma; - struct prio_tree_iter iter; struct vm_region *region; pgoff_t low, high; size_t r_size, r_top; @@ -2056,8 +2055,7 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size, mutex_lock(&inode->i_mapping->i_mmap_mutex); /* search for VMAs that fall within the dead zone */ - vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, - low, high) { + vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, low, high) { /* found one - only interested if it's shared out of the page * cache */ if (vma->vm_flags & VM_SHARED) { @@ -2073,8 +2071,8 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size, * we don't check for any regions that start beyond the EOF as there * shouldn't be any */ - vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, - 0, ULONG_MAX) { + vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, + 0, ULONG_MAX) { if (!(vma->vm_flags & VM_SHARED)) continue; diff --git a/mm/prio_tree.c b/mm/prio_tree.c deleted file mode 100644 index 799dcfd7cd8c..000000000000 --- a/mm/prio_tree.c +++ /dev/null @@ -1,208 +0,0 @@ -/* - * mm/prio_tree.c - priority search tree for mapping->i_mmap - * - * Copyright (C) 2004, Rajesh Venkatasubramanian - * - * This file is released under the GPL v2. - * - * Based on the radix priority search tree proposed by Edward M. McCreight - * SIAM Journal of Computing, vol. 14, no.2, pages 257-276, May 1985 - * - * 02Feb2004 Initial version - */ - -#include -#include -#include - -/* - * See lib/prio_tree.c for details on the general radix priority search tree - * code. - */ - -/* - * The following #defines are mirrored from lib/prio_tree.c. 
They're only used - * for debugging, and should be removed (along with the debugging code using - * them) when switching also VMAs to the regular prio_tree code. - */ - -#define RADIX_INDEX(vma) ((vma)->vm_pgoff) -#define VMA_SIZE(vma) (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT) -/* avoid overflow */ -#define HEAP_INDEX(vma) ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1)) - -/* - * Radix priority search tree for address_space->i_mmap - * - * For each vma that map a unique set of file pages i.e., unique [radix_index, - * heap_index] value, we have a corresponding priority search tree node. If - * multiple vmas have identical [radix_index, heap_index] value, then one of - * them is used as a tree node and others are stored in a vm_set list. The tree - * node points to the first vma (head) of the list using vm_set.head. - * - * prio_tree_root - * | - * A vm_set.head - * / \ / - * L R -> H-I-J-K-M-N-O-P-Q-S - * ^ ^ <-- vm_set.list --> - * tree nodes - * - * We need some way to identify whether a vma is a tree node, head of a vm_set - * list, or just a member of a vm_set list. We cannot use vm_flags to store - * such information. The reason is, in the above figure, it is possible that - * vm_flags' of R and H are covered by the different mmap_sems. When R is - * removed under R->mmap_sem, H replaces R as a tree node. Since we do not hold - * H->mmap_sem, we cannot use H->vm_flags for marking that H is a tree node now. - * That's why some trick involving shared.vm_set.parent is used for identifying - * tree nodes and list head nodes. 
- * - * vma radix priority search tree node rules: - * - * vma->shared.vm_set.parent != NULL ==> a tree node - * vma->shared.vm_set.head != NULL ==> list of others mapping same range - * vma->shared.vm_set.head == NULL ==> no others map the same range - * - * vma->shared.vm_set.parent == NULL - * vma->shared.vm_set.head != NULL ==> list head of vmas mapping same range - * vma->shared.vm_set.head == NULL ==> a list node - */ - -/* - * Add a new vma known to map the same set of pages as the old vma: - * useful for fork's dup_mmap as well as vma_prio_tree_insert below. - * Note that it just happens to work correctly on i_mmap_nonlinear too. - */ -void vma_prio_tree_add(struct vm_area_struct *vma, struct vm_area_struct *old) -{ - /* Leave these BUG_ONs till prio_tree patch stabilizes */ - BUG_ON(RADIX_INDEX(vma) != RADIX_INDEX(old)); - BUG_ON(HEAP_INDEX(vma) != HEAP_INDEX(old)); - - vma->shared.vm_set.head = NULL; - vma->shared.vm_set.parent = NULL; - - if (!old->shared.vm_set.parent) - list_add(&vma->shared.vm_set.list, - &old->shared.vm_set.list); - else if (old->shared.vm_set.head) - list_add_tail(&vma->shared.vm_set.list, - &old->shared.vm_set.head->shared.vm_set.list); - else { - INIT_LIST_HEAD(&vma->shared.vm_set.list); - vma->shared.vm_set.head = old; - old->shared.vm_set.head = vma; - } -} - -void vma_prio_tree_insert(struct vm_area_struct *vma, - struct prio_tree_root *root) -{ - struct prio_tree_node *ptr; - struct vm_area_struct *old; - - vma->shared.vm_set.head = NULL; - - ptr = raw_prio_tree_insert(root, &vma->shared.prio_tree_node); - if (ptr != (struct prio_tree_node *) &vma->shared.prio_tree_node) { - old = prio_tree_entry(ptr, struct vm_area_struct, - shared.prio_tree_node); - vma_prio_tree_add(vma, old); - } -} - -void vma_prio_tree_remove(struct vm_area_struct *vma, - struct prio_tree_root *root) -{ - struct vm_area_struct *node, *head, *new_head; - - if (!vma->shared.vm_set.head) { - if (!vma->shared.vm_set.parent) - 
list_del_init(&vma->shared.vm_set.list); - else - raw_prio_tree_remove(root, &vma->shared.prio_tree_node); - } else { - /* Leave this BUG_ON till prio_tree patch stabilizes */ - BUG_ON(vma->shared.vm_set.head->shared.vm_set.head != vma); - if (vma->shared.vm_set.parent) { - head = vma->shared.vm_set.head; - if (!list_empty(&head->shared.vm_set.list)) { - new_head = list_entry( - head->shared.vm_set.list.next, - struct vm_area_struct, - shared.vm_set.list); - list_del_init(&head->shared.vm_set.list); - } else - new_head = NULL; - - raw_prio_tree_replace(root, &vma->shared.prio_tree_node, - &head->shared.prio_tree_node); - head->shared.vm_set.head = new_head; - if (new_head) - new_head->shared.vm_set.head = head; - - } else { - node = vma->shared.vm_set.head; - if (!list_empty(&vma->shared.vm_set.list)) { - new_head = list_entry( - vma->shared.vm_set.list.next, - struct vm_area_struct, - shared.vm_set.list); - list_del_init(&vma->shared.vm_set.list); - node->shared.vm_set.head = new_head; - new_head->shared.vm_set.head = node; - } else - node->shared.vm_set.head = NULL; - } - } -} - -/* - * Helper function to enumerate vmas that map a given file page or a set of - * contiguous file pages. The function returns vmas that at least map a single - * page in the given range of contiguous file pages. 
- */ -struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma, - struct prio_tree_iter *iter) -{ - struct prio_tree_node *ptr; - struct vm_area_struct *next; - - if (!vma) { - /* - * First call is with NULL vma - */ - ptr = prio_tree_next(iter); - if (ptr) { - next = prio_tree_entry(ptr, struct vm_area_struct, - shared.prio_tree_node); - prefetch(next->shared.vm_set.head); - return next; - } else - return NULL; - } - - if (vma->shared.vm_set.parent) { - if (vma->shared.vm_set.head) { - next = vma->shared.vm_set.head; - prefetch(next->shared.vm_set.list.next); - return next; - } - } else { - next = list_entry(vma->shared.vm_set.list.next, - struct vm_area_struct, shared.vm_set.list); - if (!next->shared.vm_set.head) { - prefetch(next->shared.vm_set.list.next); - return next; - } - } - - ptr = prio_tree_next(iter); - if (ptr) { - next = prio_tree_entry(ptr, struct vm_area_struct, - shared.prio_tree_node); - prefetch(next->shared.vm_set.head); - return next; - } else - return NULL; -} diff --git a/mm/rmap.c b/mm/rmap.c index 0f3b7cda2a24..7b5b51d25fc5 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -820,7 +820,6 @@ static int page_referenced_file(struct page *page, struct address_space *mapping = page->mapping; pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); struct vm_area_struct *vma; - struct prio_tree_iter iter; int referenced = 0; /* @@ -846,7 +845,7 @@ static int page_referenced_file(struct page *page, */ mapcount = page_mapcount(page); - vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { unsigned long address = vma_address(page, vma); if (address == -EFAULT) continue; @@ -945,13 +944,12 @@ static int page_mkclean_file(struct address_space *mapping, struct page *page) { pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); struct vm_area_struct *vma; - struct prio_tree_iter iter; int ret = 0; BUG_ON(PageAnon(page)); 
mutex_lock(&mapping->i_mmap_mutex); - vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { if (vma->vm_flags & VM_SHARED) { unsigned long address = vma_address(page, vma); if (address == -EFAULT) @@ -1547,7 +1545,6 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) struct address_space *mapping = page->mapping; pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); struct vm_area_struct *vma; - struct prio_tree_iter iter; int ret = SWAP_AGAIN; unsigned long cursor; unsigned long max_nl_cursor = 0; @@ -1555,7 +1552,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) unsigned int mapcount; mutex_lock(&mapping->i_mmap_mutex); - vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { unsigned long address = vma_address(page, vma); if (address == -EFAULT) continue; @@ -1576,7 +1573,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) goto out; list_for_each_entry(vma, &mapping->i_mmap_nonlinear, - shared.vm_set.list) { + shared.nonlinear) { cursor = (unsigned long) vma->vm_private_data; if (cursor > max_nl_cursor) max_nl_cursor = cursor; @@ -1608,7 +1605,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) do { list_for_each_entry(vma, &mapping->i_mmap_nonlinear, - shared.vm_set.list) { + shared.nonlinear) { cursor = (unsigned long) vma->vm_private_data; while ( cursor < max_nl_cursor && cursor < vma->vm_end - vma->vm_start) { @@ -1631,7 +1628,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) * in locked vmas). Reset cursor on all unreserved nonlinear * vmas, now forgetting on which ones it had fallen behind. 
*/ - list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) + list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear) vma->vm_private_data = NULL; out: mutex_unlock(&mapping->i_mmap_mutex); @@ -1748,13 +1745,12 @@ static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *, struct address_space *mapping = page->mapping; pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); struct vm_area_struct *vma; - struct prio_tree_iter iter; int ret = SWAP_AGAIN; if (!mapping) return ret; mutex_lock(&mapping->i_mmap_mutex); - vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { unsigned long address = vma_address(page, vma); if (address == -EFAULT) continue; -- cgit v1.2.3