summaryrefslogtreecommitdiff
path: root/mm/shmem.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/shmem.c')
-rw-r--r--mm/shmem.c223
1 files changed, 192 insertions, 31 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index 9d32e1cb9f38..e67d6ba4e98e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -29,11 +29,14 @@
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/mm.h>
+#include <linux/sched/signal.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/uio.h>
#include <linux/khugepaged.h>
+#include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */
+
static struct vfsmount *shm_mnt;
#ifdef CONFIG_SHMEM
@@ -70,8 +73,10 @@ static struct vfsmount *shm_mnt;
#include <linux/syscalls.h>
#include <linux/fcntl.h>
#include <uapi/linux/memfd.h>
+#include <linux/userfaultfd_k.h>
+#include <linux/rmap.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/pgtable.h>
#include "internal.h"
@@ -115,13 +120,14 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
struct shmem_inode_info *info, pgoff_t index);
static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
struct page **pagep, enum sgp_type sgp,
- gfp_t gfp, struct mm_struct *fault_mm, int *fault_type);
+ gfp_t gfp, struct vm_area_struct *vma,
+ struct vm_fault *vmf, int *fault_type);
int shmem_getpage(struct inode *inode, pgoff_t index,
struct page **pagep, enum sgp_type sgp)
{
return shmem_getpage_gfp(inode, index, pagep, sgp,
- mapping_gfp_mask(inode->i_mapping), NULL, NULL);
+ mapping_gfp_mask(inode->i_mapping), NULL, NULL, NULL);
}
static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
@@ -190,6 +196,11 @@ static const struct inode_operations shmem_special_inode_operations;
static const struct vm_operations_struct shmem_vm_ops;
static struct file_system_type shmem_fs_type;
+bool vma_is_shmem(struct vm_area_struct *vma)
+{
+ return vma->vm_ops == &shmem_vm_ops;
+}
+
static LIST_HEAD(shmem_swaplist);
static DEFINE_MUTEX(shmem_swaplist_mutex);
@@ -300,18 +311,19 @@ void shmem_uncharge(struct inode *inode, long pages)
static int shmem_radix_tree_replace(struct address_space *mapping,
pgoff_t index, void *expected, void *replacement)
{
+ struct radix_tree_node *node;
void **pslot;
void *item;
VM_BUG_ON(!expected);
VM_BUG_ON(!replacement);
- pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
- if (!pslot)
+ item = __radix_tree_lookup(&mapping->page_tree, index, &node, &pslot);
+ if (!item)
return -ENOENT;
- item = radix_tree_deref_slot_protected(pslot, &mapping->tree_lock);
if (item != expected)
return -ENOENT;
- radix_tree_replace_slot(pslot, replacement);
+ __radix_tree_replace(&mapping->page_tree, node, pslot,
+ replacement, NULL, NULL);
return 0;
}
@@ -370,6 +382,7 @@ static bool shmem_confirm_swap(struct address_space *mapping,
int shmem_huge __read_mostly;
+#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS)
static int shmem_parse_huge(const char *str)
{
if (!strcmp(str, "never"))
@@ -407,11 +420,13 @@ static const char *shmem_format_huge(int huge)
return "bad_val";
}
}
+#endif
static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
struct shrink_control *sc, unsigned long nr_to_split)
{
LIST_HEAD(list), *pos, *next;
+ LIST_HEAD(to_remove);
struct inode *inode;
struct shmem_inode_info *info;
struct page *page;
@@ -438,9 +453,8 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
/* Check if there's anything to gain */
if (round_up(inode->i_size, PAGE_SIZE) ==
round_up(inode->i_size, HPAGE_PMD_SIZE)) {
- list_del_init(&info->shrinklist);
+ list_move(&info->shrinklist, &to_remove);
removed++;
- iput(inode);
goto next;
}
@@ -451,6 +465,13 @@ next:
}
spin_unlock(&sbinfo->shrinklist_lock);
+ list_for_each_safe(pos, next, &to_remove) {
+ info = list_entry(pos, struct shmem_inode_info, shrinklist);
+ inode = &info->vfs_inode;
+ list_del_init(&info->shrinklist);
+ iput(inode);
+ }
+
list_for_each_safe(pos, next, &list) {
int ret;
@@ -658,8 +679,8 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
swapped++;
if (need_resched()) {
+ slot = radix_tree_iter_resume(slot, &iter);
cond_resched_rcu();
- slot = radix_tree_iter_next(&iter);
}
}
@@ -938,10 +959,10 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
}
EXPORT_SYMBOL_GPL(shmem_truncate_range);
-static int shmem_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+static int shmem_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = path->dentry->d_inode;
struct shmem_inode_info *info = SHMEM_I(inode);
if (info->alloced - info->swapped != inode->i_mapping->nrpages) {
@@ -1046,6 +1067,30 @@ static void shmem_evict_inode(struct inode *inode)
clear_inode(inode);
}
+static unsigned long find_swap_entry(struct radix_tree_root *root, void *item)
+{
+ struct radix_tree_iter iter;
+ void **slot;
+ unsigned long found = -1;
+ unsigned int checked = 0;
+
+ rcu_read_lock();
+ radix_tree_for_each_slot(slot, root, &iter, 0) {
+ if (*slot == item) {
+ found = iter.index;
+ break;
+ }
+ checked++;
+ if ((checked % 4096) != 0)
+ continue;
+ slot = radix_tree_iter_resume(slot, &iter);
+ cond_resched_rcu();
+ }
+
+ rcu_read_unlock();
+ return found;
+}
+
/*
* If swap found in inode, free it and move page from swapcache to filecache.
*/
@@ -1059,7 +1104,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
int error = 0;
radswap = swp_to_radix_entry(swap);
- index = radix_tree_locate_item(&mapping->page_tree, radswap);
+ index = find_swap_entry(&mapping->page_tree, radswap);
if (index == -1)
return -EAGAIN; /* tell shmem_unuse we found nothing */
@@ -1536,10 +1581,10 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
*/
static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
struct page **pagep, enum sgp_type sgp, gfp_t gfp,
- struct mm_struct *fault_mm, int *fault_type)
+ struct vm_area_struct *vma, struct vm_fault *vmf, int *fault_type)
{
struct address_space *mapping = inode->i_mapping;
- struct shmem_inode_info *info;
+ struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo;
struct mm_struct *charge_mm;
struct mem_cgroup *memcg;
@@ -1589,9 +1634,8 @@ repeat:
* Fast cache lookup did not find it:
* bring it back from swap or allocate.
*/
- info = SHMEM_I(inode);
sbinfo = SHMEM_SB(inode->i_sb);
- charge_mm = fault_mm ? : current->mm;
+ charge_mm = vma ? vma->vm_mm : current->mm;
if (swap.val) {
/* Look it up and read it in.. */
@@ -1601,7 +1645,8 @@ repeat:
if (fault_type) {
*fault_type |= VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
- mem_cgroup_count_vm_event(fault_mm, PGMAJFAULT);
+ mem_cgroup_count_vm_event(charge_mm,
+ PGMAJFAULT);
}
/* Here we actually start the io */
page = shmem_swapin(swap, gfp, info, index);
@@ -1670,6 +1715,11 @@ repeat:
swap_free(swap);
} else {
+ if (vma && userfaultfd_missing(vma)) {
+ *fault_type = handle_userfault(vmf, VM_UFFD_MISSING);
+ return 0;
+ }
+
/* shmem_symlink() */
if (mapping->a_ops != &shmem_aops)
goto alloc_nohuge;
@@ -1837,7 +1887,6 @@ unlock:
put_page(page);
}
if (error == -ENOSPC && !once++) {
- info = SHMEM_I(inode);
spin_lock_irq(&info->lock);
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
@@ -1860,8 +1909,9 @@ static int synchronous_wake_function(wait_queue_t *wait, unsigned mode, int sync
return ret;
}
-static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int shmem_fault(struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
struct inode *inode = file_inode(vma->vm_file);
gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
enum sgp_type sgp;
@@ -1933,7 +1983,7 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
sgp = SGP_NOHUGE;
error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp,
- gfp, vma->vm_mm, &ret);
+ gfp, vma, vmf, &ret);
if (error)
return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
return ret;
@@ -2143,10 +2193,123 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
bool shmem_mapping(struct address_space *mapping)
{
- if (!mapping->host)
- return false;
+ return mapping->a_ops == &shmem_aops;
+}
- return mapping->host->i_sb->s_op == &shmem_ops;
+int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm,
+ pmd_t *dst_pmd,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_addr,
+ unsigned long src_addr,
+ struct page **pagep)
+{
+ struct inode *inode = file_inode(dst_vma->vm_file);
+ struct shmem_inode_info *info = SHMEM_I(inode);
+ struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ struct address_space *mapping = inode->i_mapping;
+ gfp_t gfp = mapping_gfp_mask(mapping);
+ pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
+ struct mem_cgroup *memcg;
+ spinlock_t *ptl;
+ void *page_kaddr;
+ struct page *page;
+ pte_t _dst_pte, *dst_pte;
+ int ret;
+
+ ret = -ENOMEM;
+ if (shmem_acct_block(info->flags, 1))
+ goto out;
+ if (sbinfo->max_blocks) {
+ if (percpu_counter_compare(&sbinfo->used_blocks,
+ sbinfo->max_blocks) >= 0)
+ goto out_unacct_blocks;
+ percpu_counter_inc(&sbinfo->used_blocks);
+ }
+
+ if (!*pagep) {
+ page = shmem_alloc_page(gfp, info, pgoff);
+ if (!page)
+ goto out_dec_used_blocks;
+
+ page_kaddr = kmap_atomic(page);
+ ret = copy_from_user(page_kaddr, (const void __user *)src_addr,
+ PAGE_SIZE);
+ kunmap_atomic(page_kaddr);
+
+ /* fallback to copy_from_user outside mmap_sem */
+ if (unlikely(ret)) {
+ *pagep = page;
+ if (sbinfo->max_blocks)
+ percpu_counter_add(&sbinfo->used_blocks, -1);
+ shmem_unacct_blocks(info->flags, 1);
+ /* don't free the page */
+ return -EFAULT;
+ }
+ } else {
+ page = *pagep;
+ *pagep = NULL;
+ }
+
+ VM_BUG_ON(PageLocked(page) || PageSwapBacked(page));
+ __SetPageLocked(page);
+ __SetPageSwapBacked(page);
+ __SetPageUptodate(page);
+
+ ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg, false);
+ if (ret)
+ goto out_release;
+
+ ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
+ if (!ret) {
+ ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL);
+ radix_tree_preload_end();
+ }
+ if (ret)
+ goto out_release_uncharge;
+
+ mem_cgroup_commit_charge(page, memcg, false, false);
+
+ _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
+ if (dst_vma->vm_flags & VM_WRITE)
+ _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
+
+ ret = -EEXIST;
+ dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
+ if (!pte_none(*dst_pte))
+ goto out_release_uncharge_unlock;
+
+ lru_cache_add_anon(page);
+
+ spin_lock(&info->lock);
+ info->alloced++;
+ inode->i_blocks += BLOCKS_PER_PAGE;
+ shmem_recalc_inode(inode);
+ spin_unlock(&info->lock);
+
+ inc_mm_counter(dst_mm, mm_counter_file(page));
+ page_add_file_rmap(page, false);
+ set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
+
+ /* No need to invalidate - it was non-present before */
+ update_mmu_cache(dst_vma, dst_addr, dst_pte);
+ unlock_page(page);
+ pte_unmap_unlock(dst_pte, ptl);
+ ret = 0;
+out:
+ return ret;
+out_release_uncharge_unlock:
+ pte_unmap_unlock(dst_pte, ptl);
+out_release_uncharge:
+ mem_cgroup_cancel_charge(page, memcg, false);
+out_release:
+ unlock_page(page);
+ put_page(page);
+out_dec_used_blocks:
+ if (sbinfo->max_blocks)
+ percpu_counter_add(&sbinfo->used_blocks, -1);
+out_unacct_blocks:
+ shmem_unacct_blocks(info->flags, 1);
+ goto out;
}
#ifdef CONFIG_TMPFS
@@ -2169,7 +2332,7 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index = pos >> PAGE_SHIFT;
/* i_mutex is held by caller */
- if (unlikely(info->seals)) {
+ if (unlikely(info->seals & (F_SEAL_WRITE | F_SEAL_GROW))) {
if (info->seals & F_SEAL_WRITE)
return -EPERM;
if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
@@ -2446,8 +2609,8 @@ static void shmem_tag_pins(struct address_space *mapping)
}
if (need_resched()) {
+ slot = radix_tree_iter_resume(slot, &iter);
cond_resched_rcu();
- slot = radix_tree_iter_next(&iter);
}
}
rcu_read_unlock();
@@ -2516,8 +2679,8 @@ static int shmem_wait_for_pins(struct address_space *mapping)
spin_unlock_irq(&mapping->tree_lock);
continue_resched:
if (need_resched()) {
+ slot = radix_tree_iter_resume(slot, &iter);
cond_resched_rcu();
- slot = radix_tree_iter_next(&iter);
}
}
rcu_read_unlock();
@@ -3187,7 +3350,6 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
#endif /* CONFIG_TMPFS_XATTR */
static const struct inode_operations shmem_short_symlink_operations = {
- .readlink = generic_readlink,
.get_link = simple_get_link,
#ifdef CONFIG_TMPFS_XATTR
.listxattr = shmem_listxattr,
@@ -3195,7 +3357,6 @@ static const struct inode_operations shmem_short_symlink_operations = {
};
static const struct inode_operations shmem_symlink_inode_operations = {
- .readlink = generic_readlink,
.get_link = shmem_get_link,
#ifdef CONFIG_TMPFS_XATTR
.listxattr = shmem_listxattr,
@@ -4110,7 +4271,7 @@ struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
BUG_ON(mapping->a_ops != &shmem_aops);
error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
- gfp, NULL, NULL);
+ gfp, NULL, NULL, NULL);
if (error)
page = ERR_PTR(error);
else