diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2014-06-24 14:38:16 +1000 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2014-06-24 14:38:16 +1000 |
commit | d66e6e048c29479fca60b07a2516579fde21e127 (patch) | |
tree | 2d1574d6dd8cbe4844c7237f1a1ec4c7eb51a33e | |
parent | 1f7e6d9a714331d14418c6d370c40aa11286f3a0 (diff) | |
parent | 26acace7dd536a0a02b57a05aa5bd509517974a5 (diff) |
Merge branch 'akpm/master'
-rw-r--r-- | Documentation/ABI/testing/sysfs-class-bdi | 8 | ||||
-rw-r--r-- | Documentation/cgroups/memory.txt | 3 | ||||
-rw-r--r-- | Documentation/filesystems/vfat.txt | 10 | ||||
-rw-r--r-- | Documentation/vm/remap_file_pages.txt | 7 | ||||
-rw-r--r-- | block/bio.c | 48 | ||||
-rw-r--r-- | drivers/gpio/gpio-zevio.c | 4 | ||||
-rw-r--r-- | drivers/w1/w1_int.c | 5 | ||||
-rw-r--r-- | fs/fat/cache.c | 70 | ||||
-rw-r--r-- | fs/fat/fat.h | 6 | ||||
-rw-r--r-- | fs/fat/file.c | 78 | ||||
-rw-r--r-- | fs/fat/inode.c | 87 | ||||
-rw-r--r-- | include/linux/fs.h | 8 | ||||
-rw-r--r-- | init/main.c | 23 | ||||
-rw-r--r-- | kernel/kprobes.c | 36 | ||||
-rw-r--r-- | mm/Makefile | 2 | ||||
-rw-r--r-- | mm/backing-dev.c | 35 | ||||
-rw-r--r-- | mm/fremap.c | 283 | ||||
-rw-r--r-- | mm/memcontrol.c | 5 | ||||
-rw-r--r-- | mm/mmap.c | 69 | ||||
-rw-r--r-- | mm/nommu.c | 8 |
20 files changed, 422 insertions, 373 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index d773d5697cf5..3187a18af6da 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -53,3 +53,11 @@ stable_pages_required (read-only) If set, the backing device requires that all pages comprising a write request must not be changed until writeout is complete. + +strictlimit (read-write) + + Forces per-BDI checks for the share of given device in the write-back + cache even before the global background dirty limit is reached. This + is useful in situations where the global limit is much higher than + affordable for given relatively slow (or untrusted) device. Turning + strictlimit on has no visible effect if max_ratio is equal to 100%. diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 02ab997a1ed2..3d9e4d353dad 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -473,6 +473,9 @@ About use_hierarchy, see Section 6. write will still return success. In this case, it is expected that memory.kmem.usage_in_bytes == memory.usage_in_bytes. + Please note that this knob is considered deprecated and will be removed + in the future. + About use_hierarchy, see Section 6. 5.2 stat file diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index ce1126aceed8..223c32171dcc 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt @@ -180,6 +180,16 @@ dos1xfloppy -- If set, use a fallback default BIOS Parameter Block <bool>: 0,1,yes,no,true,false +LIMITATION +--------------------------------------------------------------------- +* The fallocated region of file is discarded at umount/evict time + when using fallocate with FALLOC_FL_KEEP_SIZE. + So, User should assume that fallocated region can be discarded at + last close if there is memory pressure resulting in eviction of + the inode from the memory. As a result, for any dependency on + the fallocated region, user should make sure to recheck fallocate + after reopening the file. + TODO ---------------------------------------------------------------------- * Need to get rid of the raw scanning stuff. Instead, always use diff --git a/Documentation/vm/remap_file_pages.txt b/Documentation/vm/remap_file_pages.txt index 560e4363a55d..f609142f406a 100644 --- a/Documentation/vm/remap_file_pages.txt +++ b/Documentation/vm/remap_file_pages.txt @@ -18,10 +18,9 @@ on 32-bit systems to map files bigger than can linearly fit into 32-bit virtual address space. This use-case is not critical anymore since 64-bit systems are widely available. -The plan is to deprecate the syscall and replace it with an emulation. -The emulation will create new VMAs instead of nonlinear mappings. It's -going to work slower for rare users of remap_file_pages() but ABI is -preserved. +The syscall is deprecated and replaced it with an emulation now. The +emulation creates new VMAs instead of nonlinear mappings. It's going to +work slower for rare users of remap_file_pages() but ABI is preserved. One side effect of emulation (apart from performance) is that user can hit vm.max_map_count limit more easily due to additional VMAs. See comment for diff --git a/block/bio.c b/block/bio.c index 8c2e55e39a1b..6e8224d3d51c 100644 --- a/block/bio.c +++ b/block/bio.c @@ -752,29 +752,31 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page return 0; /* - * we might lose a segment or two here, but rather that than - * make this too complex. + * setup the new entry, we might clear it again later if we + * cannot add the page + */ + bvec = &bio->bi_io_vec[bio->bi_vcnt]; + bvec->bv_page = page; + bvec->bv_len = len; + bvec->bv_offset = offset; + bio->bi_vcnt++; + bio->bi_phys_segments++; + + /* + * Perform a recount if the number of segments is greater + * than queue_max_segments(q). */ - while (bio->bi_phys_segments >= queue_max_segments(q)) { + while (bio->bi_phys_segments > queue_max_segments(q)) { if (retried_segments) - return 0; + goto failed; retried_segments = 1; blk_recount_segments(q, bio); } /* - * setup the new entry, we might clear it again later if we - * cannot add the page - */ - bvec = &bio->bi_io_vec[bio->bi_vcnt]; - bvec->bv_page = page; - bvec->bv_len = len; - bvec->bv_offset = offset; - - /* * if queue has other restrictions (eg varying max sector size * depending on offset), it can specify a merge_bvec_fn in the * queue to get further control @@ -791,23 +793,25 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page * merge_bvec_fn() returns number of bytes it can accept * at this offset */ - if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) { - bvec->bv_page = NULL; - bvec->bv_len = 0; - bvec->bv_offset = 0; - return 0; - } + if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) + goto failed; } /* If we may be able to merge these biovecs, force a recount */ - if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) + if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) bio->bi_flags &= ~(1 << BIO_SEG_VALID); - bio->bi_vcnt++; - bio->bi_phys_segments++; done: bio->bi_iter.bi_size += len; return len; + + failed: + bvec->bv_page = NULL; + bvec->bv_len = 0; + bvec->bv_offset = 0; + bio->bi_vcnt--; + blk_recount_segments(q, bio); + return 0; } /** diff --git a/drivers/gpio/gpio-zevio.c b/drivers/gpio/gpio-zevio.c index 54e54e4cc6c4..cd71e5769a76 100644 --- a/drivers/gpio/gpio-zevio.c +++ b/drivers/gpio/gpio-zevio.c @@ -18,6 +18,10 @@ #include <linux/slab.h> #include <linux/gpio.h> +#ifndef IOMEM +#define IOMEM(x) ((void __force __iomem *)(x)) +#endif + /* * Memory layout: * This chip has four gpio sections, each controls 8 GPIOs. diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index 47249a30eae3..daaa4f27c503 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -91,9 +91,8 @@ static struct w1_master *w1_alloc_dev(u32 id, int slave_count, int slave_ttl, err = device_register(&dev->dev); if (err) { pr_err("Failed to register master device. err=%d\n", err); - memset(dev, 0, sizeof(struct w1_master)); - kfree(dev); - dev = NULL; + put_device(&dev->dev); + return NULL; } return dev; diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 91ad9e1c9441..e26bc9a22ac9 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -303,6 +303,31 @@ static int fat_bmap_cluster(struct inode *inode, int cluster) return dclus; } +static int fat_get_mapped_cluster(struct inode *inode, sector_t sector, + sector_t last_block, + unsigned long *mapped_blocks, sector_t *bmap) +{ + struct super_block *sb = inode->i_sb; + struct msdos_sb_info *sbi = MSDOS_SB(sb); + int cluster, offset; + + cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits); + offset = sector & (sbi->sec_per_clus - 1); + cluster = fat_bmap_cluster(inode, cluster); + + if (cluster < 0) + return cluster; + + else if (cluster) { + *bmap = fat_clus_to_blknr(sbi, cluster) + offset; + *mapped_blocks = sbi->sec_per_clus - offset; + if (*mapped_blocks > last_block - sector) + *mapped_blocks = last_block - sector; + } + + return 0; +} + int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, unsigned long *mapped_blocks, int create) { @@ -311,7 +336,6 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, const unsigned long blocksize = sb->s_blocksize; const unsigned char blocksize_bits = sb->s_blocksize_bits; sector_t last_block; - int cluster, offset; *phys = 0; *mapped_blocks = 0; @@ -329,25 +353,39 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, return 0; /* - * ->mmu_private can access on only allocation path. - * (caller must hold ->i_mutex) + * Both ->mmu_private and ->i_disksize can access + * on only allocation path. (caller must hold ->i_mutex) */ - last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1)) + last_block = (MSDOS_I(inode)->i_disksize + (blocksize - 1)) >> blocksize_bits; if (sector >= last_block) return 0; } - cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits); - offset = sector & (sbi->sec_per_clus - 1); - cluster = fat_bmap_cluster(inode, cluster); - if (cluster < 0) - return cluster; - else if (cluster) { - *phys = fat_clus_to_blknr(sbi, cluster) + offset; - *mapped_blocks = sbi->sec_per_clus - offset; - if (*mapped_blocks > last_block - sector) - *mapped_blocks = last_block - sector; - } - return 0; + return fat_get_mapped_cluster(inode, sector, last_block, mapped_blocks, + phys); +} + +int fat_bmap2(struct inode *inode, sector_t sector, + unsigned long *mapped_blocks, struct buffer_head *bh_result, + int create, sector_t *bmap) +{ + struct super_block *sb = inode->i_sb; + sector_t last_block; + const unsigned long blocksize = sb->s_blocksize; + const unsigned char blocksize_bits = sb->s_blocksize_bits; + + BUG_ON(create != 0); + + *bmap = 0; + *mapped_blocks = 0; + + last_block = (MSDOS_I(inode)->i_disksize + (blocksize - 1)) + >> blocksize_bits; + + if (sector >= last_block) + return 0; + + return fat_get_mapped_cluster(inode, sector, last_block, mapped_blocks, + bmap); } diff --git a/fs/fat/fat.h b/fs/fat/fat.h index e0c4ba39a377..13b7202bd651 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -119,7 +119,8 @@ struct msdos_inode_info { unsigned int cache_valid_id; /* NOTE: mmu_private is 64bits, so must hold ->i_mutex to access */ - loff_t mmu_private; /* physically allocated size */ + loff_t mmu_private; /* physically allocated size (initialized) */ + loff_t i_disksize; /* physically allocated size (uninitialized) */ int i_start; /* first cluster or 0 */ int i_logstart; /* logical first cluster */ @@ -290,6 +291,9 @@ extern int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus); extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, unsigned long *mapped_blocks, int create); +extern int fat_bmap2(struct inode *inode, sector_t sector, + unsigned long *mapped_blocks, + struct buffer_head *bh_result, int create, sector_t *bmap); /* fat/dir.c */ extern const struct file_operations fat_dir_operations; diff --git a/fs/fat/file.c b/fs/fat/file.c index 85f79a89e747..92e9e753b554 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -17,8 +17,12 @@ #include <linux/blkdev.h> #include <linux/fsnotify.h> #include <linux/security.h> +#include <linux/falloc.h> #include "fat.h" +static long fat_fallocate(struct file *file, int mode, + loff_t offset, loff_t len); + static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr) { u32 attr; @@ -182,6 +186,7 @@ const struct file_operations fat_file_operations = { #endif .fsync = fat_file_fsync, .splice_read = generic_file_splice_read, + .fallocate = fat_fallocate, }; static int fat_cont_expand(struct inode *inode, loff_t size) @@ -220,6 +225,75 @@ out: return err; } +/* + * Preallocate space for a file. This implements fat's fallocate file + * operation, which gets called from sys_fallocate system call. User + * space requests len bytes at offset. If FALLOC_FL_KEEP_SIZE is set + * we just allocate clusters without zeroing them out. Otherwise we + * allocate and zero out clusters via an expanding truncate. + */ +static long fat_fallocate(struct file *file, int mode, + loff_t offset, loff_t len) +{ + int cluster; + int nr_cluster; /* Number of clusters to be allocated */ + loff_t mm_bytes; /* Number of bytes to be allocated for file */ + struct inode *inode = file->f_mapping->host; + struct super_block *sb = inode->i_sb; + struct msdos_sb_info *sbi = MSDOS_SB(sb); + int err = 0; + + /* No support for hole punch or other fallocate flags. */ + if (mode & ~FALLOC_FL_KEEP_SIZE) + return -EOPNOTSUPP; + + /* No support for dir */ + if (!S_ISREG(inode->i_mode)) + return -EOPNOTSUPP; + + mutex_lock(&inode->i_mutex); + if ((offset + len) <= MSDOS_I(inode)->i_disksize) + goto error; + + err = inode_newsize_ok(inode, (len + offset)); + if (err) + goto error; + + if (mode & FALLOC_FL_KEEP_SIZE) { + /* First compute the number of clusters to be allocated */ + mm_bytes = offset + len - round_up(MSDOS_I(inode)->i_disksize, + sbi->cluster_size); + nr_cluster = (mm_bytes + (sbi->cluster_size - 1)) >> + sbi->cluster_bits; + + /* Start the allocation.We are not zeroing out the clusters */ + while (nr_cluster-- > 0) { + err = fat_alloc_clusters(inode, &cluster, 1); + if (err) { + fat_msg(sb, KERN_ERR, + "fat_fallocate(): fat_alloc_clusters() error"); + goto error; + } + err = fat_chain_add(inode, cluster, 1); + if (err) { + fat_free_clusters(inode, cluster); + goto error; + } + MSDOS_I(inode)->i_disksize += sbi->cluster_size; + } + } else { + /* This is just an expanding truncate */ + err = fat_cont_expand(inode, (offset + len)); + if (err) + fat_msg(sb, KERN_ERR, + "fat_fallocate(): fat_cont_expand() error"); + } + +error: + mutex_unlock(&inode->i_mutex); + return err; +} + /* Free all clusters after the skip'th cluster. */ static int fat_free(struct inode *inode, int skip) { @@ -300,8 +374,10 @@ void fat_truncate_blocks(struct inode *inode, loff_t offset) * This protects against truncating a file bigger than it was then * trying to write into the hole. */ - if (MSDOS_I(inode)->mmu_private > offset) + if (MSDOS_I(inode)->i_disksize > offset) { MSDOS_I(inode)->mmu_private = offset; + MSDOS_I(inode)->i_disksize = offset; + } nr_clusters = (offset + (cluster_size - 1)) >> sbi->cluster_bits; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 756aead10d96..c3b86c58ad88 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -116,6 +116,25 @@ static int fat_add_cluster(struct inode *inode) return err; } +static void check_fallocated_region(struct inode *inode, sector_t iblock, + unsigned long *max_blocks, struct buffer_head *bh_result) +{ + struct super_block *sb = inode->i_sb; + sector_t last_block, disk_block; + const unsigned long blocksize = sb->s_blocksize; + const unsigned char blocksize_bits = sb->s_blocksize_bits; + + last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1)) + >> blocksize_bits; + disk_block = (MSDOS_I(inode)->i_disksize + (blocksize - 1)) + >> blocksize_bits; + if (iblock >= last_block && iblock <= disk_block) { + MSDOS_I(inode)->mmu_private += *max_blocks << blocksize_bits; + set_buffer_new(bh_result); + } + +} + static inline int __fat_get_block(struct inode *inode, sector_t iblock, unsigned long *max_blocks, struct buffer_head *bh_result, int create) @@ -130,8 +149,11 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock, if (err) return err; if (phys) { - map_bh(bh_result, sb, phys); *max_blocks = min(mapped_blocks, *max_blocks); + if (create) + check_fallocated_region(inode, iblock, max_blocks, + bh_result); + map_bh(bh_result, sb, phys); return 0; } if (!create) @@ -155,6 +177,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock, *max_blocks = min(mapped_blocks, *max_blocks); MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits; + MSDOS_I(inode)->i_disksize = MSDOS_I(inode)->mmu_private; err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create); if (err) @@ -269,6 +292,13 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, loff_t size = offset + count; if (MSDOS_I(inode)->mmu_private < size) return 0; + + /* + * In case of writing in fallocated region, return 0 and + * fallback to buffered write. + */ + if (MSDOS_I(inode)->i_disksize > MSDOS_I(inode)->mmu_private) + return 0; } /* @@ -282,13 +312,36 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, return ret; } +static int fat_get_block_bmap(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + struct super_block *sb = inode->i_sb; + unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; + int err; + sector_t bmap; + unsigned long mapped_blocks; + + err = fat_bmap2(inode, iblock, &mapped_blocks, bh_result, create, + &bmap); + if (err) + return err; + + if (bmap) { + map_bh(bh_result, sb, bmap); + max_blocks = min(mapped_blocks, max_blocks); + } + + bh_result->b_size = max_blocks << sb->s_blocksize_bits; + return 0; +} + static sector_t _fat_bmap(struct address_space *mapping, sector_t block) { sector_t blocknr; /* fat_get_cluster() assumes the requested blocknr isn't truncated. */ down_read(&MSDOS_I(mapping->host)->truncate_lock); - blocknr = generic_block_bmap(mapping, block, fat_get_block); + blocknr = generic_block_bmap(mapping, block, fat_get_block_bmap); up_read(&MSDOS_I(mapping->host)->truncate_lock); return blocknr; @@ -469,7 +522,6 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) error = fat_calc_dir_size(inode); if (error < 0) return error; - MSDOS_I(inode)->mmu_private = inode->i_size; set_nlink(inode, fat_subdirs(inode)); } else { /* not a directory */ @@ -484,8 +536,12 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) inode->i_op = &fat_file_inode_operations; inode->i_fop = &fat_file_operations; inode->i_mapping->a_ops = &fat_aops; - MSDOS_I(inode)->mmu_private = inode->i_size; } + + MSDOS_I(inode)->mmu_private = inode->i_size; + MSDOS_I(inode)->i_disksize = round_up(inode->i_size, + inode->i_sb->s_blocksize); + if (de->attr & ATTR_SYS) { if (sbi->options.sys_immutable) inode->i_flags |= S_IMMUTABLE; @@ -550,12 +606,34 @@ out: EXPORT_SYMBOL_GPL(fat_build_inode); +static int __fat_write_inode(struct inode *inode, int wait); static void fat_evict_inode(struct inode *inode) { truncate_inode_pages_final(&inode->i_data); if (!inode->i_nlink) { inode->i_size = 0; fat_truncate_blocks(inode, 0); + } else { + /* Release unwritten fallocated blocks on inode eviction. */ + if (MSDOS_I(inode)->i_disksize > + round_up(MSDOS_I(inode)->mmu_private, + inode->i_sb->s_blocksize)) { + int err; + fat_truncate_blocks(inode, MSDOS_I(inode)->mmu_private); + /* Fallocate results in updating the i_start/iogstart + * for the zero byte file. So, make it return to + * original state during evict and commit it to avoid + * any corruption on the next access to the cluster + * chain for the file. + */ + err = __fat_write_inode(inode, inode_needs_sync(inode)); + if (err) { + fat_msg(inode->i_sb, KERN_WARNING, "Failed to " + "update on disk inode for unused fallocated " + "blocks, inode could be corrupted. Please run " + "fsck"); + } + } } invalidate_inode_buffers(inode); clear_inode(inode); @@ -1293,6 +1371,7 @@ static int fat_read_root(struct inode *inode) & ~((loff_t)sbi->cluster_size - 1)) >> 9; MSDOS_I(inode)->i_logstart = 0; MSDOS_I(inode)->mmu_private = inode->i_size; + MSDOS_I(inode)->i_disksize = inode->i_size; fat_save_attrs(inode, ATTR_DIR); inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index e11d60cc867b..6cf9693ea62d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2414,8 +2414,12 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); -extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, - unsigned long size, pgoff_t pgoff); +static inline int generic_file_remap_pages(struct vm_area_struct *vma, + unsigned long addr, unsigned long size, pgoff_t pgoff) +{ + BUG(); + return 0; +} int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); diff --git a/init/main.c b/init/main.c index e8ae1fef0908..bb1aed928f21 100644 --- a/init/main.c +++ b/init/main.c @@ -6,7 +6,7 @@ * GK 2/5/95 - Changed to support mounting root fs via NFS * Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96 * Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96 - * Simplified starting of init: Michael A. Griffith <grif@acm.org> + * Simplified starting of init: Michael A. Griffith <grif@acm.org> */ #define DEBUG /* Enable initcall_debug */ @@ -136,7 +136,7 @@ static char *ramdisk_execute_command; * Used to generate warnings if static_key manipulation functions are used * before jump_label_init is called. */ -bool static_key_initialized __read_mostly = false; +bool static_key_initialized __read_mostly; EXPORT_SYMBOL_GPL(static_key_initialized); /* @@ -159,8 +159,8 @@ static int __init set_reset_devices(char *str) __setup("reset_devices", set_reset_devices); -static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; -const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; +static const char *argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; +const char *envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; static const char *panic_later, *panic_param; extern const struct obs_kernel_param __setup_start[], __setup_end[]; @@ -199,7 +199,6 @@ static int __init obsolete_checksetup(char *line) * still work even if initially too large, it will just take slightly longer */ unsigned long loops_per_jiffy = (1<<12); - EXPORT_SYMBOL(loops_per_jiffy); static int __init debug_kernel(char *str) @@ -376,8 +375,8 @@ static void __init setup_command_line(char *command_line) initcall_command_line = memblock_virt_alloc(strlen(boot_command_line) + 1, 0); static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0); - strcpy (saved_command_line, boot_command_line); - strcpy (static_command_line, command_line); + strcpy(saved_command_line, boot_command_line); + strcpy(static_command_line, command_line); } /* @@ -445,8 +444,8 @@ void __init parse_early_options(char *cmdline) /* Arch code calls this early on, or if not, just before other parsing. */ void __init parse_early_param(void) { - static __initdata int done = 0; - static __initdata char tmp_cmdline[COMMAND_LINE_SIZE]; + static int done __initdata; + static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata; if (done) return; @@ -500,7 +499,8 @@ static void __init mm_init(void) asmlinkage __visible void __init start_kernel(void) { - char * command_line, *after_dashes; + char *command_line; + char *after_dashes; extern const struct kernel_param __start___param[], __stop___param[]; /* @@ -572,7 +572,8 @@ asmlinkage __visible void __init start_kernel(void) * fragile until we cpu_idle() for the first time. */ preempt_disable(); - if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n")) + if (WARN(!irqs_disabled(), + "Interrupts were enabled *very* early, fixing it\n")) local_irq_disable(); idr_init_cache(); rcu_init(); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 3214289df5a7..2ac9f133c4da 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -76,7 +76,7 @@ static bool kprobes_all_disarmed; /* This protects kprobe_table and optimizing_list */ static DEFINE_MUTEX(kprobe_mutex); -static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; +static DEFINE_PER_CPU(struct kprobe *, kprobe_instance); static struct { raw_spinlock_t lock ____cacheline_aligned_in_smp; } kretprobe_table_locks[KPROBE_TABLE_SIZE]; @@ -297,9 +297,9 @@ static inline void reset_kprobe_instance(void) /* * This routine is called either: - * - under the kprobe_mutex - during kprobe_[un]register() - * OR - * - with preemption disabled - from arch/xxx/kernel/kprobes.c + * - under the kprobe_mutex - during kprobe_[un]register() + * OR + * - with preemption disabled - from arch/xxx/kernel/kprobes.c */ struct kprobe *get_kprobe(void *addr) { @@ -567,7 +567,8 @@ static void wait_for_kprobe_optimizer(void) { mutex_lock(&kprobe_mutex); - while (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) { + while (!list_empty(&optimizing_list) || + !list_empty(&unoptimizing_list)) { mutex_unlock(&kprobe_mutex); /* this will also make optimizing_work execute immmediately */ @@ -676,8 +677,8 @@ static void reuse_unused_kprobe(struct kprobe *ap) */ op = container_of(ap, struct optimized_kprobe, kp); if (unlikely(list_empty(&op->list))) - printk(KERN_WARNING "Warning: found a stray unused " - "aggrprobe@%p\n", ap->addr); + pr_warn("Warning: found a stray unused aggrprobe@%p\n", + ap->addr); /* Enable the probe again */ ap->flags &= ~KPROBE_FLAG_DISABLED; /* Optimize it again (remove from op->list) */ @@ -794,7 +795,7 @@ static void optimize_all_kprobes(void) if (!kprobe_disabled(p)) optimize_kprobe(p); } - printk(KERN_INFO "Kprobes globally optimized\n"); + pr_info("Kprobes globally optimized\n"); out: mutex_unlock(&kprobe_mutex); } @@ -824,7 +825,7 @@ static void unoptimize_all_kprobes(void) /* Wait for unoptimizing completion */ wait_for_kprobe_optimizer(); - printk(KERN_INFO "Kprobes globally unoptimized\n"); + pr_info("Kprobes globally unoptimized\n"); } static DEFINE_MUTEX(kprobe_sysctl_mutex); @@ -896,7 +897,7 @@ static void __disarm_kprobe(struct kprobe *p, bool reopt) /* There should be no unused kprobes can be reused without optimization */ static void reuse_unused_kprobe(struct kprobe *ap) { - printk(KERN_ERR "Error: There should be no unused kprobe here.\n"); + pr_err("Error: There should be no unused kprobe here.\n"); BUG_ON(kprobe_unused(ap)); } @@ -955,7 +956,8 @@ static void disarm_kprobe_ftrace(struct kprobe *p) } ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, (unsigned long)p->addr, 1, 0); - WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret); + WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", + p->addr, ret); } #else /* !CONFIG_KPROBES_ON_FTRACE */ #define prepare_kprobe(p) arch_prepare_kprobe(p) @@ -1389,7 +1391,7 @@ static struct kprobe *__get_valid_kprobe(struct kprobe *p) if (p != ap) { list_for_each_entry_rcu(list_p, &ap->list, list) if (list_p == p) - /* kprobe p is a valid probe */ + /* kprobe p is a valid probe */ goto valid; return NULL; } @@ -2018,8 +2020,8 @@ EXPORT_SYMBOL_GPL(enable_kprobe); void dump_kprobe(struct kprobe *kp) { - printk(KERN_WARNING "Dumping kprobe:\n"); - printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n", + pr_warn("Dumping kprobe:\n"); + pr_warn("Name: %s\nAddress: %p\nOffset: %x\n", kp->symbol_name, kp->addr, kp->offset); } NOKPROBE_SYMBOL(dump_kprobe); @@ -2128,7 +2130,7 @@ static int __init init_kprobes(void) kprobe_lookup_name(kretprobe_blacklist[i].name, kretprobe_blacklist[i].addr); if (!kretprobe_blacklist[i].addr) - printk("kretprobe: lookup failed: %s\n", + pr_warn("kretprobe: lookup failed: %s\n", kretprobe_blacklist[i].name); } } @@ -2310,7 +2312,7 @@ static void arm_all_kprobes(void) } kprobes_all_disarmed = false; - printk(KERN_INFO "Kprobes globally enabled\n"); + pr_info("Kprobes globally enabled\n"); already_enabled: mutex_unlock(&kprobe_mutex); @@ -2332,7 +2334,7 @@ static void disarm_all_kprobes(void) } kprobes_all_disarmed = true; - printk(KERN_INFO "Kprobes globally disabled\n"); + pr_info("Kprobes globally disabled\n"); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; diff --git a/mm/Makefile b/mm/Makefile index 8338473c329a..bc0422b4257e 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -3,7 +3,7 @@ # mmu-y := nommu.o -mmu-$(CONFIG_MMU) := fremap.o gup.o highmem.o madvise.o memory.o mincore.o \ +mmu-$(CONFIG_MMU) := gup.o highmem.o madvise.o memory.o mincore.o \ mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ vmalloc.o pagewalk.o pgtable-generic.o diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 1706cbbdf5f0..382f8ece1cb2 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -234,11 +234,46 @@ static ssize_t stable_pages_required_show(struct device *dev, } static DEVICE_ATTR_RO(stable_pages_required); +static ssize_t strictlimit_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + unsigned int val; + ssize_t ret; + + ret = kstrtouint(buf, 10, &val); + if (ret < 0) + return ret; + + switch (val) { + case 0: + bdi->capabilities &= ~BDI_CAP_STRICTLIMIT; + break; + case 1: + bdi->capabilities |= BDI_CAP_STRICTLIMIT; + break; + default: + return -EINVAL; + } + + return count; +} +static ssize_t strictlimit_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + + return snprintf(page, PAGE_SIZE-1, "%d\n", + !!(bdi->capabilities & BDI_CAP_STRICTLIMIT)); +} +static DEVICE_ATTR_RW(strictlimit); + static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, &dev_attr_max_ratio.attr, &dev_attr_stable_pages_required.attr, + &dev_attr_strictlimit.attr, NULL, }; ATTRIBUTE_GROUPS(bdi_dev); diff --git a/mm/fremap.c b/mm/fremap.c deleted file mode 100644 index 72b8fa361433..000000000000 --- a/mm/fremap.c +++ /dev/null @@ -1,283 +0,0 @@ -/* - * linux/mm/fremap.c - * - * Explicit pagetable population and nonlinear (random) mappings support. - * - * started by Ingo Molnar, Copyright (C) 2002, 2003 - */ -#include <linux/export.h> -#include <linux/backing-dev.h> -#include <linux/mm.h> -#include <linux/swap.h> -#include <linux/file.h> -#include <linux/mman.h> -#include <linux/pagemap.h> -#include <linux/swapops.h> -#include <linux/rmap.h> -#include <linux/syscalls.h> -#include <linux/mmu_notifier.h> - -#include <asm/mmu_context.h> -#include <asm/cacheflush.h> -#include <asm/tlbflush.h> - -#include "internal.h" - -static int mm_counter(struct page *page) -{ - return PageAnon(page) ? MM_ANONPAGES : MM_FILEPAGES; -} - -static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) -{ - pte_t pte = *ptep; - struct page *page; - swp_entry_t entry; - - if (pte_present(pte)) { - flush_cache_page(vma, addr, pte_pfn(pte)); - pte = ptep_clear_flush(vma, addr, ptep); - page = vm_normal_page(vma, addr, pte); - if (page) { - if (pte_dirty(pte)) - set_page_dirty(page); - update_hiwater_rss(mm); - dec_mm_counter(mm, mm_counter(page)); - page_remove_rmap(page); - page_cache_release(page); - } - } else { /* zap_pte() is not called when pte_none() */ - if (!pte_file(pte)) { - update_hiwater_rss(mm); - entry = pte_to_swp_entry(pte); - if (non_swap_entry(entry)) { - if (is_migration_entry(entry)) { - page = migration_entry_to_page(entry); - dec_mm_counter(mm, mm_counter(page)); - } - } else { - free_swap_and_cache(entry); - dec_mm_counter(mm, MM_SWAPENTS); - } - } - pte_clear_not_present_full(mm, addr, ptep, 0); - } -} - -/* - * Install a file pte to a given virtual memory address, release any - * previously existing mapping. - */ -static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, unsigned long pgoff, pgprot_t prot) -{ - int err = -ENOMEM; - pte_t *pte, ptfile; - spinlock_t *ptl; - - pte = get_locked_pte(mm, addr, &ptl); - if (!pte) - goto out; - - ptfile = pgoff_to_pte(pgoff); - - if (!pte_none(*pte)) - zap_pte(mm, vma, addr, pte); - - set_pte_at(mm, addr, pte, pte_file_mksoft_dirty(ptfile)); - /* - * We don't need to run update_mmu_cache() here because the "file pte" - * being installed by install_file_pte() is not a real pte - it's a - * non-present entry (like a swap entry), noting what file offset should - * be mapped there when there's a fault (in a non-linear vma where - * that's not obvious). - */ - pte_unmap_unlock(pte, ptl); - err = 0; -out: - return err; -} - -int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr, - unsigned long size, pgoff_t pgoff) -{ - struct mm_struct *mm = vma->vm_mm; - int err; - - do { - err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot); - if (err) - return err; - - size -= PAGE_SIZE; - addr += PAGE_SIZE; - pgoff++; - } while (size); - - return 0; -} -EXPORT_SYMBOL(generic_file_remap_pages); - -/** - * sys_remap_file_pages - remap arbitrary pages of an existing VM_SHARED vma - * @start: start of the remapped virtual memory range - * @size: size of the remapped virtual memory range - * @prot: new protection bits of the range (see NOTE) - * @pgoff: to-be-mapped page of the backing store file - * @flags: 0 or MAP_NONBLOCKED - the later will cause no IO. - * - * sys_remap_file_pages remaps arbitrary pages of an existing VM_SHARED vma - * (shared backing store file). - * - * This syscall works purely via pagetables, so it's the most efficient - * way to map the same (large) file into a given virtual window. Unlike - * mmap()/mremap() it does not create any new vmas. The new mappings are - * also safe across swapout. - * - * NOTE: the @prot parameter right now is ignored (but must be zero), - * and the vma's default protection is used. Arbitrary protections - * might be implemented in the future. - */ -SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, - unsigned long, prot, unsigned long, pgoff, unsigned long, flags) -{ - struct mm_struct *mm = current->mm; - struct address_space *mapping; - struct vm_area_struct *vma; - int err = -EINVAL; - int has_write_lock = 0; - vm_flags_t vm_flags = 0; - - pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. " - "See Documentation/vm/remap_file_pages.txt.\n", - current->comm, current->pid); - - if (prot) - return err; - /* - * Sanitize the syscall parameters: - */ - start = start & PAGE_MASK; - size = size & PAGE_MASK; - - /* Does the address range wrap, or is the span zero-sized? */ - if (start + size <= start) - return err; - - /* Does pgoff wrap? */ - if (pgoff + (size >> PAGE_SHIFT) < pgoff) - return err; - - /* Can we represent this offset inside this architecture's pte's? */ -#if PTE_FILE_MAX_BITS < BITS_PER_LONG - if (pgoff + (size >> PAGE_SHIFT) >= (1UL << PTE_FILE_MAX_BITS)) - return err; -#endif - - /* We need down_write() to change vma->vm_flags. */ - down_read(&mm->mmap_sem); - retry: - vma = find_vma(mm, start); - - /* - * Make sure the vma is shared, that it supports prefaulting, - * and that the remapped range is valid and fully within - * the single existing vma. - */ - if (!vma || !(vma->vm_flags & VM_SHARED)) - goto out; - - if (!vma->vm_ops || !vma->vm_ops->remap_pages) - goto out; - - if (start < vma->vm_start || start + size > vma->vm_end) - goto out; - - /* Must set VM_NONLINEAR before any pages are populated. */ - if (!(vma->vm_flags & VM_NONLINEAR)) { - /* - * vm_private_data is used as a swapout cursor - * in a VM_NONLINEAR vma. - */ - if (vma->vm_private_data) - goto out; - - /* Don't need a nonlinear mapping, exit success */ - if (pgoff == linear_page_index(vma, start)) { - err = 0; - goto out; - } - - if (!has_write_lock) { -get_write_lock: - up_read(&mm->mmap_sem); - down_write(&mm->mmap_sem); - has_write_lock = 1; - goto retry; - } - mapping = vma->vm_file->f_mapping; - /* - * page_mkclean doesn't work on nonlinear vmas, so if - * dirty pages need to be accounted, emulate with linear - * vmas. - */ - if (mapping_cap_account_dirty(mapping)) { - unsigned long addr; - struct file *file = get_file(vma->vm_file); - /* mmap_region may free vma; grab the info now */ - vm_flags = vma->vm_flags; - - addr = mmap_region(file, start, size, vm_flags, pgoff); - fput(file); - if (IS_ERR_VALUE(addr)) { - err = addr; - } else { - BUG_ON(addr != start); - err = 0; - } - goto out_freed; - } - mutex_lock(&mapping->i_mmap_mutex); - flush_dcache_mmap_lock(mapping); - vma->vm_flags |= VM_NONLINEAR; - vma_interval_tree_remove(vma, &mapping->i_mmap); - vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear); - flush_dcache_mmap_unlock(mapping); - mutex_unlock(&mapping->i_mmap_mutex); - } - - if (vma->vm_flags & VM_LOCKED) { - /* - * drop PG_Mlocked flag for over-mapped range - */ - if (!has_write_lock) - goto get_write_lock; - vm_flags = vma->vm_flags; - munlock_vma_pages_range(vma, start, start + size); - vma->vm_flags = vm_flags; - } - - mmu_notifier_invalidate_range_start(mm, start, start + size); - err = vma->vm_ops->remap_pages(vma, start, size, pgoff); - mmu_notifier_invalidate_range_end(mm, start, start + size); - - /* - * We can't clear VM_NONLINEAR because we'd have to do - * it after ->populate completes, and that would prevent - * downgrading the lock. (Locks can't be upgraded). - */ - -out: - if (vma) - vm_flags = vma->vm_flags; -out_freed: - if (likely(!has_write_lock)) - up_read(&mm->mmap_sem); - else - up_write(&mm->mmap_sem); - if (!err && ((vm_flags & VM_LOCKED) || !(flags & MAP_NONBLOCK))) - mm_populate(start, size); - - return err; -} diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3e04ef4a223c..241cf4f91e24 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4132,6 +4132,10 @@ static ssize_t mem_cgroup_force_empty_write(struct kernfs_open_file *of, if (mem_cgroup_is_root(memcg)) return -EINVAL; + pr_info_once("%s (%d): memory.force_empty is deprecated and will be " + "removed. Let us know if it is needed in your usecase at " + "linux-mm@kvack.org\n", + current->comm, task_pid_nr(current)); return mem_cgroup_force_empty(memcg) ?: nbytes; } @@ -5308,6 +5312,7 @@ static struct cftype mem_cgroup_files[] = { }, { .name = "force_empty", + .flags = CFTYPE_INSANE, .write = mem_cgroup_force_empty_write, }, { diff --git a/mm/mmap.c b/mm/mmap.c index 129b847d30cc..af82b67c9f4d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2581,6 +2581,75 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) return vm_munmap(addr, len); } + +/* + * Emulation of deprecated remap_file_pages() syscall. + */ +SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, + unsigned long, prot, unsigned long, pgoff, unsigned long, flags) +{ + + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long populate = 0; + unsigned long ret = -EINVAL; + struct file *file; + + pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. " + "See Documentation/vm/remap_file_pages.txt.\n", + current->comm, current->pid); + + if (prot) + return ret; + start = start & PAGE_MASK; + size = size & PAGE_MASK; + + if (start + size <= start) + return ret; + + /* Does pgoff wrap? */ + if (pgoff + (size >> PAGE_SHIFT) < pgoff) + return ret; + + down_write(&mm->mmap_sem); + vma = find_vma(mm, start); + + if (!vma || !(vma->vm_flags & VM_SHARED)) + goto out; + + if (start < vma->vm_start || start + size > vma->vm_end) + goto out; + + if (pgoff == linear_page_index(vma, start)) { + ret = 0; + goto out; + } + + prot |= vma->vm_flags & VM_READ ? PROT_READ : 0; + prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0; + prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0; + + flags &= MAP_NONBLOCK; + flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE; + if (vma->vm_flags & VM_LOCKED) { + flags |= MAP_LOCKED; + /* drop PG_Mlocked flag for over-mapped range */ + munlock_vma_pages_range(vma, start, start + size); + } + + file = get_file(vma->vm_file); + ret = do_mmap_pgoff(vma->vm_file, start, size, + prot, flags, pgoff, &populate); + fput(file); +out: + up_write(&mm->mmap_sem); + if (populate) + mm_populate(ret, populate); + if (!IS_ERR_VALUE(ret)) + ret = 0; + return ret; +} + static inline void verify_mm_writelocked(struct mm_struct *mm) { #ifdef CONFIG_DEBUG_VM diff --git a/mm/nommu.c b/mm/nommu.c index 4a852f6c5709..6cc583a131dd 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1999,14 +1999,6 @@ void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf) } EXPORT_SYMBOL(filemap_map_pages); -int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr, - unsigned long size, pgoff_t pgoff) -{ - BUG(); - return 0; -} -EXPORT_SYMBOL(generic_file_remap_pages); - static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, void *buf, int len, int write) { |