summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2014-06-27 14:08:28 +1000
committerStephen Rothwell <sfr@canb.auug.org.au>2014-06-27 14:08:29 +1000
commit3c45d5624cac27be5e4bc1341033d0c7ad43f2b2 (patch)
tree1d30820f05a27c0bc9dc3c037e944bd04178fb5c
parent6cc4ee2ce46c098aeda38ab6de7d5286f461083e (diff)
parent3e854aac314c850abe220ae96d5adcabed337880 (diff)
Merge branch 'akpm-current/current'
-rw-r--r--CREDITS7
-rw-r--r--Documentation/ABI/testing/sysfs-fs-nilfs2269
-rw-r--r--Documentation/RCU/whatisRCU.txt2
-rw-r--r--Documentation/cgroups/memcg_test.txt160
-rw-r--r--Documentation/kernel-parameters.txt8
-rw-r--r--Documentation/leds/leds-class.txt3
-rw-r--r--Documentation/oops-tracing.txt2
-rw-r--r--Documentation/printk-formats.txt6
-rw-r--r--Documentation/sysctl/kernel.txt1
-rw-r--r--MAINTAINERS5
-rw-r--r--Makefile3
-rw-r--r--arch/alpha/include/asm/Kbuild1
-rw-r--r--arch/alpha/include/asm/scatterlist.h6
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/include/asm/Kbuild1
-rw-r--r--arch/arm/include/asm/scatterlist.h12
-rw-r--r--arch/arm/mm/dma-mapping.c1
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/cris/include/asm/Kbuild1
-rw-r--r--arch/cris/include/asm/scatterlist.h6
-rw-r--r--arch/frv/include/asm/Kbuild1
-rw-r--r--arch/frv/include/asm/scatterlist.h6
-rw-r--r--arch/ia64/Kconfig1
-rw-r--r--arch/ia64/include/asm/Kbuild1
-rw-r--r--arch/ia64/include/asm/scatterlist.h7
-rw-r--r--arch/m32r/include/asm/Kbuild1
-rw-r--r--arch/m32r/include/asm/scatterlist.h6
-rw-r--r--arch/m68k/kernel/sys_m68k.c18
-rw-r--r--arch/microblaze/include/asm/Kbuild1
-rw-r--r--arch/microblaze/include/asm/scatterlist.h1
-rw-r--r--arch/mn10300/include/asm/Kbuild1
-rw-r--r--arch/mn10300/include/asm/scatterlist.h16
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/powerpc/include/asm/Kbuild1
-rw-r--r--arch/powerpc/include/asm/scatterlist.h17
-rw-r--r--arch/powerpc/kvm/Makefile1
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c19
-rw-r--r--arch/powerpc/kvm/book3s_hv_cma.c240
-rw-r--r--arch/powerpc/kvm/book3s_hv_cma.h27
-rw-r--r--arch/powerpc/mm/dma-noncoherent.c1
-rw-r--r--arch/powerpc/platforms/44x/warp.c1
-rw-r--r--arch/powerpc/platforms/52xx/efika.c1
-rw-r--r--arch/powerpc/platforms/amigaone/setup.c1
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/include/asm/Kbuild1
-rw-r--r--arch/s390/include/asm/scatterlist.h3
-rw-r--r--arch/score/include/asm/Kbuild1
-rw-r--r--arch/score/include/asm/scatterlist.h6
-rw-r--r--arch/sh/drivers/dma/Kconfig5
-rw-r--r--arch/sh/include/cpu-sh4/cpu/dma-register.h1
-rw-r--r--arch/sh/include/cpu-sh4a/cpu/dma.h3
-rw-r--r--arch/sparc/Kconfig1
-rw-r--r--arch/sparc/include/asm/Kbuild1
-rw-r--r--arch/sparc/include/asm/scatterlist.h8
-rw-r--r--arch/um/include/asm/Kbuild1
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/Kbuild3
-rw-r--r--arch/x86/include/asm/pgtable_64.h3
-rw-r--r--arch/x86/include/asm/scatterlist.h8
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/mm/init_64.c36
-rw-r--r--block/bio.c48
-rw-r--r--block/genhd.c2
-rw-r--r--crypto/zlib.c8
-rw-r--r--drivers/ata/Kconfig1
-rw-r--r--drivers/ata/libata-core.c72
-rw-r--r--drivers/base/Kconfig10
-rw-r--r--drivers/base/dma-contiguous.c220
-rw-r--r--drivers/base/memory.c30
-rw-r--r--drivers/block/zram/zram_drv.h5
-rw-r--r--drivers/gpu/drm/drm_hashtab.c2
-rw-r--r--drivers/input/Kconfig9
-rw-r--r--drivers/input/Makefile3
-rw-r--r--drivers/input/input.c6
-rw-r--r--drivers/input/leds.c249
-rw-r--r--drivers/leds/Kconfig3
-rw-r--r--drivers/misc/ti-st/st_core.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c2
-rw-r--r--drivers/net/irda/donauboe.c15
-rw-r--r--drivers/parport/parport_ip32.c2
-rw-r--r--drivers/rtc/Kconfig19
-rw-r--r--drivers/rtc/rtc-ds1343.c75
-rw-r--r--drivers/rtc/rtc-ds1742.c2
-rw-r--r--drivers/rtc/rtc-efi.c32
-rw-r--r--drivers/staging/lustre/lustre/libcfs/hash.c4
-rw-r--r--drivers/tty/Kconfig4
-rw-r--r--drivers/tty/vt/keyboard.c110
-rw-r--r--drivers/video/backlight/backlight.c2
-rw-r--r--fs/adfs/dir_fplus.c4
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/binfmt_elf.c23
-rw-r--r--fs/cachefiles/bind.c6
-rw-r--r--fs/cachefiles/daemon.c6
-rw-r--r--fs/cifs/cifsacl.c2
-rw-r--r--fs/cifs/cifssmb.c20
-rw-r--r--fs/cifs/file.c4
-rw-r--r--fs/cifs/sess.c2
-rw-r--r--fs/cifs/smb2file.c4
-rw-r--r--fs/cifs/smb2misc.c38
-rw-r--r--fs/cifs/smb2ops.c2
-rw-r--r--fs/cifs/smb2pdu.c2
-rw-r--r--fs/cifs/smb2pdu.h28
-rw-r--r--fs/compat.c2
-rw-r--r--fs/exec.c4
-rw-r--r--fs/exofs/ore_raid.c2
-rw-r--r--fs/ext4/fsync.c5
-rw-r--r--fs/hfsplus/catalog.c89
-rw-r--r--fs/hfsplus/dir.c11
-rw-r--r--fs/hfsplus/hfsplus_fs.h4
-rw-r--r--fs/hfsplus/super.c4
-rw-r--r--fs/isofs/Makefile2
-rw-r--r--fs/isofs/compress.c10
-rw-r--r--fs/isofs/export.c11
-rw-r--r--fs/isofs/inode.c88
-rw-r--r--fs/isofs/namei.c5
-rw-r--r--fs/isofs/rock.c44
-rw-r--r--fs/jbd/journal.c6
-rw-r--r--fs/jbd/revoke.c2
-rw-r--r--fs/jffs2/compr_zlib.c7
-rw-r--r--fs/mpage.c23
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nilfs2/Makefile2
-rw-r--r--fs/nilfs2/nilfs.h8
-rw-r--r--fs/nilfs2/super.c11
-rw-r--r--fs/nilfs2/sysfs.c1137
-rw-r--r--fs/nilfs2/sysfs.h176
-rw-r--r--fs/nilfs2/the_nilfs.c17
-rw-r--r--fs/nilfs2/the_nilfs.h20
-rw-r--r--fs/notify/inode_mark.c2
-rw-r--r--fs/notify/vfsmount_mark.c2
-rw-r--r--fs/ocfs2/alloc.c15
-rw-r--r--fs/ocfs2/aops.c21
-rw-r--r--fs/ocfs2/cluster/quorum.c13
-rw-r--r--fs/ocfs2/cluster/tcp.c45
-rw-r--r--fs/ocfs2/cluster/tcp.h1
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c5
-rw-r--r--fs/ocfs2/inode.c10
-rw-r--r--fs/ocfs2/ioctl.c129
-rw-r--r--fs/ocfs2/move_extents.c2
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/ocfs2/slot_map.c2
-rw-r--r--fs/omfs/inode.c2
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--fs/proc/proc_sysctl.c2
-rw-r--r--fs/proc/stat.c22
-rw-r--r--fs/pstore/ram_core.c2
-rw-r--r--fs/qnx6/Makefile1
-rw-r--r--fs/qnx6/dir.c26
-rw-r--r--fs/qnx6/inode.c99
-rw-r--r--fs/qnx6/namei.c6
-rw-r--r--fs/qnx6/qnx6.h12
-rw-r--r--fs/qnx6/super_mmi.c22
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/reiserfs/dir.c2
-rw-r--r--fs/reiserfs/do_balan.c2
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/reiserfs/ibalance.c2
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/reiserfs/ioctl.c2
-rw-r--r--fs/reiserfs/item_ops.c4
-rw-r--r--fs/reiserfs/lbalance.c2
-rw-r--r--fs/reiserfs/prints.c4
-rw-r--r--fs/reiserfs/procfs.c2
-rw-r--r--fs/reiserfs/stree.c2
-rw-r--r--fs/reiserfs/super.c9
-rw-r--r--fs/reiserfs/xattr.c22
-rw-r--r--fs/reiserfs/xattr_acl.c2
-rw-r--r--fs/reiserfs/xattr_security.c2
-rw-r--r--fs/reiserfs/xattr_trusted.c2
-rw-r--r--fs/reiserfs/xattr_user.c2
-rw-r--r--fs/romfs/super.c23
-rw-r--r--fs/seq_file.c30
-rw-r--r--fs/squashfs/file_direct.c2
-rw-r--r--fs/squashfs/super.c5
-rw-r--r--fs/ufs/Makefile1
-rw-r--r--fs/ufs/super.c304
-rw-r--r--fs/ufs/ufs.h10
-rw-r--r--include/linux/cma.h27
-rw-r--r--include/linux/crc64_ecma.h56
-rw-r--r--include/linux/decompress/bunzip2.h8
-rw-r--r--include/linux/decompress/generic.h10
-rw-r--r--include/linux/decompress/inflate.h8
-rw-r--r--include/linux/decompress/unlz4.h8
-rw-r--r--include/linux/decompress/unlzma.h8
-rw-r--r--include/linux/decompress/unlzo.h8
-rw-r--r--include/linux/decompress/unxz.h8
-rw-r--r--include/linux/dma-contiguous.h11
-rw-r--r--include/linux/gfp.h2
-rw-r--r--include/linux/glob.h10
-rw-r--r--include/linux/hugetlb.h7
-rw-r--r--include/linux/hugetlb_inline.h7
-rw-r--r--include/linux/input.h21
-rw-r--r--include/linux/kernel.h38
-rw-r--r--include/linux/klist.h2
-rw-r--r--include/linux/list.h14
-rw-r--r--include/linux/memblock.h4
-rw-r--r--include/linux/memcontrol.h94
-rw-r--r--include/linux/memory_hotplug.h9
-rw-r--r--include/linux/mm_types.h1
-rw-r--r--include/linux/mmzone.h2
-rw-r--r--include/linux/page-flags.h21
-rw-r--r--include/linux/page_cgroup.h70
-rw-r--r--include/linux/pagemap.h28
-rw-r--r--include/linux/rculist.h8
-rw-r--r--include/linux/scatterlist.h2
-rw-r--r--include/linux/sched.h2
-rw-r--r--include/linux/shm.h18
-rw-r--r--include/linux/slab.h14
-rw-r--r--include/linux/string.h1
-rw-r--r--include/linux/swap.h15
-rw-r--r--include/linux/sysctl.h2
-rw-r--r--include/linux/zbud.h2
-rw-r--r--include/linux/zlib.h4
-rw-r--r--include/linux/zpool.h224
-rw-r--r--include/scsi/scsi.h2
-rw-r--r--include/trace/events/migrate.h1
-rw-r--r--init/Kconfig46
-rw-r--r--init/do_mounts.c12
-rw-r--r--init/do_mounts_rd.c10
-rw-r--r--init/initramfs.c58
-rw-r--r--ipc/shm.c75
-rw-r--r--kernel/auditfilter.c4
-rw-r--r--kernel/bounds.c2
-rw-r--r--kernel/events/uprobes.c15
-rw-r--r--kernel/fork.c59
-rw-r--r--kernel/panic.c1
-rw-r--r--kernel/posix-timers.c57
-rw-r--r--kernel/printk/printk.c55
-rw-r--r--kernel/test_kprobes.c87
-rw-r--r--kernel/watchdog.c11
-rw-r--r--lib/Kconfig47
-rw-r--r--lib/Makefile3
-rw-r--r--lib/cmdline.c15
-rw-r--r--lib/crc64_ecma.c341
-rw-r--r--lib/decompress.c2
-rw-r--r--lib/decompress_bunzip2.c26
-rw-r--r--lib/decompress_inflate.c12
-rw-r--r--lib/decompress_unlz4.c83
-rw-r--r--lib/decompress_unlzma.c28
-rw-r--r--lib/decompress_unlzo.c12
-rw-r--r--lib/decompress_unxz.c10
-rw-r--r--lib/glob.c287
-rw-r--r--lib/idr.c25
-rw-r--r--lib/klist.c6
-rw-r--r--lib/list_sort.c71
-rw-r--r--lib/scatterlist.c4
-rw-r--r--lib/string_helpers.c15
-rw-r--r--lib/test-kstrtox.c2
-rw-r--r--lib/vsprintf.c20
-rw-r--r--mm/Kconfig54
-rw-r--r--mm/Makefile2
-rw-r--r--mm/cma.c335
-rw-r--r--mm/compaction.c17
-rw-r--r--mm/filemap.c13
-rw-r--r--mm/huge_memory.c65
-rw-r--r--mm/hugetlb.c1
-rw-r--r--mm/internal.h2
-rw-r--r--mm/madvise.c3
-rw-r--r--mm/memcontrol.c1509
-rw-r--r--mm/memory-failure.c15
-rw-r--r--mm/memory.c53
-rw-r--r--mm/memory_hotplug.c17
-rw-r--r--mm/migrate.c44
-rw-r--r--mm/mmap.c6
-rw-r--r--mm/page_alloc.c37
-rw-r--r--mm/readahead.c3
-rw-r--r--mm/rmap.c30
-rw-r--r--mm/shmem.c61
-rw-r--r--mm/slab.c223
-rw-r--r--mm/slab.h45
-rw-r--r--mm/slub.c235
-rw-r--r--mm/swap.c40
-rw-r--r--mm/swap_state.c8
-rw-r--r--mm/swapfile.c21
-rw-r--r--mm/truncate.c9
-rw-r--r--mm/util.c30
-rw-r--r--mm/vmalloc.c16
-rw-r--r--mm/vmscan.c154
-rw-r--r--mm/zbud.c123
-rw-r--r--mm/zpool.c207
-rw-r--r--mm/zsmalloc.c83
-rw-r--r--mm/zswap.c82
-rw-r--r--net/batman-adv/fragmentation.c2
-rw-r--r--net/bridge/br_multicast.c2
-rw-r--r--net/ipv4/fib_trie.c2
-rw-r--r--net/ipv6/addrlabel.c2
-rw-r--r--net/xfrm/xfrm_policy.c4
-rwxr-xr-xscripts/checkpatch.pl83
-rw-r--r--tools/testing/selftests/cpu-hotplug/Makefile2
-rw-r--r--tools/testing/selftests/memory-hotplug/Makefile2
292 files changed, 6786 insertions, 3750 deletions
diff --git a/CREDITS b/CREDITS
index 28ee1514b9de..03343bf820e4 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1381,6 +1381,9 @@ S: 17 rue Danton
S: F - 94270 Le Kremlin-Bicêtre
S: France
+N: Jack Hammer
+D: IBM ServeRAID RAID (ips) driver maintenance
+
N: Greg Hankins
E: gregh@cc.gatech.edu
D: fixed keyboard driver to separate LED and locking status
@@ -1691,6 +1694,10 @@ S: Reading
S: RG6 2NU
S: United Kingdom
+N: Dave Jeffery
+E: dhjeffery@gmail.com
+D: SCSI hacks and IBM ServeRAID RAID driver maintenance
+
N: Jakub Jelinek
E: jakub@redhat.com
W: http://sunsite.mff.cuni.cz/~jj
diff --git a/Documentation/ABI/testing/sysfs-fs-nilfs2 b/Documentation/ABI/testing/sysfs-fs-nilfs2
new file mode 100644
index 000000000000..304ba84a973a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-fs-nilfs2
@@ -0,0 +1,269 @@
+
+What: /sys/fs/nilfs2/features/revision
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show current revision of NILFS file system driver.
+ This value informs about file system revision that
+ driver is ready to support.
+
+What: /sys/fs/nilfs2/features/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/features group.
+
+What: /sys/fs/nilfs2/<device>/revision
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show NILFS file system revision on volume.
+ This value informs about metadata structures'
+ revision on mounted volume.
+
+What: /sys/fs/nilfs2/<device>/blocksize
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show volume's block size in bytes.
+
+What: /sys/fs/nilfs2/<device>/device_size
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show volume size in bytes.
+
+What: /sys/fs/nilfs2/<device>/free_blocks
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show count of free blocks on volume.
+
+What: /sys/fs/nilfs2/<device>/uuid
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show volume's UUID (Universally Unique Identifier).
+
+What: /sys/fs/nilfs2/<device>/volume_name
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show volume's label.
+
+What: /sys/fs/nilfs2/<device>/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device> group.
+
+What: /sys/fs/nilfs2/<device>/superblock/sb_write_time
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show last write time of super block in human-readable
+ format.
+
+What: /sys/fs/nilfs2/<device>/superblock/sb_write_time_secs
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show last write time of super block in seconds.
+
+What: /sys/fs/nilfs2/<device>/superblock/sb_write_count
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show current write count of super block.
+
+What: /sys/fs/nilfs2/<device>/superblock/sb_update_frequency
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show/Set interval of periodical update of superblock
+ (in seconds).
+
+What: /sys/fs/nilfs2/<device>/superblock/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/superblock
+ group.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_pseg_block
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show start block number of the latest segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_seg_sequence
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show sequence value of the latest segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_seg_checkpoint
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show checkpoint number of the latest segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/current_seg_sequence
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show segment sequence counter.
+
+What: /sys/fs/nilfs2/<device>/segctor/current_last_full_seg
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show index number of the latest full segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/next_full_seg
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show index number of the full segment index
+ to be used next.
+
+What: /sys/fs/nilfs2/<device>/segctor/next_pseg_offset
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show offset of next partial segment in the current
+ full segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/next_checkpoint
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show next checkpoint number.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_seg_write_time
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show write time of the last segment in
+ human-readable format.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_seg_write_time_secs
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show write time of the last segment in seconds.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_nongc_write_time
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show write time of the last segment not for cleaner
+ operation in human-readable format.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_nongc_write_time_secs
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show write time of the last segment not for cleaner
+ operation in seconds.
+
+What: /sys/fs/nilfs2/<device>/segctor/dirty_data_blocks_count
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of dirty data blocks.
+
+What: /sys/fs/nilfs2/<device>/segctor/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/segctor
+ group.
+
+What: /sys/fs/nilfs2/<device>/segments/segments_number
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of segments on a volume.
+
+What: /sys/fs/nilfs2/<device>/segments/blocks_per_segment
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of blocks in segment.
+
+What: /sys/fs/nilfs2/<device>/segments/clean_segments
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show count of clean segments.
+
+What: /sys/fs/nilfs2/<device>/segments/dirty_segments
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show count of dirty segments.
+
+What: /sys/fs/nilfs2/<device>/segments/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/segments
+ group.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/checkpoints_number
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of checkpoints on volume.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/snapshots_number
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of snapshots on volume.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/last_seg_checkpoint
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show checkpoint number of the latest segment.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/next_checkpoint
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show next checkpoint number.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/checkpoints
+ group.
+
+What: /sys/fs/nilfs2/<device>/mounted_snapshots/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe content of /sys/fs/nilfs2/<device>/mounted_snapshots
+ group.
+
+What: /sys/fs/nilfs2/<device>/mounted_snapshots/<id>/inodes_count
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of inodes for snapshot.
+
+What: /sys/fs/nilfs2/<device>/mounted_snapshots/<id>/blocks_count
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of blocks for snapshot.
+
+What: /sys/fs/nilfs2/<device>/mounted_snapshots/<id>/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/mounted_snapshots/<id>
+ group.
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 49b8551a3b68..e48c57f1943b 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -818,7 +818,7 @@ RCU pointer/list update:
list_add_tail_rcu
list_del_rcu
list_replace_rcu
- hlist_add_after_rcu
+ hlist_add_behind_rcu
hlist_add_before_rcu
hlist_add_head_rcu
hlist_del_rcu
diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt
index 80ac454704b8..8870b0212150 100644
--- a/Documentation/cgroups/memcg_test.txt
+++ b/Documentation/cgroups/memcg_test.txt
@@ -24,64 +24,27 @@ Please note that implementation details can be changed.
a page/swp_entry may be charged (usage += PAGE_SIZE) at
- mem_cgroup_charge_anon()
- Called at new page fault and Copy-On-Write.
-
- mem_cgroup_try_charge_swapin()
- Called at do_swap_page() (page fault on swap entry) and swapoff.
- Followed by charge-commit-cancel protocol. (With swap accounting)
- At commit, a charge recorded in swap_cgroup is removed.
-
- mem_cgroup_charge_file()
- Called at add_to_page_cache()
-
- mem_cgroup_cache_charge_swapin()
- Called at shmem's swapin.
-
- mem_cgroup_prepare_migration()
- Called before migration. "extra" charge is done and followed by
- charge-commit-cancel protocol.
- At commit, charge against oldpage or newpage will be committed.
+ mem_cgroup_try_charge()
2. Uncharge
a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by
- mem_cgroup_uncharge_page()
- Called when an anonymous page is fully unmapped. I.e., mapcount goes
- to 0. If the page is SwapCache, uncharge is delayed until
- mem_cgroup_uncharge_swapcache().
-
- mem_cgroup_uncharge_cache_page()
- Called when a page-cache is deleted from radix-tree. If the page is
- SwapCache, uncharge is delayed until mem_cgroup_uncharge_swapcache().
-
- mem_cgroup_uncharge_swapcache()
- Called when SwapCache is removed from radix-tree. The charge itself
- is moved to swap_cgroup. (If mem+swap controller is disabled, no
- charge to swap occurs.)
+ mem_cgroup_uncharge()
+ Called when a page's refcount goes down to 0.
mem_cgroup_uncharge_swap()
Called when swp_entry's refcnt goes down to 0. A charge against swap
disappears.
- mem_cgroup_end_migration(old, new)
- At success of migration old is uncharged (if necessary), a charge
- to new page is committed. At failure, charge to old page is committed.
-
3. charge-commit-cancel
- In some case, we can't know this "charge" is valid or not at charging
- (because of races).
- To handle such case, there are charge-commit-cancel functions.
- mem_cgroup_try_charge_XXX
- mem_cgroup_commit_charge_XXX
- mem_cgroup_cancel_charge_XXX
- these are used in swap-in and migration.
+ Memcg pages are charged in two steps:
+ mem_cgroup_try_charge()
+ mem_cgroup_commit_charge() or mem_cgroup_cancel_charge()
At try_charge(), there are no flags to say "this page is charged".
at this point, usage += PAGE_SIZE.
- At commit(), the function checks the page should be charged or not
- and set flags or avoid charging.(usage -= PAGE_SIZE)
+ At commit(), the page is associated with the memcg.
At cancel(), simply usage -= PAGE_SIZE.
@@ -91,18 +54,6 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
Anonymous page is newly allocated at
- page fault into MAP_ANONYMOUS mapping.
- Copy-On-Write.
- It is charged right after it's allocated before doing any page table
- related operations. Of course, it's uncharged when another page is used
- for the fault address.
-
- At freeing anonymous page (by exit() or munmap()), zap_pte() is called
- and pages for ptes are freed one by one.(see mm/memory.c). Uncharges
- are done at page_remove_rmap() when page_mapcount() goes down to 0.
-
- Another page freeing is by page-reclaim (vmscan.c) and anonymous
- pages are swapped out. In this case, the page is marked as
- PageSwapCache(). uncharge() routine doesn't uncharge the page marked
- as SwapCache(). It's delayed until __delete_from_swap_cache().
4.1 Swap-in.
At swap-in, the page is taken from swap-cache. There are 2 cases.
@@ -111,41 +62,6 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
(b) If the SwapCache has been mapped by processes, it has been
charged already.
- This swap-in is one of the most complicated work. In do_swap_page(),
- following events occur when pte is unchanged.
-
- (1) the page (SwapCache) is looked up.
- (2) lock_page()
- (3) try_charge_swapin()
- (4) reuse_swap_page() (may call delete_swap_cache())
- (5) commit_charge_swapin()
- (6) swap_free().
-
- Considering following situation for example.
-
- (A) The page has not been charged before (2) and reuse_swap_page()
- doesn't call delete_from_swap_cache().
- (B) The page has not been charged before (2) and reuse_swap_page()
- calls delete_from_swap_cache().
- (C) The page has been charged before (2) and reuse_swap_page() doesn't
- call delete_from_swap_cache().
- (D) The page has been charged before (2) and reuse_swap_page() calls
- delete_from_swap_cache().
-
- memory.usage/memsw.usage changes to this page/swp_entry will be
- Case (A) (B) (C) (D)
- Event
- Before (2) 0/ 1 0/ 1 1/ 1 1/ 1
- ===========================================
- (3) +1/+1 +1/+1 +1/+1 +1/+1
- (4) - 0/ 0 - -1/ 0
- (5) 0/-1 0/ 0 -1/-1 0/ 0
- (6) - 0/-1 - 0/-1
- ===========================================
- Result 1/ 1 1/ 1 1/ 1 1/ 1
-
- In any cases, charges to this page should be 1/ 1.
-
4.2 Swap-out.
At swap-out, typical state transition is below.
@@ -158,28 +74,20 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
swp_entry's refcnt -= 1.
- At (b), the page is marked as SwapCache and not uncharged.
- At (d), the page is removed from SwapCache and a charge in page_cgroup
- is moved to swap_cgroup.
-
Finally, at task exit,
(e) zap_pte() is called and swp_entry's refcnt -=1 -> 0.
- Here, a charge in swap_cgroup disappears.
5. Page Cache
Page Cache is charged at
- add_to_page_cache_locked().
- uncharged at
- - __remove_from_page_cache().
-
The logic is very clear. (About migration, see below)
Note: __remove_from_page_cache() is called by remove_from_page_cache()
and __remove_mapping().
6. Shmem(tmpfs) Page Cache
- Memcg's charge/uncharge have special handlers of shmem. The best way
- to understand shmem's page state transition is to read mm/shmem.c.
+ The best way to understand shmem's page state transition is to read
+ mm/shmem.c.
But brief explanation of the behavior of memcg around shmem will be
helpful to understand the logic.
@@ -192,56 +100,10 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
It's charged when...
- A new page is added to shmem's radix-tree.
- A swp page is read. (move a charge from swap_cgroup to page_cgroup)
- It's uncharged when
- - A page is removed from radix-tree and not SwapCache.
- - When SwapCache is removed, a charge is moved to swap_cgroup.
- - When swp_entry's refcnt goes down to 0, a charge in swap_cgroup
- disappears.
7. Page Migration
- One of the most complicated functions is page-migration-handler.
- Memcg has 2 routines. Assume that we are migrating a page's contents
- from OLDPAGE to NEWPAGE.
-
- Usual migration logic is..
- (a) remove the page from LRU.
- (b) allocate NEWPAGE (migration target)
- (c) lock by lock_page().
- (d) unmap all mappings.
- (e-1) If necessary, replace entry in radix-tree.
- (e-2) move contents of a page.
- (f) map all mappings again.
- (g) pushback the page to LRU.
- (-) OLDPAGE will be freed.
-
- Before (g), memcg should complete all necessary charge/uncharge to
- NEWPAGE/OLDPAGE.
-
- The point is....
- - If OLDPAGE is anonymous, all charges will be dropped at (d) because
- try_to_unmap() drops all mapcount and the page will not be
- SwapCache.
-
- - If OLDPAGE is SwapCache, charges will be kept at (g) because
- __delete_from_swap_cache() isn't called at (e-1)
-
- - If OLDPAGE is page-cache, charges will be kept at (g) because
- remove_from_swap_cache() isn't called at (e-1)
-
- memcg provides following hooks.
-
- - mem_cgroup_prepare_migration(OLDPAGE)
- Called after (b) to account a charge (usage += PAGE_SIZE) against
- memcg which OLDPAGE belongs to.
-
- - mem_cgroup_end_migration(OLDPAGE, NEWPAGE)
- Called after (f) before (g).
- If OLDPAGE is used, commit OLDPAGE again. If OLDPAGE is already
- charged, a charge by prepare_migration() is automatically canceled.
- If NEWPAGE is used, commit NEWPAGE and uncharge OLDPAGE.
-
- But zap_pte() (by exit or munmap) can be called while migration,
- we have to check if OLDPAGE/NEWPAGE is a valid page after commit().
+
+ mem_cgroup_migrate()
8. LRU
Each memcg has its own private LRU. Now, its handling is under global
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 5e0752f7512e..c3ae4631f1b1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1692,8 +1692,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
7 (KERN_DEBUG) debug-level messages
log_buf_len=n[KMG] Sets the size of the printk ring buffer,
- in bytes. n must be a power of two. The default
- size is set in the kernel config file.
+ in bytes. n must be a power of two and greater
+ than the minimal size. The minimal size is defined
+ by LOG_BUF_SHIFT kernel config parameter. There is
+ also CONFIG_LOG_CPU_MAX_BUF_SHIFT config parameter
+ that allows to increase the default size depending on
+ the number of CPUs. See init/Kconfig for more details.
logo.nologo [FB] Disables display of the built-in Linux logo.
This may be used to provide more screen space for
diff --git a/Documentation/leds/leds-class.txt b/Documentation/leds/leds-class.txt
index 79699c200766..62261c04060a 100644
--- a/Documentation/leds/leds-class.txt
+++ b/Documentation/leds/leds-class.txt
@@ -2,9 +2,6 @@
LED handling under Linux
========================
-If you're reading this and thinking about keyboard leds, these are
-handled by the input subsystem and the led class is *not* needed.
-
In its simplest form, the LED class just allows control of LEDs from
userspace. LEDs appear in /sys/class/leds/. The maximum brightness of the
LED is defined in max_brightness file. The brightness file will set the brightness
diff --git a/Documentation/oops-tracing.txt b/Documentation/oops-tracing.txt
index e3155995ddd8..beefb9f82902 100644
--- a/Documentation/oops-tracing.txt
+++ b/Documentation/oops-tracing.txt
@@ -268,6 +268,8 @@ characters, each representing a particular tainted value.
14: 'E' if an unsigned module has been loaded in a kernel supporting
module signature.
+ 15: 'L' if a soft lockup has previously occurred on the system.
+
The primary reason for the 'Tainted: ' string is to tell kernel
debuggers if this is a clean kernel or if anything unusual has
occurred. Tainting is permanent: even if an offending module is
diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt
index b4498218c474..3b56a991f52e 100644
--- a/Documentation/printk-formats.txt
+++ b/Documentation/printk-formats.txt
@@ -184,6 +184,12 @@ dentry names:
equivalent of %s dentry->d_name.name we used to use, %pd<n> prints
n last components. %pD does the same thing for struct file.
+task_struct comm name:
+
+ %pT
+
+ For printing task_struct->comm.
+
struct va_format:
%pV
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index c14374e71775..f79eb9666379 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -826,6 +826,7 @@ can be ORed together:
4096 - An out-of-tree module has been loaded.
8192 - An unsigned module has been loaded in a kernel supporting module
signature.
+16384 - A soft lockup has previously occurred on the system.
==============================================================
diff --git a/MAINTAINERS b/MAINTAINERS
index 24cb6fff8b6a..8ea43058c4de 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4446,10 +4446,7 @@ S: Supported
F: drivers/scsi/ibmvscsi/ibmvfc*
IBM ServeRAID RAID DRIVER
-P: Jack Hammer
-M: Dave Jeffery <ipslinux@adaptec.com>
-W: http://www.developer.ibm.com/welcome/netfinity/serveraid.html
-S: Supported
+S: Orphan
F: drivers/scsi/ips.*
ICH LPC AND GPIO DRIVER
diff --git a/Makefile b/Makefile
index b597e83bce49..b89ca7023b76 100644
--- a/Makefile
+++ b/Makefile
@@ -631,6 +631,9 @@ else
KBUILD_CFLAGS += -O2
endif
+# Tell gcc to never replace conditional load with a non-conditional one
+KBUILD_CFLAGS += $(call cc-option,--param=allow-store-data-races=0)
+
ifdef CONFIG_READABLE_ASM
# Disable optimizations that make assembler listings hard to read.
# reorder blocks reorders the control in the function
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index 96e54bed5088..e858aa0ad8af 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -6,4 +6,5 @@ generic-y += exec.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/alpha/include/asm/scatterlist.h b/arch/alpha/include/asm/scatterlist.h
deleted file mode 100644
index 017d7471c3c4..000000000000
--- a/arch/alpha/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ALPHA_SCATTERLIST_H
-#define _ALPHA_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* !(_ALPHA_SCATTERLIST_H) */
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 245058b3b0ef..b9679c8ca8ed 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -83,6 +83,7 @@ config ARM
<http://www.arm.linux.org.uk/>.
config ARM_HAS_SG_CHAIN
+ select ARCH_HAS_SG_CHAIN
bool
config NEED_SG_DMA_LENGTH
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index f5a357601983..70cd84eb7fda 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -22,6 +22,7 @@ generic-y += poll.h
generic-y += preempt.h
generic-y += resource.h
generic-y += rwsem.h
+generic-y += scatterlist.h
generic-y += sections.h
generic-y += segment.h
generic-y += sembuf.h
diff --git a/arch/arm/include/asm/scatterlist.h b/arch/arm/include/asm/scatterlist.h
deleted file mode 100644
index cefdb8f898a1..000000000000
--- a/arch/arm/include/asm/scatterlist.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASMARM_SCATTERLIST_H
-#define _ASMARM_SCATTERLIST_H
-
-#ifdef CONFIG_ARM_HAS_SG_CHAIN
-#define ARCH_HAS_SG_CHAIN
-#endif
-
-#include <asm/memory.h>
-#include <asm/types.h>
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASMARM_SCATTERLIST_H */
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 4c88935654ca..3116880b7874 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -26,6 +26,7 @@
#include <linux/io.h>
#include <linux/vmalloc.h>
#include <linux/sizes.h>
+#include <linux/cma.h>
#include <asm/memory.h>
#include <asm/highmem.h>
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a474de346be6..65317fb7f717 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2,6 +2,7 @@ config ARM64
def_bool y
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_HAS_OPP
+ select ARCH_HAS_SG_CHAIN
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_WANT_OPTIONAL_GPIOLIB
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index afff5105909d..31742dfadff9 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -13,6 +13,7 @@ generic-y += linkage.h
generic-y += mcs_spinlock.h
generic-y += module.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
generic-y += vga.h
generic-y += xor.h
diff --git a/arch/cris/include/asm/scatterlist.h b/arch/cris/include/asm/scatterlist.h
deleted file mode 100644
index f11f8f40ec4a..000000000000
--- a/arch/cris/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_CRIS_SCATTERLIST_H
-#define __ASM_CRIS_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* !(__ASM_CRIS_SCATTERLIST_H) */
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 87b95eb8aee5..5b73921b6e9d 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -5,4 +5,5 @@ generic-y += exec.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/frv/include/asm/scatterlist.h b/arch/frv/include/asm/scatterlist.h
deleted file mode 100644
index 0e5eb3018468..000000000000
--- a/arch/frv/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_SCATTERLIST_H
-#define _ASM_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* !_ASM_SCATTERLIST_H */
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 2f3abcf8f6bc..56986a060903 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -27,6 +27,7 @@ config IA64
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_VIRT_CPU_ACCOUNTING
+ select ARCH_HAS_SG_CHAIN
select VIRT_TO_BUS
select ARCH_DISCARD_MEMBLOCK
select GENERIC_IRQ_PROBE
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 0da4aa2602ae..e8317d2d6c8d 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -5,5 +5,6 @@ generic-y += hash.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
generic-y += vtime.h
diff --git a/arch/ia64/include/asm/scatterlist.h b/arch/ia64/include/asm/scatterlist.h
deleted file mode 100644
index 08fd93bff1db..000000000000
--- a/arch/ia64/include/asm/scatterlist.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _ASM_IA64_SCATTERLIST_H
-#define _ASM_IA64_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* _ASM_IA64_SCATTERLIST_H */
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index 67779a74b62d..accc10a3dc78 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -6,4 +6,5 @@ generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += module.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/m32r/include/asm/scatterlist.h b/arch/m32r/include/asm/scatterlist.h
deleted file mode 100644
index 7370b8b6243e..000000000000
--- a/arch/m32r/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_M32R_SCATTERLIST_H
-#define _ASM_M32R_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASM_M32R_SCATTERLIST_H */
diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index 3a480b3df0d6..d2263a0bdc3d 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -376,7 +376,6 @@ cache_flush_060 (unsigned long addr, int scope, int cache, unsigned long len)
asmlinkage int
sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len)
{
- struct vm_area_struct *vma;
int ret = -EINVAL;
if (scope < FLUSH_SCOPE_LINE || scope > FLUSH_SCOPE_ALL ||
@@ -389,16 +388,23 @@ sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len)
if (!capable(CAP_SYS_ADMIN))
goto out;
} else {
+ struct vm_area_struct *vma;
+ bool invalid;
+
+ /* Check for overflow. */
+ if (addr + len < addr)
+ goto out;
+
/*
* Verify that the specified address region actually belongs
* to this process.
*/
- vma = find_vma (current->mm, addr);
ret = -EINVAL;
- /* Check for overflow. */
- if (addr + len < addr)
- goto out;
- if (vma == NULL || addr < vma->vm_start || addr + len > vma->vm_end)
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma(current->mm, addr);
+ invalid = !vma || addr < vma->vm_start || addr + len > vma->vm_end;
+ up_read(&current->mm->mmap_sem);
+ if (invalid)
goto out;
}
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 35b3ecaf25d5..27a3acda6c19 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -7,5 +7,6 @@ generic-y += exec.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += syscalls.h
generic-y += trace_clock.h
diff --git a/arch/microblaze/include/asm/scatterlist.h b/arch/microblaze/include/asm/scatterlist.h
deleted file mode 100644
index 35d786fe93ae..000000000000
--- a/arch/microblaze/include/asm/scatterlist.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/scatterlist.h>
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index 654d5ba6e310..ecbd6676bd33 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -6,4 +6,5 @@ generic-y += exec.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/mn10300/include/asm/scatterlist.h b/arch/mn10300/include/asm/scatterlist.h
deleted file mode 100644
index 7baa4006008a..000000000000
--- a/arch/mn10300/include/asm/scatterlist.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* MN10300 Scatterlist definitions
- *
- * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-#ifndef _ASM_SCATTERLIST_H
-#define _ASM_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASM_SCATTERLIST_H */
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index bd6dd6ed3a9f..e5b91709fe95 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -111,6 +111,7 @@ config PPC
select HAVE_DMA_API_DEBUG
select HAVE_OPROFILE
select HAVE_DEBUG_KMEMLEAK
+ select ARCH_HAS_SG_CHAIN
select GENERIC_ATOMIC64 if PPC32
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_PERF_EVENTS
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 3fb1bc432f4f..7f23f162ce9c 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -4,5 +4,6 @@ generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
generic-y += rwsem.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
generic-y += vtime.h
diff --git a/arch/powerpc/include/asm/scatterlist.h b/arch/powerpc/include/asm/scatterlist.h
deleted file mode 100644
index de1f620bd5c9..000000000000
--- a/arch/powerpc/include/asm/scatterlist.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef _ASM_POWERPC_SCATTERLIST_H
-#define _ASM_POWERPC_SCATTERLIST_H
-/*
- * Copyright (C) 2001 PPC64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/dma.h>
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* _ASM_POWERPC_SCATTERLIST_H */
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index ce569b6bf4d8..72905c30082e 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -90,7 +90,6 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
book3s_hv_rm_mmu.o \
book3s_hv_ras.o \
book3s_hv_builtin.o \
- book3s_hv_cma.o \
$(kvm-book3s_64-builtin-xics-objs-y)
endif
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 80561074078d..a41e62580015 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,8 +37,6 @@
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
-#include "book3s_hv_cma.h"
-
/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
#define MAX_LPID_970 63
@@ -64,10 +62,10 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
}
kvm->arch.hpt_cma_alloc = 0;
- VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
if (page) {
hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+ memset((void *)hpt, 0, (1 << order));
kvm->arch.hpt_cma_alloc = 1;
}
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 7cde8a665205..6cf498a9bc98 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -16,12 +16,14 @@
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/sizes.h>
+#include <linux/cma.h>
#include <asm/cputable.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include "book3s_hv_cma.h"
+#define KVM_CMA_CHUNK_ORDER 18
+
/*
* Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
* should be power of 2.
@@ -43,6 +45,8 @@ static unsigned long kvm_cma_resv_ratio = 5;
unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */
EXPORT_SYMBOL_GPL(kvm_rma_pages);
+static struct cma *kvm_cma;
+
/* Work out RMLS (real mode limit selector) field value for a given RMA size.
Assumes POWER7 or PPC970. */
static inline int lpcr_rmls(unsigned long rma_size)
@@ -97,7 +101,7 @@ struct kvm_rma_info *kvm_alloc_rma()
ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
if (!ri)
return NULL;
- page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
+ page = cma_alloc(kvm_cma, kvm_rma_pages, get_order(kvm_rma_pages));
if (!page)
goto err_out;
atomic_set(&ri->use_count, 1);
@@ -112,7 +116,7 @@ EXPORT_SYMBOL_GPL(kvm_alloc_rma);
void kvm_release_rma(struct kvm_rma_info *ri)
{
if (atomic_dec_and_test(&ri->use_count)) {
- kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
+ cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages);
kfree(ri);
}
}
@@ -131,16 +135,18 @@ struct page *kvm_alloc_hpt(unsigned long nr_pages)
{
unsigned long align_pages = HPT_ALIGN_PAGES;
+ VM_BUG_ON(get_order(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+
/* Old CPUs require HPT aligned on a multiple of its size */
if (!cpu_has_feature(CPU_FTR_ARCH_206))
align_pages = nr_pages;
- return kvm_alloc_cma(nr_pages, align_pages);
+ return cma_alloc(kvm_cma, nr_pages, get_order(align_pages));
}
EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
void kvm_release_hpt(struct page *page, unsigned long nr_pages)
{
- kvm_release_cma(page, nr_pages);
+ cma_release(kvm_cma, page, nr_pages);
}
EXPORT_SYMBOL_GPL(kvm_release_hpt);
@@ -179,7 +185,8 @@ void __init kvm_cma_reserve(void)
align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
- kvm_cma_declare_contiguous(selected_size, align_size);
+ cma_declare_contiguous(0, selected_size, 0, align_size,
+ KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma);
}
}
diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c
deleted file mode 100644
index d9d3d8553d51..000000000000
--- a/arch/powerpc/kvm/book3s_hv_cma.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
- * for DMA mapping framework
- *
- * Copyright IBM Corporation, 2013
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License or (at your optional) any later version of the license.
- *
- */
-#define pr_fmt(fmt) "kvm_cma: " fmt
-
-#ifdef CONFIG_CMA_DEBUG
-#ifndef DEBUG
-# define DEBUG
-#endif
-#endif
-
-#include <linux/memblock.h>
-#include <linux/mutex.h>
-#include <linux/sizes.h>
-#include <linux/slab.h>
-
-#include "book3s_hv_cma.h"
-
-struct kvm_cma {
- unsigned long base_pfn;
- unsigned long count;
- unsigned long *bitmap;
-};
-
-static DEFINE_MUTEX(kvm_cma_mutex);
-static struct kvm_cma kvm_cma_area;
-
-/**
- * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling
- * for kvm hash pagetable
- * @size: Size of the reserved memory.
- * @alignment: Alignment for the contiguous memory area
- *
- * This function reserves memory for kvm cma area. It should be
- * called by arch code when early allocator (memblock or bootmem)
- * is still activate.
- */
-long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment)
-{
- long base_pfn;
- phys_addr_t addr;
- struct kvm_cma *cma = &kvm_cma_area;
-
- pr_debug("%s(size %lx)\n", __func__, (unsigned long)size);
-
- if (!size)
- return -EINVAL;
- /*
- * Sanitise input arguments.
- * We should be pageblock aligned for CMA.
- */
- alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order));
- size = ALIGN(size, alignment);
- /*
- * Reserve memory
- * Use __memblock_alloc_base() since
- * memblock_alloc_base() panic()s.
- */
- addr = __memblock_alloc_base(size, alignment, 0);
- if (!addr) {
- base_pfn = -ENOMEM;
- goto err;
- } else
- base_pfn = PFN_DOWN(addr);
-
- /*
- * Each reserved area must be initialised later, when more kernel
- * subsystems (like slab allocator) are available.
- */
- cma->base_pfn = base_pfn;
- cma->count = size >> PAGE_SHIFT;
- pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M);
- return 0;
-err:
- pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
- return base_pfn;
-}
-
-/**
- * kvm_alloc_cma() - allocate pages from contiguous area
- * @nr_pages: Requested number of pages.
- * @align_pages: Requested alignment in number of pages
- *
- * This function allocates memory buffer for hash pagetable.
- */
-struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
-{
- int ret;
- struct page *page = NULL;
- struct kvm_cma *cma = &kvm_cma_area;
- unsigned long chunk_count, nr_chunk;
- unsigned long mask, pfn, pageno, start = 0;
-
-
- if (!cma || !cma->count)
- return NULL;
-
- pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__,
- (void *)cma, nr_pages, align_pages);
-
- if (!nr_pages)
- return NULL;
- /*
- * align mask with chunk size. The bit tracks pages in chunk size
- */
- VM_BUG_ON(!is_power_of_2(align_pages));
- mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1;
- BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER);
-
- chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
- nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
-
- mutex_lock(&kvm_cma_mutex);
- for (;;) {
- pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count,
- start, nr_chunk, mask);
- if (pageno >= chunk_count)
- break;
-
- pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT));
- ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA);
- if (ret == 0) {
- bitmap_set(cma->bitmap, pageno, nr_chunk);
- page = pfn_to_page(pfn);
- memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT);
- break;
- } else if (ret != -EBUSY) {
- break;
- }
- pr_debug("%s(): memory range at %p is busy, retrying\n",
- __func__, pfn_to_page(pfn));
- /* try again with a bit different memory target */
- start = pageno + mask + 1;
- }
- mutex_unlock(&kvm_cma_mutex);
- pr_debug("%s(): returned %p\n", __func__, page);
- return page;
-}
-
-/**
- * kvm_release_cma() - release allocated pages for hash pagetable
- * @pages: Allocated pages.
- * @nr_pages: Number of allocated pages.
- *
- * This function releases memory allocated by kvm_alloc_cma().
- * It returns false when provided pages do not belong to contiguous area and
- * true otherwise.
- */
-bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
-{
- unsigned long pfn;
- unsigned long nr_chunk;
- struct kvm_cma *cma = &kvm_cma_area;
-
- if (!cma || !pages)
- return false;
-
- pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages);
-
- pfn = page_to_pfn(pages);
-
- if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
- return false;
-
- VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count);
- nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
-
- mutex_lock(&kvm_cma_mutex);
- bitmap_clear(cma->bitmap,
- (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT),
- nr_chunk);
- free_contig_range(pfn, nr_pages);
- mutex_unlock(&kvm_cma_mutex);
-
- return true;
-}
-
-static int __init kvm_cma_activate_area(unsigned long base_pfn,
- unsigned long count)
-{
- unsigned long pfn = base_pfn;
- unsigned i = count >> pageblock_order;
- struct zone *zone;
-
- WARN_ON_ONCE(!pfn_valid(pfn));
- zone = page_zone(pfn_to_page(pfn));
- do {
- unsigned j;
- base_pfn = pfn;
- for (j = pageblock_nr_pages; j; --j, pfn++) {
- WARN_ON_ONCE(!pfn_valid(pfn));
- /*
- * alloc_contig_range requires the pfn range
- * specified to be in the same zone. Make this
- * simple by forcing the entire CMA resv range
- * to be in the same zone.
- */
- if (page_zone(pfn_to_page(pfn)) != zone)
- return -EINVAL;
- }
- init_cma_reserved_pageblock(pfn_to_page(base_pfn));
- } while (--i);
- return 0;
-}
-
-static int __init kvm_cma_init_reserved_areas(void)
-{
- int bitmap_size, ret;
- unsigned long chunk_count;
- struct kvm_cma *cma = &kvm_cma_area;
-
- pr_debug("%s()\n", __func__);
- if (!cma->count)
- return 0;
- chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
- bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long);
- cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
- if (!cma->bitmap)
- return -ENOMEM;
-
- ret = kvm_cma_activate_area(cma->base_pfn, cma->count);
- if (ret)
- goto error;
- return 0;
-
-error:
- kfree(cma->bitmap);
- return ret;
-}
-core_initcall(kvm_cma_init_reserved_areas);
diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h
deleted file mode 100644
index 655144f75fa5..000000000000
--- a/arch/powerpc/kvm/book3s_hv_cma.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
- * for DMA mapping framework
- *
- * Copyright IBM Corporation, 2013
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License or (at your optional) any later version of the license.
- *
- */
-
-#ifndef __POWERPC_KVM_CMA_ALLOC_H__
-#define __POWERPC_KVM_CMA_ALLOC_H__
-/*
- * Both RMA and Hash page allocation will be multiple of 256K.
- */
-#define KVM_CMA_CHUNK_ORDER 18
-
-extern struct page *kvm_alloc_cma(unsigned long nr_pages,
- unsigned long align_pages);
-extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages);
-extern long kvm_cma_declare_contiguous(phys_addr_t size,
- phys_addr_t alignment) __init;
-#endif
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 7b6c10750179..d85e86aac7fb 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -33,6 +33,7 @@
#include <linux/export.h>
#include <asm/tlbflush.h>
+#include <asm/dma.h>
#include "mmu_decl.h"
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
index 534574a97ec9..3a104284b338 100644
--- a/arch/powerpc/platforms/44x/warp.c
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -25,6 +25,7 @@
#include <asm/time.h>
#include <asm/uic.h>
#include <asm/ppc4xx.h>
+#include <asm/dma.h>
static __initdata struct of_device_id warp_of_bus[] = {
diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c
index 6e19b0ad5d26..3feffde9128d 100644
--- a/arch/powerpc/platforms/52xx/efika.c
+++ b/arch/powerpc/platforms/52xx/efika.c
@@ -13,6 +13,7 @@
#include <generated/utsrelease.h>
#include <linux/pci.h>
#include <linux/of.h>
+#include <asm/dma.h>
#include <asm/prom.h>
#include <asm/time.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c
index 03aabc0e16ac..2fe12046279e 100644
--- a/arch/powerpc/platforms/amigaone/setup.c
+++ b/arch/powerpc/platforms/amigaone/setup.c
@@ -24,6 +24,7 @@
#include <asm/i8259.h>
#include <asm/time.h>
#include <asm/udbg.h>
+#include <asm/dma.h>
extern void __flush_disable_L1(void);
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index bb63499fc5d3..d12d40ee8cbe 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -146,6 +146,7 @@ config S390
select TTY
select VIRT_CPU_ACCOUNTING
select VIRT_TO_BUS
+ select ARCH_HAS_SG_CHAIN
config SCHED_OMIT_FRAME_POINTER
def_bool y
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 57892a8a9055..b3fea0722ff1 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -4,4 +4,5 @@ generic-y += clkdev.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h
deleted file mode 100644
index 6d45ef6c12a7..000000000000
--- a/arch/s390/include/asm/scatterlist.h
+++ /dev/null
@@ -1,3 +0,0 @@
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index 2f947aba4bd4..aad209199f7e 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -8,5 +8,6 @@ generic-y += cputime.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
generic-y += xor.h
diff --git a/arch/score/include/asm/scatterlist.h b/arch/score/include/asm/scatterlist.h
deleted file mode 100644
index 9f533b8362c7..000000000000
--- a/arch/score/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_SCORE_SCATTERLIST_H
-#define _ASM_SCORE_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASM_SCORE_SCATTERLIST_H */
diff --git a/arch/sh/drivers/dma/Kconfig b/arch/sh/drivers/dma/Kconfig
index cfd5b90a8628..78bc97b1d027 100644
--- a/arch/sh/drivers/dma/Kconfig
+++ b/arch/sh/drivers/dma/Kconfig
@@ -12,9 +12,8 @@ config SH_DMA_IRQ_MULTI
default y if CPU_SUBTYPE_SH7750 || CPU_SUBTYPE_SH7751 || \
CPU_SUBTYPE_SH7750S || CPU_SUBTYPE_SH7750R || \
CPU_SUBTYPE_SH7751R || CPU_SUBTYPE_SH7091 || \
- CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7764 || \
- CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \
- CPU_SUBTYPE_SH7760
+ CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7780 || \
+ CPU_SUBTYPE_SH7785 || CPU_SUBTYPE_SH7760
config SH_DMA_API
depends on SH_DMA
diff --git a/arch/sh/include/cpu-sh4/cpu/dma-register.h b/arch/sh/include/cpu-sh4/cpu/dma-register.h
index 02788b6a03b7..9cd81e54056a 100644
--- a/arch/sh/include/cpu-sh4/cpu/dma-register.h
+++ b/arch/sh/include/cpu-sh4/cpu/dma-register.h
@@ -32,7 +32,6 @@
#define CHCR_TS_HIGH_SHIFT (20 - 2) /* 2 bits for shifted low TS */
#elif defined(CONFIG_CPU_SUBTYPE_SH7757) || \
defined(CONFIG_CPU_SUBTYPE_SH7763) || \
- defined(CONFIG_CPU_SUBTYPE_SH7764) || \
defined(CONFIG_CPU_SUBTYPE_SH7780) || \
defined(CONFIG_CPU_SUBTYPE_SH7785)
#define CHCR_TS_LOW_MASK 0x00000018
diff --git a/arch/sh/include/cpu-sh4a/cpu/dma.h b/arch/sh/include/cpu-sh4a/cpu/dma.h
index 89afb650ce25..8ceccceae844 100644
--- a/arch/sh/include/cpu-sh4a/cpu/dma.h
+++ b/arch/sh/include/cpu-sh4a/cpu/dma.h
@@ -14,8 +14,7 @@
#define DMTE4_IRQ evt2irq(0xb80)
#define DMAE0_IRQ evt2irq(0xbc0) /* DMA Error IRQ*/
#define SH_DMAC_BASE0 0xFE008020
-#elif defined(CONFIG_CPU_SUBTYPE_SH7763) || \
- defined(CONFIG_CPU_SUBTYPE_SH7764)
+#elif defined(CONFIG_CPU_SUBTYPE_SH7763)
#define DMTE0_IRQ evt2irq(0x640)
#define DMTE4_IRQ evt2irq(0x780)
#define DMAE0_IRQ evt2irq(0x6c0)
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 29f2e988c56a..e1ea0ff154d7 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -42,6 +42,7 @@ config SPARC
select MODULES_USE_ELF_RELA
select ODD_RT_SIGACTION
select OLD_SIGSUSPEND
+ select ARCH_HAS_SG_CHAIN
config SPARC32
def_bool !64BIT
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index a45821818003..cdd1b447bb6c 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -15,6 +15,7 @@ generic-y += mcs_spinlock.h
generic-y += module.h
generic-y += mutex.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += serial.h
generic-y += trace_clock.h
generic-y += types.h
diff --git a/arch/sparc/include/asm/scatterlist.h b/arch/sparc/include/asm/scatterlist.h
deleted file mode 100644
index 92bb638313f8..000000000000
--- a/arch/sparc/include/asm/scatterlist.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _SPARC_SCATTERLIST_H
-#define _SPARC_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* !(_SPARC_SCATTERLIST_H) */
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index a5e4b6068213..7bd64aa2e94a 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -21,6 +21,7 @@ generic-y += param.h
generic-y += pci.h
generic-y += percpu.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += sections.h
generic-y += switch_to.h
generic-y += topology.h
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index be0d37e7bb70..4431cceeb393 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -96,6 +96,7 @@ config X86
select IRQ_FORCED_THREADING
select HAVE_BPF_JIT if X86_64
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select ARCH_HAS_SG_CHAIN
select CLKEVT_I8253
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_IOMAP
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 3ca9762e1649..3bf000fab0ae 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -5,6 +5,7 @@ genhdr-y += unistd_64.h
genhdr-y += unistd_x32.h
generic-y += clkdev.h
-generic-y += early_ioremap.h
generic-y += cputime.h
+generic-y += early_ioremap.h
generic-y += mcs_spinlock.h
+generic-y += scatterlist.h
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 5be9063545d2..809abb335627 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -115,7 +115,8 @@ static inline void native_pgd_clear(pgd_t *pgd)
native_set_pgd(pgd, native_make_pgd(0));
}
-extern void sync_global_pgds(unsigned long start, unsigned long end);
+extern void sync_global_pgds(unsigned long start, unsigned long end,
+ int removed);
/*
* Conversion functions: convert a page and protection to a page entry,
diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h
deleted file mode 100644
index 4240878b9d76..000000000000
--- a/arch/x86/include/asm/scatterlist.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _ASM_X86_SCATTERLIST_H
-#define _ASM_X86_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* _ASM_X86_SCATTERLIST_H */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1dbade870f90..939cb8c8b02e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -350,7 +350,7 @@ out:
void vmalloc_sync_all(void)
{
- sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
+ sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END, 0);
}
/*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index df1a9927ad29..8f6803290083 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -178,7 +178,7 @@ __setup("noexec32=", nonx32_setup);
* When memory was added/removed make sure all the processes MM have
* suitable PGD entries in the local PGD level page.
*/
-void sync_global_pgds(unsigned long start, unsigned long end)
+void sync_global_pgds(unsigned long start, unsigned long end, int removed)
{
unsigned long address;
@@ -186,7 +186,12 @@ void sync_global_pgds(unsigned long start, unsigned long end)
const pgd_t *pgd_ref = pgd_offset_k(address);
struct page *page;
- if (pgd_none(*pgd_ref))
+ /*
+ * When it is called after memory hot remove, pgd_none()
+ * returns true. In this case (removed == 1), we must clear
+ * the PGD entries in the local PGD level page.
+ */
+ if (pgd_none(*pgd_ref) && !removed)
continue;
spin_lock(&pgd_lock);
@@ -199,12 +204,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
- else
+ if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
BUG_ON(pgd_page_vaddr(*pgd)
!= pgd_page_vaddr(*pgd_ref));
+ if (removed) {
+ if (pgd_none(*pgd_ref) && !pgd_none(*pgd))
+ pgd_clear(pgd);
+ } else {
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ }
+
spin_unlock(pgt_lock);
}
spin_unlock(&pgd_lock);
@@ -633,7 +644,7 @@ kernel_physical_mapping_init(unsigned long start,
}
if (pgd_changed)
- sync_global_pgds(addr, end - 1);
+ sync_global_pgds(addr, end - 1, 0);
__flush_tlb_all();
@@ -975,25 +986,26 @@ static void __meminit
remove_pagetable(unsigned long start, unsigned long end, bool direct)
{
unsigned long next;
+ unsigned long addr;
pgd_t *pgd;
pud_t *pud;
bool pgd_changed = false;
- for (; start < end; start = next) {
- next = pgd_addr_end(start, end);
+ for (addr = start; addr < end; addr = next) {
+ next = pgd_addr_end(addr, end);
- pgd = pgd_offset_k(start);
+ pgd = pgd_offset_k(addr);
if (!pgd_present(*pgd))
continue;
pud = (pud_t *)pgd_page_vaddr(*pgd);
- remove_pud_table(pud, start, next, direct);
+ remove_pud_table(pud, addr, next, direct);
if (free_pud_table(pud, pgd))
pgd_changed = true;
}
if (pgd_changed)
- sync_global_pgds(start, end - 1);
+ sync_global_pgds(start, end - 1, 1);
flush_tlb_all();
}
@@ -1340,7 +1352,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
else
err = vmemmap_populate_basepages(start, end, node);
if (!err)
- sync_global_pgds(start, end - 1);
+ sync_global_pgds(start, end - 1, 0);
return err;
}
diff --git a/block/bio.c b/block/bio.c
index 3e6331d25d90..a227a0c9a98e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -761,29 +761,31 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
return 0;
/*
- * we might lose a segment or two here, but rather that than
- * make this too complex.
+ * setup the new entry, we might clear it again later if we
+ * cannot add the page
+ */
+ bvec = &bio->bi_io_vec[bio->bi_vcnt];
+ bvec->bv_page = page;
+ bvec->bv_len = len;
+ bvec->bv_offset = offset;
+ bio->bi_vcnt++;
+ bio->bi_phys_segments++;
+
+ /*
+ * Perform a recount if the number of segments is greater
+ * than queue_max_segments(q).
*/
- while (bio->bi_phys_segments >= queue_max_segments(q)) {
+ while (bio->bi_phys_segments > queue_max_segments(q)) {
if (retried_segments)
- return 0;
+ goto failed;
retried_segments = 1;
blk_recount_segments(q, bio);
}
/*
- * setup the new entry, we might clear it again later if we
- * cannot add the page
- */
- bvec = &bio->bi_io_vec[bio->bi_vcnt];
- bvec->bv_page = page;
- bvec->bv_len = len;
- bvec->bv_offset = offset;
-
- /*
* if queue has other restrictions (eg varying max sector size
* depending on offset), it can specify a merge_bvec_fn in the
* queue to get further control
@@ -800,23 +802,25 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
* merge_bvec_fn() returns number of bytes it can accept
* at this offset
*/
- if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
- bvec->bv_page = NULL;
- bvec->bv_len = 0;
- bvec->bv_offset = 0;
- return 0;
- }
+ if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len)
+ goto failed;
}
/* If we may be able to merge these biovecs, force a recount */
- if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
+ if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
bio->bi_flags &= ~(1 << BIO_SEG_VALID);
- bio->bi_vcnt++;
- bio->bi_phys_segments++;
done:
bio->bi_iter.bi_size += len;
return len;
+
+ failed:
+ bvec->bv_page = NULL;
+ bvec->bv_len = 0;
+ bvec->bv_offset = 0;
+ bio->bi_vcnt--;
+ blk_recount_segments(q, bio);
+ return 0;
}
/**
diff --git a/block/genhd.c b/block/genhd.c
index 791f41943132..7bd4372e8b6f 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -849,7 +849,7 @@ static int show_partition(struct seq_file *seqf, void *v)
char buf[BDEVNAME_SIZE];
/* Don't show non-partitionable removeable devices or empty devices */
- if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
+ if (!get_capacity(sgp) || (!(disk_max_parts(sgp) > 1) &&
(sgp->flags & GENHD_FL_REMOVABLE)))
return 0;
if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
diff --git a/crypto/zlib.c b/crypto/zlib.c
index 06b62e5cdcc7..c9ee681d57fd 100644
--- a/crypto/zlib.c
+++ b/crypto/zlib.c
@@ -168,7 +168,7 @@ static int zlib_compress_update(struct crypto_pcomp *tfm,
}
ret = req->avail_out - stream->avail_out;
- pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+ pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
stream->avail_in, stream->avail_out,
req->avail_in - stream->avail_in, ret);
req->next_in = stream->next_in;
@@ -198,7 +198,7 @@ static int zlib_compress_final(struct crypto_pcomp *tfm,
}
ret = req->avail_out - stream->avail_out;
- pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+ pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
stream->avail_in, stream->avail_out,
req->avail_in - stream->avail_in, ret);
req->next_in = stream->next_in;
@@ -283,7 +283,7 @@ static int zlib_decompress_update(struct crypto_pcomp *tfm,
}
ret = req->avail_out - stream->avail_out;
- pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+ pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
stream->avail_in, stream->avail_out,
req->avail_in - stream->avail_in, ret);
req->next_in = stream->next_in;
@@ -331,7 +331,7 @@ static int zlib_decompress_final(struct crypto_pcomp *tfm,
}
ret = req->avail_out - stream->avail_out;
- pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+ pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
stream->avail_in, stream->avail_out,
req->avail_in - stream->avail_in, ret);
req->next_in = stream->next_in;
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 7671dbac6015..b0d5b5a12147 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -16,6 +16,7 @@ menuconfig ATA
depends on BLOCK
depends on !(M32R || M68K || S390) || BROKEN
select SCSI
+ select GLOB
---help---
If you want to use an ATA hard disk, ATA tape drive, ATA CD-ROM or
any other ATA device under Linux, say Y and make sure that you know
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 18d97d5c7d90..8f3043165048 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -59,6 +59,7 @@
#include <linux/async.h>
#include <linux/log2.h>
#include <linux/slab.h>
+#include <linux/glob.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_host.h>
@@ -4250,73 +4251,6 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
{ }
};
-/**
- * glob_match - match a text string against a glob-style pattern
- * @text: the string to be examined
- * @pattern: the glob-style pattern to be matched against
- *
- * Either/both of text and pattern can be empty strings.
- *
- * Match text against a glob-style pattern, with wildcards and simple sets:
- *
- * ? matches any single character.
- * * matches any run of characters.
- * [xyz] matches a single character from the set: x, y, or z.
- * [a-d] matches a single character from the range: a, b, c, or d.
- * [a-d0-9] matches a single character from either range.
- *
- * The special characters ?, [, -, or *, can be matched using a set, eg. [*]
- * Behaviour with malformed patterns is undefined, though generally reasonable.
- *
- * Sample patterns: "SD1?", "SD1[0-5]", "*R0", "SD*1?[012]*xx"
- *
- * This function uses one level of recursion per '*' in pattern.
- * Since it calls _nothing_ else, and has _no_ explicit local variables,
- * this will not cause stack problems for any reasonable use here.
- *
- * RETURNS:
- * 0 on match, 1 otherwise.
- */
-static int glob_match (const char *text, const char *pattern)
-{
- do {
- /* Match single character or a '?' wildcard */
- if (*text == *pattern || *pattern == '?') {
- if (!*pattern++)
- return 0; /* End of both strings: match */
- } else {
- /* Match single char against a '[' bracketed ']' pattern set */
- if (!*text || *pattern != '[')
- break; /* Not a pattern set */
- while (*++pattern && *pattern != ']' && *text != *pattern) {
- if (*pattern == '-' && *(pattern - 1) != '[')
- if (*text > *(pattern - 1) && *text < *(pattern + 1)) {
- ++pattern;
- break;
- }
- }
- if (!*pattern || *pattern == ']')
- return 1; /* No match */
- while (*pattern && *pattern++ != ']');
- }
- } while (*++text && *pattern);
-
- /* Match any run of chars against a '*' wildcard */
- if (*pattern == '*') {
- if (!*++pattern)
- return 0; /* Match: avoid recursion at end of pattern */
- /* Loop to handle additional pattern chars after the wildcard */
- while (*text) {
- if (glob_match(text, pattern) == 0)
- return 0; /* Remainder matched */
- ++text; /* Absorb (match) this char and try again */
- }
- }
- if (!*text && !*pattern)
- return 0; /* End of both strings: match */
- return 1; /* No match */
-}
-
static unsigned long ata_dev_blacklisted(const struct ata_device *dev)
{
unsigned char model_num[ATA_ID_PROD_LEN + 1];
@@ -4327,10 +4261,10 @@ static unsigned long ata_dev_blacklisted(const struct ata_device *dev)
ata_id_c_string(dev->id, model_rev, ATA_ID_FW_REV, sizeof(model_rev));
while (ad->model_num) {
- if (!glob_match(model_num, ad->model_num)) {
+ if (glob_match(model_num, ad->model_num)) {
if (ad->model_rev == NULL)
return ad->horkage;
- if (!glob_match(model_rev, ad->model_rev))
+ if (glob_match(model_rev, ad->model_rev))
return ad->horkage;
}
ad++;
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 23b8726962af..9d5fed17491f 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -274,16 +274,6 @@ config CMA_ALIGNMENT
If unsure, leave the default value "8".
-config CMA_AREAS
- int "Maximum count of the CMA device-private areas"
- default 7
- help
- CMA allows to create CMA areas for particular devices. This parameter
- sets the maximum number of such device private CMA areas in the
- system.
-
- If unsure, leave the default value "7".
-
endif
endmenu
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
index 6467c919c509..6606abdf880c 100644
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
@@ -24,23 +24,9 @@
#include <linux/memblock.h>
#include <linux/err.h>
-#include <linux/mm.h>
-#include <linux/mutex.h>
-#include <linux/page-isolation.h>
#include <linux/sizes.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/mm_types.h>
#include <linux/dma-contiguous.h>
-
-struct cma {
- unsigned long base_pfn;
- unsigned long count;
- unsigned long *bitmap;
- struct mutex lock;
-};
-
-struct cma *dma_contiguous_default_area;
+#include <linux/cma.h>
#ifdef CONFIG_CMA_SIZE_MBYTES
#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
@@ -48,6 +34,8 @@ struct cma *dma_contiguous_default_area;
#define CMA_SIZE_MBYTES 0
#endif
+struct cma *dma_contiguous_default_area;
+
/*
* Default global CMA area size can be defined in kernel's .config.
* This is useful mainly for distro maintainers to create a kernel
@@ -154,65 +142,6 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
}
}
-static DEFINE_MUTEX(cma_mutex);
-
-static int __init cma_activate_area(struct cma *cma)
-{
- int bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long);
- unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
- unsigned i = cma->count >> pageblock_order;
- struct zone *zone;
-
- cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
-
- if (!cma->bitmap)
- return -ENOMEM;
-
- WARN_ON_ONCE(!pfn_valid(pfn));
- zone = page_zone(pfn_to_page(pfn));
-
- do {
- unsigned j;
- base_pfn = pfn;
- for (j = pageblock_nr_pages; j; --j, pfn++) {
- WARN_ON_ONCE(!pfn_valid(pfn));
- /*
- * alloc_contig_range requires the pfn range
- * specified to be in the same zone. Make this
- * simple by forcing the entire CMA resv range
- * to be in the same zone.
- */
- if (page_zone(pfn_to_page(pfn)) != zone)
- goto err;
- }
- init_cma_reserved_pageblock(pfn_to_page(base_pfn));
- } while (--i);
-
- mutex_init(&cma->lock);
- return 0;
-
-err:
- kfree(cma->bitmap);
- return -EINVAL;
-}
-
-static struct cma cma_areas[MAX_CMA_AREAS];
-static unsigned cma_area_count;
-
-static int __init cma_init_reserved_areas(void)
-{
- int i;
-
- for (i = 0; i < cma_area_count; i++) {
- int ret = cma_activate_area(&cma_areas[i]);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-core_initcall(cma_init_reserved_areas);
-
/**
* dma_contiguous_reserve_area() - reserve custom contiguous area
* @size: Size of the reserved area (in bytes),
@@ -234,72 +163,17 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
phys_addr_t limit, struct cma **res_cma,
bool fixed)
{
- struct cma *cma = &cma_areas[cma_area_count];
- phys_addr_t alignment;
- int ret = 0;
-
- pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__,
- (unsigned long)size, (unsigned long)base,
- (unsigned long)limit);
-
- /* Sanity checks */
- if (cma_area_count == ARRAY_SIZE(cma_areas)) {
- pr_err("Not enough slots for CMA reserved regions!\n");
- return -ENOSPC;
- }
-
- if (!size)
- return -EINVAL;
-
- /* Sanitise input arguments */
- alignment = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
- base = ALIGN(base, alignment);
- size = ALIGN(size, alignment);
- limit &= ~(alignment - 1);
-
- /* Reserve memory */
- if (base && fixed) {
- if (memblock_is_region_reserved(base, size) ||
- memblock_reserve(base, size) < 0) {
- ret = -EBUSY;
- goto err;
- }
- } else {
- phys_addr_t addr = memblock_alloc_range(size, alignment, base,
- limit);
- if (!addr) {
- ret = -ENOMEM;
- goto err;
- } else {
- base = addr;
- }
- }
-
- /*
- * Each reserved area must be initialised later, when more kernel
- * subsystems (like slab allocator) are available.
- */
- cma->base_pfn = PFN_DOWN(base);
- cma->count = size >> PAGE_SHIFT;
- *res_cma = cma;
- cma_area_count++;
+ int ret;
- pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
- (unsigned long)base);
+ ret = cma_declare_contiguous(base, size, limit, 0, 0, fixed, res_cma);
+ if (ret)
+ return ret;
/* Architecture specific contiguous memory fixup. */
- dma_contiguous_early_fixup(base, size);
- return 0;
-err:
- pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
- return ret;
-}
+ dma_contiguous_early_fixup(cma_get_base(*res_cma),
+ cma_get_size(*res_cma));
-static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count)
-{
- mutex_lock(&cma->lock);
- bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count);
- mutex_unlock(&cma->lock);
+ return 0;
}
/**
@@ -316,62 +190,10 @@ static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count)
struct page *dma_alloc_from_contiguous(struct device *dev, int count,
unsigned int align)
{
- unsigned long mask, pfn, pageno, start = 0;
- struct cma *cma = dev_get_cma_area(dev);
- struct page *page = NULL;
- int ret;
-
- if (!cma || !cma->count)
- return NULL;
-
if (align > CONFIG_CMA_ALIGNMENT)
align = CONFIG_CMA_ALIGNMENT;
- pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
- count, align);
-
- if (!count)
- return NULL;
-
- mask = (1 << align) - 1;
-
-
- for (;;) {
- mutex_lock(&cma->lock);
- pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
- start, count, mask);
- if (pageno >= cma->count) {
- mutex_unlock(&cma->lock);
- break;
- }
- bitmap_set(cma->bitmap, pageno, count);
- /*
- * It's safe to drop the lock here. We've marked this region for
- * our exclusive use. If the migration fails we will take the
- * lock again and unmark it.
- */
- mutex_unlock(&cma->lock);
-
- pfn = cma->base_pfn + pageno;
- mutex_lock(&cma_mutex);
- ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
- mutex_unlock(&cma_mutex);
- if (ret == 0) {
- page = pfn_to_page(pfn);
- break;
- } else if (ret != -EBUSY) {
- clear_cma_bitmap(cma, pfn, count);
- break;
- }
- clear_cma_bitmap(cma, pfn, count);
- pr_debug("%s(): memory range at %p is busy, retrying\n",
- __func__, pfn_to_page(pfn));
- /* try again with a bit different memory target */
- start = pageno + mask + 1;
- }
-
- pr_debug("%s(): returned %p\n", __func__, page);
- return page;
+ return cma_alloc(dev_get_cma_area(dev), count, align);
}
/**
@@ -387,23 +209,5 @@ struct page *dma_alloc_from_contiguous(struct device *dev, int count,
bool dma_release_from_contiguous(struct device *dev, struct page *pages,
int count)
{
- struct cma *cma = dev_get_cma_area(dev);
- unsigned long pfn;
-
- if (!cma || !pages)
- return false;
-
- pr_debug("%s(page %p)\n", __func__, (void *)pages);
-
- pfn = page_to_pfn(pages);
-
- if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
- return false;
-
- VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
-
- free_contig_range(pfn, count);
- clear_cma_bitmap(cma, pfn, count);
-
- return true;
+ return cma_release(dev_get_cma_area(dev), pages, count);
}
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 89f752dd8465..a2e13e250bba 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -284,7 +284,7 @@ static int memory_subsys_online(struct device *dev)
* attribute and need to set the online_type.
*/
if (mem->online_type < 0)
- mem->online_type = ONLINE_KEEP;
+ mem->online_type = MMOP_ONLINE_KEEP;
ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
@@ -315,23 +315,23 @@ store_mem_state(struct device *dev,
if (ret)
return ret;
- if (!strncmp(buf, "online_kernel", min_t(int, count, 13)))
- online_type = ONLINE_KERNEL;
- else if (!strncmp(buf, "online_movable", min_t(int, count, 14)))
- online_type = ONLINE_MOVABLE;
- else if (!strncmp(buf, "online", min_t(int, count, 6)))
- online_type = ONLINE_KEEP;
- else if (!strncmp(buf, "offline", min_t(int, count, 7)))
- online_type = -1;
+ if (sysfs_streq(buf, "online_kernel"))
+ online_type = MMOP_ONLINE_KERNEL;
+ else if (sysfs_streq(buf, "online_movable"))
+ online_type = MMOP_ONLINE_MOVABLE;
+ else if (sysfs_streq(buf, "online"))
+ online_type = MMOP_ONLINE_KEEP;
+ else if (sysfs_streq(buf, "offline"))
+ online_type = MMOP_OFFLINE;
else {
ret = -EINVAL;
goto err;
}
switch (online_type) {
- case ONLINE_KERNEL:
- case ONLINE_MOVABLE:
- case ONLINE_KEEP:
+ case MMOP_ONLINE_KERNEL:
+ case MMOP_ONLINE_MOVABLE:
+ case MMOP_ONLINE_KEEP:
/*
* mem->online_type is not protected so there can be a
* race here. However, when racing online, the first
@@ -342,7 +342,7 @@ store_mem_state(struct device *dev,
mem->online_type = online_type;
ret = device_online(&mem->dev);
break;
- case -1:
+ case MMOP_OFFLINE:
ret = device_offline(&mem->dev);
break;
default:
@@ -406,7 +406,9 @@ memory_probe_store(struct device *dev, struct device_attribute *attr,
int i, ret;
unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;
- phys_addr = simple_strtoull(buf, NULL, 0);
+ ret = kstrtoull(buf, 0, &phys_addr);
+ if (ret)
+ return ret;
if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
return -EINVAL;
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 7f21c145e317..c8161bd8969c 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -43,7 +43,6 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
/*-- End of configurable params */
#define SECTOR_SHIFT 9
-#define SECTOR_SIZE (1 << SECTOR_SHIFT)
#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT)
#define ZRAM_LOGICAL_BLOCK_SHIFT 12
@@ -62,7 +61,7 @@ enum zram_pageflags {
/*-- Data structures */
/* Allocated for each disk page */
-struct table {
+struct zram_table_entry {
unsigned long handle;
u16 size; /* object size (excluding header) */
u8 flags;
@@ -82,7 +81,7 @@ struct zram_stats {
struct zram_meta {
rwlock_t tb_lock; /* protect table */
- struct table *table;
+ struct zram_table_entry *table;
struct zs_pool *mem_pool;
};
diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
index 7e4bae760e27..c3b80fd65d62 100644
--- a/drivers/gpu/drm/drm_hashtab.c
+++ b/drivers/gpu/drm/drm_hashtab.c
@@ -125,7 +125,7 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item)
parent = &entry->head;
}
if (parent) {
- hlist_add_after_rcu(parent, &item->head);
+ hlist_add_behind_rcu(&item->head, parent);
} else {
hlist_add_head_rcu(&item->head, h_list);
}
diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig
index a11ff74a5127..9eac8de9e8b7 100644
--- a/drivers/input/Kconfig
+++ b/drivers/input/Kconfig
@@ -178,6 +178,15 @@ comment "Input Device Drivers"
source "drivers/input/keyboard/Kconfig"
+config INPUT_LEDS
+ bool "LED Support"
+ depends on LEDS_CLASS = INPUT || LEDS_CLASS = y
+ select LEDS_TRIGGERS
+ default y
+ help
+ This option enables support for LEDs on keyboards managed
+ by the input layer.
+
source "drivers/input/mouse/Kconfig"
source "drivers/input/joystick/Kconfig"
diff --git a/drivers/input/Makefile b/drivers/input/Makefile
index 5ca3f631497f..2ab5f3336da5 100644
--- a/drivers/input/Makefile
+++ b/drivers/input/Makefile
@@ -6,6 +6,9 @@
obj-$(CONFIG_INPUT) += input-core.o
input-core-y := input.o input-compat.o input-mt.o ff-core.o
+ifeq ($(CONFIG_INPUT_LEDS),y)
+input-core-y += leds.o
+endif
obj-$(CONFIG_INPUT_FF_MEMLESS) += ff-memless.o
obj-$(CONFIG_INPUT_POLLDEV) += input-polldev.o
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 1c4c0db05550..3b9284b18e70 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -708,6 +708,9 @@ static void input_disconnect_device(struct input_dev *dev)
handle->open = 0;
spin_unlock_irq(&dev->event_lock);
+
+ if (is_event_supported(EV_LED, dev->evbit, EV_MAX))
+ input_led_disconnect(dev);
}
/**
@@ -2134,6 +2137,9 @@ int input_register_device(struct input_dev *dev)
list_add_tail(&dev->node, &input_dev_list);
+ if (is_event_supported(EV_LED, dev->evbit, EV_MAX))
+ input_led_connect(dev);
+
list_for_each_entry(handler, &input_handler_list, node)
input_attach_handler(dev, handler);
diff --git a/drivers/input/leds.c b/drivers/input/leds.c
new file mode 100644
index 000000000000..985fa7ebeec7
--- /dev/null
+++ b/drivers/input/leds.c
@@ -0,0 +1,249 @@
+/*
+ * LED support for the input layer
+ *
+ * Copyright 2010-2014 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/leds.h>
+#include <linux/input.h>
+
+/*
+ * Keyboard LEDs are propagated by default like the following example:
+ *
+ * VT keyboard numlock trigger
+ * -> vt::numl VT LED
+ * -> vt-numl VT trigger
+ * -> per-device inputX::numl LED
+ *
+ * Userland can however choose the trigger for the vt::numl LED, or
+ * independently choose the trigger for any inputx::numl LED.
+ *
+ *
+ * VT LED classes and triggers are registered on-demand according to
+ * existing LED devices
+ */
+
+/* Handler for VT LEDs, just triggers the corresponding VT trigger. */
+static void vt_led_set(struct led_classdev *cdev,
+ enum led_brightness brightness);
+static struct led_classdev vt_leds[LED_CNT] = {
+#define DEFINE_INPUT_LED(vt_led, nam, deftrig) \
+ [vt_led] = { \
+ .name = "vt::"nam, \
+ .max_brightness = 1, \
+ .brightness_set = vt_led_set, \
+ .default_trigger = deftrig, \
+ }
+/* Default triggers for the VT LEDs just correspond to the legacy
+ * usage. */
+ DEFINE_INPUT_LED(LED_NUML, "numl", "kbd-numlock"),
+ DEFINE_INPUT_LED(LED_CAPSL, "capsl", "kbd-capslock"),
+ DEFINE_INPUT_LED(LED_SCROLLL, "scrolll", "kbd-scrollock"),
+ DEFINE_INPUT_LED(LED_COMPOSE, "compose", NULL),
+ DEFINE_INPUT_LED(LED_KANA, "kana", "kbd-kanalock"),
+ DEFINE_INPUT_LED(LED_SLEEP, "sleep", NULL),
+ DEFINE_INPUT_LED(LED_SUSPEND, "suspend", NULL),
+ DEFINE_INPUT_LED(LED_MUTE, "mute", NULL),
+ DEFINE_INPUT_LED(LED_MISC, "misc", NULL),
+ DEFINE_INPUT_LED(LED_MAIL, "mail", NULL),
+ DEFINE_INPUT_LED(LED_CHARGING, "charging", NULL),
+};
+static const char *const vt_led_names[LED_CNT] = {
+ [LED_NUML] = "numl",
+ [LED_CAPSL] = "capsl",
+ [LED_SCROLLL] = "scrolll",
+ [LED_COMPOSE] = "compose",
+ [LED_KANA] = "kana",
+ [LED_SLEEP] = "sleep",
+ [LED_SUSPEND] = "suspend",
+ [LED_MUTE] = "mute",
+ [LED_MISC] = "misc",
+ [LED_MAIL] = "mail",
+ [LED_CHARGING] = "charging",
+};
+/* Handler for hotplug initialization */
+static void vt_led_trigger_activate(struct led_classdev *cdev);
+/* VT triggers */
+static struct led_trigger vt_led_triggers[LED_CNT] = {
+#define DEFINE_INPUT_LED_TRIGGER(vt_led, nam) \
+ [vt_led] = { \
+ .name = "vt-"nam, \
+ .activate = vt_led_trigger_activate, \
+ }
+ DEFINE_INPUT_LED_TRIGGER(LED_NUML, "numl"),
+ DEFINE_INPUT_LED_TRIGGER(LED_CAPSL, "capsl"),
+ DEFINE_INPUT_LED_TRIGGER(LED_SCROLLL, "scrolll"),
+ DEFINE_INPUT_LED_TRIGGER(LED_COMPOSE, "compose"),
+ DEFINE_INPUT_LED_TRIGGER(LED_KANA, "kana"),
+ DEFINE_INPUT_LED_TRIGGER(LED_SLEEP, "sleep"),
+ DEFINE_INPUT_LED_TRIGGER(LED_SUSPEND, "suspend"),
+ DEFINE_INPUT_LED_TRIGGER(LED_MUTE, "mute"),
+ DEFINE_INPUT_LED_TRIGGER(LED_MISC, "misc"),
+ DEFINE_INPUT_LED_TRIGGER(LED_MAIL, "mail"),
+ DEFINE_INPUT_LED_TRIGGER(LED_CHARGING, "charging"),
+};
+
+/* Lock for registration coherency */
+static DEFINE_MUTEX(vt_led_registered_lock);
+
+/* Which VT LED classes and triggers are registered */
+static unsigned long vt_led_registered[BITS_TO_LONGS(LED_CNT)];
+
+/* Number of input devices having each LED */
+static int vt_led_references[LED_CNT];
+
+/* VT LED state change, tell the VT trigger. */
+static void vt_led_set(struct led_classdev *cdev,
+ enum led_brightness brightness)
+{
+ int led = cdev - vt_leds;
+
+ led_trigger_event(&vt_led_triggers[led], !!brightness);
+}
+
+/* LED state change for some keyboard, notify that keyboard. */
+static void perdevice_input_led_set(struct led_classdev *cdev,
+ enum led_brightness brightness)
+{
+ struct input_dev *dev;
+ struct led_classdev *leds;
+ int led;
+
+ dev = cdev->dev->platform_data;
+ if (!dev)
+ /* Still initializing */
+ return;
+ leds = dev->leds;
+ led = cdev - leds;
+
+ input_event(dev, EV_LED, led, !!brightness);
+ input_event(dev, EV_SYN, SYN_REPORT, 0);
+}
+
+/* Keyboard hotplug, initialize its LED status */
+static void vt_led_trigger_activate(struct led_classdev *cdev)
+{
+ struct led_trigger *trigger = cdev->trigger;
+ int led = trigger - vt_led_triggers;
+
+ if (cdev->brightness_set)
+ cdev->brightness_set(cdev, vt_leds[led].brightness);
+}
+
+/* Free led stuff from input device, used at abortion and disconnection. */
+static void input_led_delete(struct input_dev *dev)
+{
+ if (dev) {
+ struct led_classdev *leds = dev->leds;
+ if (leds) {
+ int i;
+ for (i = 0; i < LED_CNT; i++)
+ kfree(leds[i].name);
+ kfree(leds);
+ dev->leds = NULL;
+ }
+ }
+}
+
+/* A new input device with potential LEDs to connect. */
+int input_led_connect(struct input_dev *dev)
+{
+ int i, error = 0;
+ struct led_classdev *leds;
+
+ dev->leds = leds = kcalloc(LED_CNT, sizeof(*leds), GFP_KERNEL);
+ if (!dev->leds)
+ return -ENOMEM;
+
+ /* lazily register missing VT LEDs */
+ mutex_lock(&vt_led_registered_lock);
+ for (i = 0; i < LED_CNT; i++)
+ if (vt_leds[i].name && test_bit(i, dev->ledbit)) {
+ if (!vt_led_references[i]) {
+ led_trigger_register(&vt_led_triggers[i]);
+ /* This keyboard is first to have led i,
+ * try to register it */
+ if (!led_classdev_register(NULL, &vt_leds[i]))
+ vt_led_references[i] = 1;
+ else
+ led_trigger_unregister(&vt_led_triggers[i]);
+ } else
+ vt_led_references[i]++;
+ }
+ mutex_unlock(&vt_led_registered_lock);
+
+ /* and register this device's LEDs */
+ for (i = 0; i < LED_CNT; i++)
+ if (vt_leds[i].name && test_bit(i, dev->ledbit)) {
+ leds[i].name = kasprintf(GFP_KERNEL, "%s::%s",
+ dev_name(&dev->dev),
+ vt_led_names[i]);
+ if (!leds[i].name) {
+ error = -ENOMEM;
+ goto err;
+ }
+ leds[i].max_brightness = 1;
+ leds[i].brightness_set = perdevice_input_led_set;
+ leds[i].default_trigger = vt_led_triggers[i].name;
+ }
+
+ /* No issue so far, we can register for real. */
+ for (i = 0; i < LED_CNT; i++)
+ if (leds[i].name) {
+ led_classdev_register(&dev->dev, &leds[i]);
+ leds[i].dev->platform_data = dev;
+ perdevice_input_led_set(&leds[i],
+ vt_leds[i].brightness);
+ }
+
+ return 0;
+
+err:
+ input_led_delete(dev);
+ return error;
+}
+
+/*
+ * Disconnected input device. Clean it, and deregister now-useless VT LEDs
+ * and triggers.
+ */
+void input_led_disconnect(struct input_dev *dev)
+{
+ int i;
+ struct led_classdev *leds = dev->leds;
+
+ for (i = 0; i < LED_CNT; i++)
+ if (leds[i].name)
+ led_classdev_unregister(&leds[i]);
+
+ input_led_delete(dev);
+
+ mutex_lock(&vt_led_registered_lock);
+ for (i = 0; i < LED_CNT; i++) {
+ if (!vt_leds[i].name || !test_bit(i, dev->ledbit))
+ continue;
+
+ vt_led_references[i]--;
+ if (vt_led_references[i]) {
+ /* Still some devices needing it */
+ continue;
+ }
+
+ led_classdev_unregister(&vt_leds[i]);
+ led_trigger_unregister(&vt_led_triggers[i]);
+ clear_bit(i, vt_led_registered);
+ }
+ mutex_unlock(&vt_led_registered_lock);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("User LED support for input layer");
+MODULE_AUTHOR("Samuel Thibault <samuel.thibault@ens-lyon.org>");
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index a1b044e7eaad..6784c177eed1 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -11,9 +11,6 @@ menuconfig NEW_LEDS
Say Y to enable Linux LED support. This allows control of supported
LEDs from both userspace and optionally, by kernel events (triggers).
- This is not related to standard keyboard LEDs which are controlled
- via the input system.
-
if NEW_LEDS
config LEDS_CLASS
diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c
index 1972d57aadb3..e7fbc08a0627 100644
--- a/drivers/misc/ti-st/st_core.c
+++ b/drivers/misc/ti-st/st_core.c
@@ -342,7 +342,7 @@ void st_int_recv(void *disc_data,
/* Unknow packet? */
default:
type = *ptr;
- if (st_gdata->list[type] == NULL) {
+ if (type >= ST_MAX_CHANNELS || st_gdata->list[type] == NULL) {
pr_err("chip/interface misbehavior dropping"
" frame starting with 0x%02x", type);
goto done;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index df89b6c3db41..62377271241e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1517,7 +1517,7 @@ static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi,
/* add filter to the list */
if (parent)
- hlist_add_after(&parent->fdir_node, &input->fdir_node);
+ hlist_add_behind(&input->fdir_node, &parent->fdir_node);
else
hlist_add_head(&input->fdir_node,
&pf->fdir_filter_list);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index a452730a3278..a6e5bcc12c8f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -2518,7 +2518,7 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
/* add filter to the list */
if (parent)
- hlist_add_after(&parent->fdir_node, &input->fdir_node);
+ hlist_add_behind(&input->fdir_node, &parent->fdir_node);
else
hlist_add_head(&input->fdir_node,
&adapter->fdir_filter_list);
diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c
index 768dfe9a9315..6d3e2093bf7f 100644
--- a/drivers/net/irda/donauboe.c
+++ b/drivers/net/irda/donauboe.c
@@ -1755,17 +1755,4 @@ static struct pci_driver donauboe_pci_driver = {
.resume = toshoboe_wakeup
};
-static int __init
-donauboe_init (void)
-{
- return pci_register_driver(&donauboe_pci_driver);
-}
-
-static void __exit
-donauboe_cleanup (void)
-{
- pci_unregister_driver(&donauboe_pci_driver);
-}
-
-module_init(donauboe_init);
-module_exit(donauboe_cleanup);
+module_pci_driver(donauboe_pci_driver);
diff --git a/drivers/parport/parport_ip32.c b/drivers/parport/parport_ip32.c
index c864f82bd37d..30e981be14c2 100644
--- a/drivers/parport/parport_ip32.c
+++ b/drivers/parport/parport_ip32.c
@@ -2204,7 +2204,7 @@ static int __init parport_ip32_init(void)
{
pr_info(PPIP32 "SGI IP32 built-in parallel port driver v0.6\n");
this_port = parport_ip32_probe_port();
- return IS_ERR(this_port) ? PTR_ERR(this_port) : 0;
+ return PTR_ERR_OR_ZERO(this_port);
}
/**
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 0754f5c7cb3b..a672dd16da62 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -760,6 +760,15 @@ config RTC_DRV_DS1742
This driver can also be built as a module. If so, the module
will be called rtc-ds1742.
+config RTC_DRV_DS2404
+ tristate "Maxim/Dallas DS2404"
+ help
+ If you say yes here you get support for the
+ Dallas DS2404 RTC chip.
+
+ This driver can also be built as a module. If so, the module
+ will be called rtc-ds2404.
+
config RTC_DRV_DA9052
tristate "Dialog DA9052/DA9053 RTC"
depends on PMIC_DA9052
@@ -873,15 +882,6 @@ config RTC_DRV_V3020
This driver can also be built as a module. If so, the module
will be called rtc-v3020.
-config RTC_DRV_DS2404
- tristate "Dallas DS2404"
- help
- If you say yes here you get support for the
- Dallas DS2404 RTC chip.
-
- This driver can also be built as a module. If so, the module
- will be called rtc-ds2404.
-
config RTC_DRV_WM831X
tristate "Wolfson Microelectronics WM831x RTC"
depends on MFD_WM831X
@@ -1349,6 +1349,7 @@ config RTC_DRV_SIRFSOC
config RTC_DRV_MOXART
tristate "MOXA ART RTC"
+ depends on ARCH_MOXART || COMPILE_TEST
help
If you say yes here you get support for the MOXA ART
RTC module.
diff --git a/drivers/rtc/rtc-ds1343.c b/drivers/rtc/rtc-ds1343.c
index c3719189dd96..ae9f997223b1 100644
--- a/drivers/rtc/rtc-ds1343.c
+++ b/drivers/rtc/rtc-ds1343.c
@@ -4,6 +4,7 @@
* Real Time Clock
*
* Author : Raghavendra Chandra Ganiga <ravi23ganiga@gmail.com>
+ * Ankur Srivastava <sankurece@gmail.com> : DS1343 Nvram Support
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -45,6 +46,9 @@
#define DS1343_CONTROL_REG 0x0F
#define DS1343_STATUS_REG 0x10
#define DS1343_TRICKLE_REG 0x11
+#define DS1343_NVRAM 0x20
+
+#define DS1343_NVRAM_LEN 96
/* DS1343 Control Registers bits */
#define DS1343_EOSC 0x80
@@ -149,6 +153,64 @@ static ssize_t ds1343_store_glitchfilter(struct device *dev,
static DEVICE_ATTR(glitch_filter, S_IRUGO | S_IWUSR, ds1343_show_glitchfilter,
ds1343_store_glitchfilter);
+static ssize_t ds1343_nvram_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ int ret;
+ unsigned char address;
+ struct device *dev = kobj_to_dev(kobj);
+ struct ds1343_priv *priv = dev_get_drvdata(dev);
+
+ if (unlikely(!count))
+ return count;
+
+ if ((count + off) > DS1343_NVRAM_LEN)
+ count = DS1343_NVRAM_LEN - off;
+
+ address = DS1343_NVRAM + off;
+
+ ret = regmap_bulk_write(priv->map, address, buf, count);
+ if (ret < 0)
+ dev_err(&priv->spi->dev, "Error in nvram write %d", ret);
+
+ return (ret < 0) ? ret : count;
+}
+
+
+static ssize_t ds1343_nvram_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ int ret;
+ unsigned char address;
+ struct device *dev = kobj_to_dev(kobj);
+ struct ds1343_priv *priv = dev_get_drvdata(dev);
+
+ if (unlikely(!count))
+ return count;
+
+ if ((count + off) > DS1343_NVRAM_LEN)
+ count = DS1343_NVRAM_LEN - off;
+
+ address = DS1343_NVRAM + off;
+
+ ret = regmap_bulk_read(priv->map, address, buf, count);
+ if (ret < 0)
+ dev_err(&priv->spi->dev, "Error in nvram read %d\n", ret);
+
+ return (ret < 0) ? ret : count;
+}
+
+
+static struct bin_attribute nvram_attr = {
+ .attr.name = "nvram",
+ .attr.mode = S_IRUGO | S_IWUSR,
+ .read = ds1343_nvram_read,
+ .write = ds1343_nvram_write,
+ .size = DS1343_NVRAM_LEN,
+};
+
static ssize_t ds1343_show_alarmstatus(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -274,12 +336,16 @@ static int ds1343_sysfs_register(struct device *dev)
if (err)
goto error1;
+ err = device_create_bin_file(dev, &nvram_attr);
+ if (err)
+ goto error2;
+
if (priv->irq <= 0)
return err;
err = device_create_file(dev, &dev_attr_alarm_mode);
if (err)
- goto error2;
+ goto error3;
err = device_create_file(dev, &dev_attr_alarm_status);
if (!err)
@@ -287,6 +353,9 @@ static int ds1343_sysfs_register(struct device *dev)
device_remove_file(dev, &dev_attr_alarm_mode);
+error3:
+ device_remove_bin_file(dev, &nvram_attr);
+
error2:
device_remove_file(dev, &dev_attr_trickle_charger);
@@ -302,6 +371,7 @@ static void ds1343_sysfs_unregister(struct device *dev)
device_remove_file(dev, &dev_attr_glitch_filter);
device_remove_file(dev, &dev_attr_trickle_charger);
+ device_remove_bin_file(dev, &nvram_attr);
if (priv->irq <= 0)
return;
@@ -684,6 +754,7 @@ static struct spi_driver ds1343_driver = {
module_spi_driver(ds1343_driver);
MODULE_DESCRIPTION("DS1343 RTC SPI Driver");
-MODULE_AUTHOR("Raghavendra Chandra Ganiga <ravi23ganiga@gmail.com>");
+MODULE_AUTHOR("Raghavendra Chandra Ganiga <ravi23ganiga@gmail.com>,"
+ "Ankur Srivastava <sankurece@gmail.com>");
MODULE_LICENSE("GPL v2");
MODULE_VERSION(DS1343_DRV_VERSION);
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index c6b2191a4128..9822715db8ba 100644
--- a/drivers/rtc/rtc-ds1742.c
+++ b/drivers/rtc/rtc-ds1742.c
@@ -231,7 +231,7 @@ static struct platform_driver ds1742_rtc_driver = {
.driver = {
.name = "rtc-ds1742",
.owner = THIS_MODULE,
- .of_match_table = ds1742_rtc_of_match,
+ .of_match_table = of_match_ptr(ds1742_rtc_of_match),
},
};
diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c
index c4c38431012e..8225b89de810 100644
--- a/drivers/rtc/rtc-efi.c
+++ b/drivers/rtc/rtc-efi.c
@@ -17,6 +17,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/stringify.h>
#include <linux/time.h>
#include <linux/platform_device.h>
#include <linux/rtc.h>
@@ -48,8 +49,8 @@ compute_wday(efi_time_t *eft)
int y;
int ndays = 0;
- if (eft->year < 1998) {
- pr_err("EFI year < 1998, invalid date\n");
+ if (eft->year < EFI_RTC_EPOCH) {
+ pr_err("EFI year < " __stringify(EFI_RTC_EPOCH) ", invalid date\n");
return -1;
}
@@ -78,19 +79,36 @@ convert_to_efi_time(struct rtc_time *wtime, efi_time_t *eft)
eft->timezone = EFI_UNSPECIFIED_TIMEZONE;
}
-static void
+static bool
convert_from_efi_time(efi_time_t *eft, struct rtc_time *wtime)
{
memset(wtime, 0, sizeof(*wtime));
+
+ if (eft->second >= 60)
+ return false;
wtime->tm_sec = eft->second;
+
+ if (eft->minute >= 60)
+ return false;
wtime->tm_min = eft->minute;
+
+ if (eft->hour >= 24)
+ return false;
wtime->tm_hour = eft->hour;
+
+ if (!eft->day || eft->day > 31)
+ return false;
wtime->tm_mday = eft->day;
+
+ if (!eft->month || eft->month > 12)
+ return false;
wtime->tm_mon = eft->month - 1;
wtime->tm_year = eft->year - 1900;
/* day of the week [0-6], Sunday=0 */
wtime->tm_wday = compute_wday(eft);
+ if (wtime->tm_wday < 0)
+ return false;
/* day in the year [1-365]*/
wtime->tm_yday = compute_yday(eft);
@@ -106,6 +124,8 @@ convert_from_efi_time(efi_time_t *eft, struct rtc_time *wtime)
default:
wtime->tm_isdst = -1;
}
+
+ return true;
}
static int efi_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
@@ -122,7 +142,8 @@ static int efi_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
if (status != EFI_SUCCESS)
return -EINVAL;
- convert_from_efi_time(&eft, &wkalrm->time);
+ if (!convert_from_efi_time(&eft, &wkalrm->time))
+ return -EIO;
return rtc_valid_tm(&wkalrm->time);
}
@@ -163,7 +184,8 @@ static int efi_read_time(struct device *dev, struct rtc_time *tm)
return -EINVAL;
}
- convert_from_efi_time(&eft, tm);
+ if (!convert_from_efi_time(&eft, tm))
+ return -EIO;
return rtc_valid_tm(tm);
}
diff --git a/drivers/staging/lustre/lustre/libcfs/hash.c b/drivers/staging/lustre/lustre/libcfs/hash.c
index 6d2b455d1be4..6db73910cd49 100644
--- a/drivers/staging/lustre/lustre/libcfs/hash.c
+++ b/drivers/staging/lustre/lustre/libcfs/hash.c
@@ -351,7 +351,7 @@ cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
cfs_hash_dhead_t, dh_head);
if (dh->dh_tail != NULL) /* not empty */
- hlist_add_after(dh->dh_tail, hnode);
+ hlist_add_behind(hnode, dh->dh_tail);
else /* empty list */
hlist_add_head(hnode, &dh->dh_head);
dh->dh_tail = hnode;
@@ -406,7 +406,7 @@ cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
cfs_hash_dhead_dep_t, dd_head);
if (dh->dd_tail != NULL) /* not empty */
- hlist_add_after(dh->dd_tail, hnode);
+ hlist_add_behind(hnode, dh->dd_tail);
else /* empty list */
hlist_add_head(hnode, &dh->dd_head);
dh->dd_tail = hnode;
diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig
index b24aa010f68c..65cd80bf9aec 100644
--- a/drivers/tty/Kconfig
+++ b/drivers/tty/Kconfig
@@ -13,6 +13,10 @@ config VT
bool "Virtual terminal" if EXPERT
depends on !S390 && !UML
select INPUT
+ select NEW_LEDS
+ select LEDS_CLASS
+ select LEDS_TRIGGERS
+ select INPUT_LEDS
default y
---help---
If you say Y here, you will get support for terminal devices with
diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
index d0e3a4497707..d6ecfc9e734f 100644
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@ -33,6 +33,7 @@
#include <linux/string.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/leds.h>
#include <linux/kbd_kern.h>
#include <linux/kbd_diacr.h>
@@ -130,6 +131,7 @@ static char rep; /* flag telling character repeat */
static int shift_state = 0;
static unsigned char ledstate = 0xff; /* undefined */
+static unsigned char lockstate = 0xff; /* undefined */
static unsigned char ledioctl;
/*
@@ -961,6 +963,41 @@ static void k_brl(struct vc_data *vc, unsigned char value, char up_flag)
}
}
+/* We route VT keyboard "leds" through triggers */
+static void kbd_ledstate_trigger_activate(struct led_classdev *cdev);
+
+static struct led_trigger ledtrig_ledstate[] = {
+#define DEFINE_LEDSTATE_TRIGGER(kbd_led, nam) \
+ [kbd_led] = { \
+ .name = nam, \
+ .activate = kbd_ledstate_trigger_activate, \
+ }
+ DEFINE_LEDSTATE_TRIGGER(VC_SCROLLOCK, "kbd-scrollock"),
+ DEFINE_LEDSTATE_TRIGGER(VC_NUMLOCK, "kbd-numlock"),
+ DEFINE_LEDSTATE_TRIGGER(VC_CAPSLOCK, "kbd-capslock"),
+ DEFINE_LEDSTATE_TRIGGER(VC_KANALOCK, "kbd-kanalock"),
+#undef DEFINE_LEDSTATE_TRIGGER
+};
+
+static void kbd_lockstate_trigger_activate(struct led_classdev *cdev);
+
+static struct led_trigger ledtrig_lockstate[] = {
+#define DEFINE_LOCKSTATE_TRIGGER(kbd_led, nam) \
+ [kbd_led] = { \
+ .name = nam, \
+ .activate = kbd_lockstate_trigger_activate, \
+ }
+ DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTLOCK, "kbd-shiftlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_ALTGRLOCK, "kbd-altgrlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_CTRLLOCK, "kbd-ctrllock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_ALTLOCK, "kbd-altlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTLLOCK, "kbd-shiftllock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTRLOCK, "kbd-shiftrlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_CTRLLLOCK, "kbd-ctrlllock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_CTRLRLOCK, "kbd-ctrlrlock"),
+#undef DEFINE_LOCKSTATE_TRIGGER
+};
+
/*
* The leds display either (i) the status of NumLock, CapsLock, ScrollLock,
* or (ii) whatever pattern of lights people want to show using KDSETLED,
@@ -995,18 +1032,25 @@ static inline unsigned char getleds(void)
return kbd->ledflagstate;
}
-static int kbd_update_leds_helper(struct input_handle *handle, void *data)
+/* Called on trigger connection, to set initial state */
+static void kbd_ledstate_trigger_activate(struct led_classdev *cdev)
{
- unsigned char leds = *(unsigned char *)data;
+ struct led_trigger *trigger = cdev->trigger;
+ int led = trigger - ledtrig_ledstate;
- if (test_bit(EV_LED, handle->dev->evbit)) {
- input_inject_event(handle, EV_LED, LED_SCROLLL, !!(leds & 0x01));
- input_inject_event(handle, EV_LED, LED_NUML, !!(leds & 0x02));
- input_inject_event(handle, EV_LED, LED_CAPSL, !!(leds & 0x04));
- input_inject_event(handle, EV_SYN, SYN_REPORT, 0);
- }
+ tasklet_disable(&keyboard_tasklet);
+ led_trigger_event(trigger, ledstate & (1 << led) ? LED_FULL : LED_OFF);
+ tasklet_enable(&keyboard_tasklet);
+}
- return 0;
+static void kbd_lockstate_trigger_activate(struct led_classdev *cdev)
+{
+ struct led_trigger *trigger = cdev->trigger;
+ int led = trigger - ledtrig_lockstate;
+
+ tasklet_disable(&keyboard_tasklet);
+ led_trigger_event(trigger, lockstate & (1 << led) ? LED_FULL : LED_OFF);
+ tasklet_enable(&keyboard_tasklet);
}
/**
@@ -1095,16 +1139,29 @@ static void kbd_bh(unsigned long dummy)
{
unsigned char leds;
unsigned long flags;
-
+ int i;
+
spin_lock_irqsave(&led_lock, flags);
leds = getleds();
spin_unlock_irqrestore(&led_lock, flags);
if (leds != ledstate) {
- input_handler_for_each_handle(&kbd_handler, &leds,
- kbd_update_leds_helper);
+ for (i = 0; i < ARRAY_SIZE(ledtrig_ledstate); i++)
+ if ((leds ^ ledstate) & (1 << i))
+ led_trigger_event(&ledtrig_ledstate[i],
+ leds & (1 << i)
+ ? LED_FULL : LED_OFF);
ledstate = leds;
}
+
+ if (kbd->lockstate != lockstate) {
+ for (i = 0; i < ARRAY_SIZE(ledtrig_lockstate); i++)
+ if ((kbd->lockstate ^ lockstate) & (1 << i))
+ led_trigger_event(&ledtrig_lockstate[i],
+ kbd->lockstate & (1 << i)
+ ? LED_FULL : LED_OFF);
+ lockstate = kbd->lockstate;
+ }
}
DECLARE_TASKLET_DISABLED(keyboard_tasklet, kbd_bh, 0);
@@ -1442,20 +1499,6 @@ static void kbd_disconnect(struct input_handle *handle)
kfree(handle);
}
-/*
- * Start keyboard handler on the new keyboard by refreshing LED state to
- * match the rest of the system.
- */
-static void kbd_start(struct input_handle *handle)
-{
- tasklet_disable(&keyboard_tasklet);
-
- if (ledstate != 0xff)
- kbd_update_leds_helper(handle, &ledstate);
-
- tasklet_enable(&keyboard_tasklet);
-}
-
static const struct input_device_id kbd_ids[] = {
{
.flags = INPUT_DEVICE_ID_MATCH_EVBIT,
@@ -1477,7 +1520,6 @@ static struct input_handler kbd_handler = {
.match = kbd_match,
.connect = kbd_connect,
.disconnect = kbd_disconnect,
- .start = kbd_start,
.name = "kbd",
.id_table = kbd_ids,
};
@@ -1501,6 +1543,20 @@ int __init kbd_init(void)
if (error)
return error;
+ for (i = 0; i < ARRAY_SIZE(ledtrig_ledstate); i++) {
+ error = led_trigger_register(&ledtrig_ledstate[i]);
+ if (error)
+ pr_err("error %d while registering trigger %s\n",
+ error, ledtrig_ledstate[i].name);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ledtrig_lockstate); i++) {
+ error = led_trigger_register(&ledtrig_lockstate[i]);
+ if (error)
+ pr_err("error %d while registering trigger %s\n",
+ error, ledtrig_lockstate[i].name);
+ }
+
tasklet_enable(&keyboard_tasklet);
tasklet_schedule(&keyboard_tasklet);
diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index 428089009cd5..19b170d9e570 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -190,8 +190,6 @@ static ssize_t brightness_store(struct device *dev,
}
mutex_unlock(&bd->ops_lock);
- backlight_generate_event(bd, BACKLIGHT_UPDATE_SYSFS);
-
return rc;
}
static DEVICE_ATTR_RW(brightness);
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index d9e3bee4e653..c52f1ce15f1e 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -55,10 +55,10 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
}
size >>= sb->s_blocksize_bits;
- if (size > sizeof(dir->bh)/sizeof(dir->bh[0])) {
+ if (size > ARRAY_SIZE(dir->bh)) {
/* this directory is too big for fixed bh set, must allocate */
struct buffer_head **bh_fplus =
- kzalloc(size * sizeof(struct buffer_head *),
+ kcalloc(size, sizeof(struct buffer_head *),
GFP_KERNEL);
if (!bh_fplus) {
adfs_error(sb, "not enough memory for"
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index d7bd395ab586..1c55388ae633 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -210,7 +210,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
int pipefd;
struct autofs_sb_info *sbi;
struct autofs_info *ino;
- int pgrp;
+ int pgrp = 0;
bool pgrp_set = false;
int ret = -EINVAL;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 3892c1a23241..37d54b11d674 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -145,6 +145,25 @@ static int padzero(unsigned long elf_bss)
#define ELF_BASE_PLATFORM NULL
#endif
+/*
+ * Use get_random_int() to implement AT_RANDOM while avoiding depletion
+ * of the entropy pool.
+ */
+static void get_atrandom_bytes(unsigned char *buf, size_t nbytes)
+{
+ unsigned char *p = buf;
+
+ while (nbytes) {
+ unsigned int random_variable;
+ size_t chunk = min(nbytes, sizeof(random_variable));
+
+ random_variable = get_random_int();
+ memcpy(p, &random_variable, chunk);
+ p += chunk;
+ nbytes -= chunk;
+ }
+}
+
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
unsigned long load_addr, unsigned long interp_load_addr)
@@ -206,7 +225,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
/*
* Generate 16 random bytes for userspace PRNG seeding.
*/
- get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
+ get_atrandom_bytes(k_rand_bytes, sizeof(k_rand_bytes));
u_rand_bytes = (elf_addr_t __user *)
STACK_ALLOC(p, sizeof(k_rand_bytes));
if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
@@ -1816,7 +1835,7 @@ static int elf_note_info_init(struct elf_note_info *info)
INIT_LIST_HEAD(&info->thread_list);
/* Allocate space for ELF notes */
- info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
+ info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL);
if (!info->notes)
return 0;
info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index d749731dc0ee..dd333a67ebc2 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -39,13 +39,11 @@ int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args)
args);
/* start by checking things over */
- ASSERT(cache->fstop_percent >= 0 &&
- cache->fstop_percent < cache->fcull_percent &&
+ ASSERT(cache->fstop_percent < cache->fcull_percent &&
cache->fcull_percent < cache->frun_percent &&
cache->frun_percent < 100);
- ASSERT(cache->bstop_percent >= 0 &&
- cache->bstop_percent < cache->bcull_percent &&
+ ASSERT(cache->bstop_percent < cache->bcull_percent &&
cache->bcull_percent < cache->brun_percent &&
cache->brun_percent < 100);
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index b078d3081d6c..ff43e5a8711f 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -222,7 +222,7 @@ static ssize_t cachefiles_daemon_write(struct file *file,
if (test_bit(CACHEFILES_DEAD, &cache->flags))
return -EIO;
- if (datalen < 0 || datalen > PAGE_SIZE - 1)
+ if (datalen > PAGE_SIZE - 1)
return -EOPNOTSUPP;
/* drag the command string into the kernel so we can parse it */
@@ -385,7 +385,7 @@ static int cachefiles_daemon_fstop(struct cachefiles_cache *cache, char *args)
if (args[0] != '%' || args[1] != '\0')
return -EINVAL;
- if (fstop < 0 || fstop >= cache->fcull_percent)
+ if (fstop >= cache->fcull_percent)
return cachefiles_daemon_range_error(cache, args);
cache->fstop_percent = fstop;
@@ -457,7 +457,7 @@ static int cachefiles_daemon_bstop(struct cachefiles_cache *cache, char *args)
if (args[0] != '%' || args[1] != '\0')
return -EINVAL;
- if (bstop < 0 || bstop >= cache->bcull_percent)
+ if (bstop >= cache->bcull_percent)
return cachefiles_daemon_range_error(cache, args);
cache->bstop_percent = bstop;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 7ff866dbb89e..54ac0e8ad96c 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -38,7 +38,7 @@ static const struct cifs_sid sid_everyone = {
1, 1, {0, 0, 0, 0, 0, 1}, {0} };
/* security id for Authenticated Users system group */
static const struct cifs_sid sid_authusers = {
- 1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11)} };
+ 1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11)} };
/* group users */
static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 41c90671de0c..3bf2f416326b 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2476,14 +2476,14 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon,
}
parm_data = (struct cifs_posix_lock *)
((char *)&pSMBr->hdr.Protocol + data_offset);
- if (parm_data->lock_type == __constant_cpu_to_le16(CIFS_UNLCK))
+ if (parm_data->lock_type == cpu_to_le16(CIFS_UNLCK))
pLockData->fl_type = F_UNLCK;
else {
if (parm_data->lock_type ==
- __constant_cpu_to_le16(CIFS_RDLCK))
+ cpu_to_le16(CIFS_RDLCK))
pLockData->fl_type = F_RDLCK;
else if (parm_data->lock_type ==
- __constant_cpu_to_le16(CIFS_WRLCK))
+ cpu_to_le16(CIFS_WRLCK))
pLockData->fl_type = F_WRLCK;
pLockData->fl_start = le64_to_cpu(parm_data->start);
@@ -3275,25 +3275,25 @@ CIFSSMB_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
pSMB->compression_state = cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
pSMB->TotalParameterCount = 0;
- pSMB->TotalDataCount = __constant_cpu_to_le32(2);
+ pSMB->TotalDataCount = cpu_to_le32(2);
pSMB->MaxParameterCount = 0;
pSMB->MaxDataCount = 0;
pSMB->MaxSetupCount = 4;
pSMB->Reserved = 0;
pSMB->ParameterOffset = 0;
- pSMB->DataCount = __constant_cpu_to_le32(2);
+ pSMB->DataCount = cpu_to_le32(2);
pSMB->DataOffset =
cpu_to_le32(offsetof(struct smb_com_transaction_compr_ioctl_req,
compression_state) - 4); /* 84 */
pSMB->SetupCount = 4;
- pSMB->SubCommand = __constant_cpu_to_le16(NT_TRANSACT_IOCTL);
+ pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL);
pSMB->ParameterCount = 0;
- pSMB->FunctionCode = __constant_cpu_to_le32(FSCTL_SET_COMPRESSION);
+ pSMB->FunctionCode = cpu_to_le32(FSCTL_SET_COMPRESSION);
pSMB->IsFsctl = 1; /* FSCTL */
pSMB->IsRootFlag = 0;
pSMB->Fid = fid; /* file handle always le */
/* 3 byte pad, followed by 2 byte compress state */
- pSMB->ByteCount = __constant_cpu_to_le16(5);
+ pSMB->ByteCount = cpu_to_le16(5);
inc_rfc1001_len(pSMB, 5);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3429,10 +3429,10 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
cifs_acl->version = cpu_to_le16(1);
if (acl_type == ACL_TYPE_ACCESS) {
cifs_acl->access_entry_count = cpu_to_le16(count);
- cifs_acl->default_entry_count = __constant_cpu_to_le16(0xFFFF);
+ cifs_acl->default_entry_count = cpu_to_le16(0xFFFF);
} else if (acl_type == ACL_TYPE_DEFAULT) {
cifs_acl->default_entry_count = cpu_to_le16(count);
- cifs_acl->access_entry_count = __constant_cpu_to_le16(0xFFFF);
+ cifs_acl->access_entry_count = cpu_to_le16(0xFFFF);
} else {
cifs_dbg(FYI, "unknown ACL type %d\n", acl_type);
return 0;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index baa7b31c5b77..cedcef1c009d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1058,7 +1058,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
- buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
+ buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
if (!buf) {
free_xid(xid);
return -ENOMEM;
@@ -1393,7 +1393,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
- buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
+ buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
if (!buf)
return -ENOMEM;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index e87387dbf39f..27e6175a7f7e 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -46,7 +46,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4,
USHRT_MAX));
pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
- pSMB->req.VcNumber = __constant_cpu_to_le16(1);
+ pSMB->req.VcNumber = cpu_to_le16(1);
/* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 3f17b4550831..e5100b8fb3a0 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -111,7 +111,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
return -EINVAL;
max_num = max_buf / sizeof(struct smb2_lock_element);
- buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL);
+ buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -247,7 +247,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
}
max_num = max_buf / sizeof(struct smb2_lock_element);
- buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL);
+ buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf) {
free_xid(xid);
return -ENOMEM;
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index b8021fde987d..36867bd21d04 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -67,27 +67,27 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
* indexed by command in host byte order
*/
static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
- /* SMB2_NEGOTIATE */ __constant_cpu_to_le16(65),
- /* SMB2_SESSION_SETUP */ __constant_cpu_to_le16(9),
- /* SMB2_LOGOFF */ __constant_cpu_to_le16(4),
- /* SMB2_TREE_CONNECT */ __constant_cpu_to_le16(16),
- /* SMB2_TREE_DISCONNECT */ __constant_cpu_to_le16(4),
- /* SMB2_CREATE */ __constant_cpu_to_le16(89),
- /* SMB2_CLOSE */ __constant_cpu_to_le16(60),
- /* SMB2_FLUSH */ __constant_cpu_to_le16(4),
- /* SMB2_READ */ __constant_cpu_to_le16(17),
- /* SMB2_WRITE */ __constant_cpu_to_le16(17),
- /* SMB2_LOCK */ __constant_cpu_to_le16(4),
- /* SMB2_IOCTL */ __constant_cpu_to_le16(49),
+ /* SMB2_NEGOTIATE */ cpu_to_le16(65),
+ /* SMB2_SESSION_SETUP */ cpu_to_le16(9),
+ /* SMB2_LOGOFF */ cpu_to_le16(4),
+ /* SMB2_TREE_CONNECT */ cpu_to_le16(16),
+ /* SMB2_TREE_DISCONNECT */ cpu_to_le16(4),
+ /* SMB2_CREATE */ cpu_to_le16(89),
+ /* SMB2_CLOSE */ cpu_to_le16(60),
+ /* SMB2_FLUSH */ cpu_to_le16(4),
+ /* SMB2_READ */ cpu_to_le16(17),
+ /* SMB2_WRITE */ cpu_to_le16(17),
+ /* SMB2_LOCK */ cpu_to_le16(4),
+ /* SMB2_IOCTL */ cpu_to_le16(49),
/* BB CHECK this ... not listed in documentation */
- /* SMB2_CANCEL */ __constant_cpu_to_le16(0),
- /* SMB2_ECHO */ __constant_cpu_to_le16(4),
- /* SMB2_QUERY_DIRECTORY */ __constant_cpu_to_le16(9),
- /* SMB2_CHANGE_NOTIFY */ __constant_cpu_to_le16(9),
- /* SMB2_QUERY_INFO */ __constant_cpu_to_le16(9),
- /* SMB2_SET_INFO */ __constant_cpu_to_le16(2),
+ /* SMB2_CANCEL */ cpu_to_le16(0),
+ /* SMB2_ECHO */ cpu_to_le16(4),
+ /* SMB2_QUERY_DIRECTORY */ cpu_to_le16(9),
+ /* SMB2_CHANGE_NOTIFY */ cpu_to_le16(9),
+ /* SMB2_QUERY_INFO */ cpu_to_le16(9),
+ /* SMB2_SET_INFO */ cpu_to_le16(2),
/* BB FIXME can also be 44 for lease break */
- /* SMB2_OPLOCK_BREAK */ __constant_cpu_to_le16(24)
+ /* SMB2_OPLOCK_BREAK */ cpu_to_le16(24)
};
int
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index e35ce5b3d88f..87a53afd13a5 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -547,7 +547,7 @@ smb2_clone_range(const unsigned int xid,
goto cchunk_out;
/* For now array only one chunk long, will make more flexible later */
- pcchunk->ChunkCount = __constant_cpu_to_le32(1);
+ pcchunk->ChunkCount = cpu_to_le32(1);
pcchunk->Reserved = 0;
pcchunk->Reserved2 = 0;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 0158104df745..b3f05b433d62 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1359,7 +1359,7 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
char *ret_data = NULL;
fsctl_input.CompressionState =
- __constant_cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
+ cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
FSCTL_SET_COMPRESSION, true /* is_fsctl */,
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 69f3595d3952..d03adad014db 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -85,7 +85,7 @@
/* BB FIXME - analyze following length BB */
#define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */
-#define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe)
+#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
/*
* SMB2 Header Definition
@@ -96,7 +96,7 @@
*
*/
-#define SMB2_HEADER_STRUCTURE_SIZE __constant_cpu_to_le16(64)
+#define SMB2_HEADER_STRUCTURE_SIZE cpu_to_le16(64)
struct smb2_hdr {
__be32 smb2_buf_length; /* big endian on wire */
@@ -137,16 +137,16 @@ struct smb2_transform_hdr {
} __packed;
/* Encryption Algorithms */
-#define SMB2_ENCRYPTION_AES128_CCM __constant_cpu_to_le16(0x0001)
+#define SMB2_ENCRYPTION_AES128_CCM cpu_to_le16(0x0001)
/*
* SMB2 flag definitions
*/
-#define SMB2_FLAGS_SERVER_TO_REDIR __constant_cpu_to_le32(0x00000001)
-#define SMB2_FLAGS_ASYNC_COMMAND __constant_cpu_to_le32(0x00000002)
-#define SMB2_FLAGS_RELATED_OPERATIONS __constant_cpu_to_le32(0x00000004)
-#define SMB2_FLAGS_SIGNED __constant_cpu_to_le32(0x00000008)
-#define SMB2_FLAGS_DFS_OPERATIONS __constant_cpu_to_le32(0x10000000)
+#define SMB2_FLAGS_SERVER_TO_REDIR cpu_to_le32(0x00000001)
+#define SMB2_FLAGS_ASYNC_COMMAND cpu_to_le32(0x00000002)
+#define SMB2_FLAGS_RELATED_OPERATIONS cpu_to_le32(0x00000004)
+#define SMB2_FLAGS_SIGNED cpu_to_le32(0x00000008)
+#define SMB2_FLAGS_DFS_OPERATIONS cpu_to_le32(0x10000000)
/*
* Definitions for SMB2 Protocol Data Units (network frames)
@@ -157,7 +157,7 @@ struct smb2_transform_hdr {
*
*/
-#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_cpu_to_le16(9)
+#define SMB2_ERROR_STRUCTURE_SIZE2 cpu_to_le16(9)
struct smb2_err_rsp {
struct smb2_hdr hdr;
@@ -500,12 +500,12 @@ struct create_context {
#define SMB2_LEASE_HANDLE_CACHING_HE 0x02
#define SMB2_LEASE_WRITE_CACHING_HE 0x04
-#define SMB2_LEASE_NONE __constant_cpu_to_le32(0x00)
-#define SMB2_LEASE_READ_CACHING __constant_cpu_to_le32(0x01)
-#define SMB2_LEASE_HANDLE_CACHING __constant_cpu_to_le32(0x02)
-#define SMB2_LEASE_WRITE_CACHING __constant_cpu_to_le32(0x04)
+#define SMB2_LEASE_NONE cpu_to_le32(0x00)
+#define SMB2_LEASE_READ_CACHING cpu_to_le32(0x01)
+#define SMB2_LEASE_HANDLE_CACHING cpu_to_le32(0x02)
+#define SMB2_LEASE_WRITE_CACHING cpu_to_le32(0x04)
-#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS __constant_cpu_to_le32(0x02)
+#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS cpu_to_le32(0x02)
#define SMB2_LEASE_KEY_SIZE 16
diff --git a/fs/compat.c b/fs/compat.c
index 66d3d3c6b4b2..6917fdba382c 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -562,7 +562,7 @@ ssize_t compat_rw_copy_check_uvector(int type,
goto out;
ret = -EINVAL;
- if (nr_segs > UIO_MAXIOV || nr_segs < 0)
+ if (nr_segs > UIO_MAXIOV)
goto out;
if (nr_segs > fast_segs) {
ret = -ENOMEM;
diff --git a/fs/exec.c b/fs/exec.c
index a3d33fe592d6..2ef2751f5a8d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -368,10 +368,6 @@ static int bprm_mm_init(struct linux_binprm *bprm)
if (!mm)
goto err;
- err = init_new_context(current, mm);
- if (err)
- goto err;
-
err = __bprm_mm_init(bprm);
if (err)
goto err;
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 7f20f25c232c..84529b8a331b 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -116,7 +116,7 @@ static int _sp2d_alloc(unsigned pages_in_unit, unsigned group_width,
num_a1pa = min_t(unsigned, PAGE_SIZE / sizeof__a1pa,
pages_in_unit - i);
- __a1pa = kzalloc(num_a1pa * sizeof__a1pa, GFP_KERNEL);
+ __a1pa = kcalloc(num_a1pa, sizeof__a1pa, GFP_KERNEL);
if (unlikely(!__a1pa)) {
ORE_DBGMSG("!! Failed to _alloc_1p_arrays=%d\n",
num_a1pa);
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index a8bc47f75fa0..5b6e9f246233 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -107,7 +107,10 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
}
if (!journal) {
- ret = generic_file_fsync(file, start, end, datasync);
+ if (test_opt(inode->i_sb, BARRIER))
+ ret = generic_file_fsync(file, start, end, datasync);
+ else
+ ret = __generic_file_fsync(file, start, end, datasync);
if (!ret && !hlist_empty(&inode->i_dentry))
ret = ext4_sync_parent(inode);
goto out;
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 32602c667b4a..7892e6fddb66 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -38,21 +38,30 @@ int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
return hfsplus_strcmp(&k1->cat.name, &k2->cat.name);
}
-void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
- u32 parent, struct qstr *str)
+/* Generates key for catalog file/folders record. */
+int hfsplus_cat_build_key(struct super_block *sb,
+ hfsplus_btree_key *key, u32 parent, struct qstr *str)
{
- int len;
+ int len, err;
key->cat.parent = cpu_to_be32(parent);
- if (str) {
- hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
- str->name, str->len);
- len = be16_to_cpu(key->cat.name.length);
- } else {
- key->cat.name.length = 0;
- len = 0;
- }
+ err = hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
+ str->name, str->len);
+ if (unlikely(err < 0))
+ return err;
+
+ len = be16_to_cpu(key->cat.name.length);
key->key_len = cpu_to_be16(6 + 2 * len);
+ return 0;
+}
+
+/* Generates key for catalog thread record. */
+void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
+ hfsplus_btree_key *key, u32 parent)
+{
+ key->cat.parent = cpu_to_be32(parent);
+ key->cat.name.length = 0;
+ key->key_len = cpu_to_be16(6);
}
static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent,
@@ -167,11 +176,16 @@ static int hfsplus_fill_cat_thread(struct super_block *sb,
hfsplus_cat_entry *entry, int type,
u32 parentid, struct qstr *str)
{
+ int err;
+
entry->type = cpu_to_be16(type);
entry->thread.reserved = 0;
entry->thread.parentID = cpu_to_be32(parentid);
- hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
+ err = hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
str->name, str->len);
+ if (unlikely(err < 0))
+ return err;
+
return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2;
}
@@ -183,7 +197,7 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid,
int err;
u16 type;
- hfsplus_cat_build_key(sb, fd->search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd->search_key, cnid);
err = hfs_brec_read(fd, &tmp, sizeof(hfsplus_cat_entry));
if (err)
return err;
@@ -250,11 +264,16 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
if (err)
return err;
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
entry_size = hfsplus_fill_cat_thread(sb, &entry,
S_ISDIR(inode->i_mode) ?
HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD,
dir->i_ino, str);
+ if (unlikely(entry_size < 0)) {
+ err = entry_size;
+ goto err2;
+ }
+
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
@@ -265,7 +284,10 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
if (err)
goto err2;
- hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ if (unlikely(err))
+ goto err1;
+
entry_size = hfsplus_cat_build_record(&entry, cnid, inode);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
@@ -288,7 +310,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
return 0;
err1:
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
if (!hfs_brec_find(&fd, hfs_find_rec_by_key))
hfs_brec_remove(&fd);
err2:
@@ -313,7 +335,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
if (!str) {
int len;
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -329,7 +351,9 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
off + 2, len);
fd.search_key->key_len = cpu_to_be16(6 + len);
} else
- hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ if (unlikely(err))
+ goto out;
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
@@ -360,7 +384,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
if (err)
goto out;
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -405,7 +429,11 @@ int hfsplus_rename_cat(u32 cnid,
dst_fd = src_fd;
/* find the old dir entry and read the data */
- hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
+ err = hfsplus_cat_build_key(sb, src_fd.search_key,
+ src_dir->i_ino, src_name);
+ if (unlikely(err))
+ goto out;
+
err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -419,7 +447,11 @@ int hfsplus_rename_cat(u32 cnid,
type = be16_to_cpu(entry.type);
/* create new dir entry with the data from the old entry */
- hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name);
+ err = hfsplus_cat_build_key(sb, dst_fd.search_key,
+ dst_dir->i_ino, dst_name);
+ if (unlikely(err))
+ goto out;
+
err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
@@ -436,7 +468,11 @@ int hfsplus_rename_cat(u32 cnid,
dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC;
/* finally remove the old entry */
- hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
+ err = hfsplus_cat_build_key(sb, src_fd.search_key,
+ src_dir->i_ino, src_name);
+ if (unlikely(err))
+ goto out;
+
err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -449,7 +485,7 @@ int hfsplus_rename_cat(u32 cnid,
src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC;
/* remove old thread entry */
- hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, src_fd.search_key, cnid);
err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -459,9 +495,14 @@ int hfsplus_rename_cat(u32 cnid,
goto out;
/* create new thread entry */
- hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, dst_fd.search_key, cnid);
entry_size = hfsplus_fill_cat_thread(sb, &entry, type,
dst_dir->i_ino, dst_name);
+ if (unlikely(entry_size < 0)) {
+ err = entry_size;
+ goto out;
+ }
+
err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 610a3260bef1..435bea231cc6 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -44,7 +44,10 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
if (err)
return ERR_PTR(err);
- hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
+ err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino,
+ &dentry->d_name);
+ if (unlikely(err < 0))
+ goto fail;
again:
err = hfs_brec_read(&fd, &entry, sizeof(entry));
if (err) {
@@ -97,9 +100,11 @@ again:
be32_to_cpu(entry.file.permissions.dev);
str.len = sprintf(name, "iNode%d", linkid);
str.name = name;
- hfsplus_cat_build_key(sb, fd.search_key,
+ err = hfsplus_cat_build_key(sb, fd.search_key,
HFSPLUS_SB(sb)->hidden_dir->i_ino,
&str);
+ if (unlikely(err < 0))
+ goto fail;
goto again;
}
} else if (!dentry->d_fsdata)
@@ -145,7 +150,7 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx)
err = -ENOMEM;
goto out;
}
- hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, inode->i_ino);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index eb5e059f481a..b0441d65fa54 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -443,8 +443,10 @@ int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *k1,
const hfsplus_btree_key *k2);
int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
const hfsplus_btree_key *k2);
-void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
+int hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
u32 parent, struct qstr *str);
+void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
+ hfsplus_btree_key *key, u32 parent);
void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms);
int hfsplus_find_cat(struct super_block *sb, u32 cnid,
struct hfs_find_data *fd);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 4cf2024b87da..593af2fdcc2d 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -515,7 +515,9 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
err = hfs_find_init(sbi->cat_tree, &fd);
if (err)
goto out_put_root;
- hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
+ err = hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
+ if (unlikely(err < 0))
+ goto out_put_root;
if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
hfs_find_exit(&fd);
if (entry.type != cpu_to_be16(HFSPLUS_FOLDER))
diff --git a/fs/isofs/Makefile b/fs/isofs/Makefile
index bf162f0942d5..47a68e357512 100644
--- a/fs/isofs/Makefile
+++ b/fs/isofs/Makefile
@@ -8,3 +8,5 @@ isofs-objs-y := namei.o inode.o dir.o util.o rock.o export.o
isofs-objs-$(CONFIG_JOLIET) += joliet.o
isofs-objs-$(CONFIG_ZISOFS) += compress.o
isofs-objs := $(isofs-objs-y)
+
+# ccflags-y := -DDEBUG_FLAGS=1
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 592e5115a561..c9ba688dc407 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -15,6 +15,8 @@
*
* Transparent decompression of files on an iso9660 filesystem
*/
+#define DEBUG
+#define pr_fmt(fmt) "zisofs: " fmt
#include <linux/module.h>
#include <linux/init.h>
@@ -110,7 +112,7 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
*errp = -ENOMEM;
else
*errp = -EIO;
- printk(KERN_DEBUG "zisofs: zisofs_inflateInit returned %d\n",
+ pr_debug("zisofs_inflateInit returned %d\n",
zerr);
goto z_eio;
}
@@ -154,12 +156,12 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
if (zerr == Z_MEM_ERROR)
*errp = -ENOMEM;
else {
- printk(KERN_DEBUG
+ pr_debug(
"zisofs: zisofs_inflate returned"
" %d, inode = %lu,"
" page idx = %d, bh idx = %d,"
- " avail_in = %d,"
- " avail_out = %d\n",
+ " avail_in = %ld,"
+ " avail_out = %ld\n",
zerr, inode->i_ino, curpage,
curbh, stream.avail_in,
stream.avail_out);
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index 12088d8de3fa..44d1053dfad5 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -12,7 +12,7 @@
* Documentation/filesystems/nfs/Exporting
* fs/exportfs/expfs.c.
*/
-
+#define pr_fmt(fmt) "ISOFS: " fmt
#include "isofs.h"
static struct dentry *
@@ -52,8 +52,7 @@ static struct dentry *isofs_export_get_parent(struct dentry *child)
/* "child" must always be a directory. */
if (!S_ISDIR(child_inode->i_mode)) {
- printk(KERN_ERR "isofs: isofs_export_get_parent(): "
- "child is not a directory!\n");
+ pr_err("%s(): child is not a directory!\n", __func__);
rv = ERR_PTR(-EACCES);
goto out;
}
@@ -62,8 +61,7 @@ static struct dentry *isofs_export_get_parent(struct dentry *child)
* it is not zero, it means the directory failed to be
* normalized for some reason. */
if (e_child_inode->i_iget5_offset != 0) {
- printk(KERN_ERR "isofs: isofs_export_get_parent(): "
- "child directory not normalized!\n");
+ pr_err("isofs_export_get_parent(): child directory not normalized!\n");
rv = ERR_PTR(-EACCES);
goto out;
}
@@ -89,8 +87,7 @@ static struct dentry *isofs_export_get_parent(struct dentry *child)
/* Verify it is in fact the ".." entry. */
if ((isonum_711(de->name_len) != 1) || (de->name[0] != 1)) {
- printk(KERN_ERR "isofs: Unable to find the \"..\" "
- "directory for NFS.\n");
+ pr_err("Unable to find the \"..\" directory for NFS.\n");
rv = ERR_PTR(-EACCES);
goto out;
}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 4556ce1af5b0..cc23d86e174a 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -10,6 +10,8 @@
* 2004 Paul Serice - Inode Support pushed out from 4GB to 128GB
* 2004 Paul Serice - NFS Export Operations
*/
+#define DEBUG
+#define pr_fmt(fmt) "ISOFS: " fmt
#include <linux/init.h>
#include <linux/module.h>
@@ -528,23 +530,25 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session)
Te.cdte_format=CDROM_LBA;
i = ioctl_by_bdev(bdev, CDROMREADTOCENTRY, (unsigned long) &Te);
if (!i) {
- printk(KERN_DEBUG "ISOFS: Session %d start %d type %d\n",
+ pr_debug("Session %d start %d type %d\n",
session, Te.cdte_addr.lba,
Te.cdte_ctrl&CDROM_DATA_TRACK);
if ((Te.cdte_ctrl&CDROM_DATA_TRACK) == 4)
return Te.cdte_addr.lba;
}
- printk(KERN_ERR "ISOFS: Invalid session number or type of track\n");
+ pr_err("Invalid session number or type of track\n");
}
i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info);
if (session > 0)
- printk(KERN_ERR "ISOFS: Invalid session number\n");
+ pr_err("Invalid session number\n");
#if 0
- printk(KERN_DEBUG "isofs.inode: CDROMMULTISESSION: rc=%d\n",i);
+ pr_debug("isofs.inode: CDROMMULTISESSION: rc=%d\n", i);
if (i==0) {
- printk(KERN_DEBUG "isofs.inode: XA disk: %s\n",ms_info.xa_flag?"yes":"no");
- printk(KERN_DEBUG "isofs.inode: vol_desc_start = %d\n", ms_info.addr.lba);
+ pr_debug("isofs.inode: XA disk: %s\n",
+ ms_info.xa_flag?"yes":"no");
+ pr_debug("isofs.inode: vol_desc_start = %d\n",
+ ms_info.addr.lba);
}
#endif
if (i==0)
@@ -672,8 +676,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
else if (sec->escape[2] == 0x45)
joliet_level = 3;
- printk(KERN_DEBUG "ISO 9660 Extensions: "
- "Microsoft Joliet Level %d\n",
+ pr_debug("ISO 9660 Extensions: Microsoft Joliet Level %d\n",
joliet_level);
}
goto root_found;
@@ -771,11 +774,11 @@ root_found:
isonum_711(rootp->ext_attr_length);
sbi->s_firstdatazone = first_data_zone;
#ifndef BEQUIET
- printk(KERN_DEBUG "ISOFS: Max size:%ld Log zone size:%ld\n",
+ pr_debug("Max size:%ld Log zone size:%ld\n",
sbi->s_max_size, 1UL << sbi->s_log_zone_size);
- printk(KERN_DEBUG "ISOFS: First datazone:%ld\n", sbi->s_firstdatazone);
+ pr_debug("First datazone:%ld\n", sbi->s_firstdatazone);
if(sbi->s_high_sierra)
- printk(KERN_DEBUG "ISOFS: Disc in High Sierra format.\n");
+ pr_debug("Disc in High Sierra format.\n");
#endif
/*
@@ -878,9 +881,7 @@ root_found:
*/
if (sbi->s_rock == 1 && joliet_level &&
rootdir_empty(s, sbi->s_firstdatazone)) {
- printk(KERN_NOTICE
- "ISOFS: primary root directory is empty. "
- "Disabling Rock Ridge and switching to Joliet.");
+ pr_notice("primary root directory is empty. Disabling Rock Ridge and switching to Joliet.");
sbi->s_rock = 0;
}
@@ -898,8 +899,7 @@ root_found:
sbi->s_rock = 0;
if (sbi->s_firstdatazone != first_data_zone) {
sbi->s_firstdatazone = first_data_zone;
- printk(KERN_DEBUG
- "ISOFS: changing to secondary root\n");
+ pr_debug("changing to secondary root\n");
iput(inode);
inode = isofs_iget(s, sbi->s_firstdatazone, 0);
if (IS_ERR(inode))
@@ -918,9 +918,8 @@ root_found:
/* Make sure the root inode is a directory */
if (!S_ISDIR(inode->i_mode)) {
- printk(KERN_WARNING
- "isofs_fill_super: root inode is not a directory. "
- "Corrupted media?\n");
+ pr_warn("%s: root inode is not a directory. Corrupted media?\n",
+ __func__);
goto out_iput;
}
@@ -952,27 +951,26 @@ out_iput:
out_no_root:
error = PTR_ERR(inode);
if (error != -ENOMEM)
- printk(KERN_WARNING "%s: get root inode failed\n", __func__);
+ pr_warn("%s: get root inode failed\n", __func__);
out_no_inode:
#ifdef CONFIG_JOLIET
unload_nls(sbi->s_nls_iocharset);
#endif
goto out_freesbi;
out_no_read:
- printk(KERN_WARNING "%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n",
+ pr_warn("%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n",
__func__, s->s_id, iso_blknum, block);
goto out_freebh;
out_bad_zone_size:
- printk(KERN_WARNING "ISOFS: Bad logical zone size %ld\n",
- sbi->s_log_zone_size);
+ pr_warn("Bad logical zone size %ld\n", sbi->s_log_zone_size);
goto out_freebh;
out_bad_size:
- printk(KERN_WARNING "ISOFS: Logical zone size(%d) < hardware blocksize(%u)\n",
+ pr_warn("Logical zone size(%d) < hardware blocksize(%u)\n",
orig_zonesize, opt.blocksize);
goto out_freebh;
out_unknown_format:
if (!silent)
- printk(KERN_WARNING "ISOFS: Unable to identify CD-ROM format.\n");
+ pr_warn("Unable to identify CD-ROM format.\n");
out_freebh:
brelse(bh);
@@ -1021,7 +1019,7 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock,
error = -EIO;
rv = 0;
if (iblock != b_off) {
- printk(KERN_DEBUG "%s: block number too large\n", __func__);
+ pr_debug("%s: block number too large\n", __func__);
goto abort;
}
@@ -1042,7 +1040,7 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock,
* I/O errors.
*/
if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) {
- printk(KERN_DEBUG "%s: block >= EOF (%lu, %llu)\n",
+ pr_debug("%s: block >= EOF (%lu, %llu)\n",
__func__, b_off,
(unsigned long long)inode->i_size);
goto abort;
@@ -1068,12 +1066,11 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock,
iput(ninode);
if (++section > 100) {
- printk(KERN_DEBUG "%s: More than 100 file sections ?!?"
- " aborting...\n", __func__);
- printk(KERN_DEBUG "%s: block=%lu firstext=%u sect_size=%u "
- "nextblk=%lu nextoff=%lu\n", __func__,
- b_off, firstext, (unsigned) sect_size,
- nextblk, nextoff);
+ pr_debug("%s: More than 100 file sections ?!? aborting...\n",
+ __func__);
+ pr_debug("%s: block=%lu firstext=%u sect_size=%u nextblk=%lu nextoff=%lu\n",
+ __func__, b_off, firstext,
+ (unsigned) sect_size, nextblk, nextoff);
goto abort;
}
}
@@ -1105,7 +1102,7 @@ static int isofs_get_block(struct inode *inode, sector_t iblock,
int ret;
if (create) {
- printk(KERN_DEBUG "%s: Kernel tries to allocate a block\n", __func__);
+ pr_debug("%s: Kernel tries to allocate a block\n", __func__);
return -EROFS;
}
@@ -1248,13 +1245,12 @@ out_nomem:
return -ENOMEM;
out_noread:
- printk(KERN_INFO "ISOFS: unable to read i-node block %lu\n", block);
+ pr_info("unable to read i-node block %lu\n", block);
kfree(tmpde);
return -EIO;
out_toomany:
- printk(KERN_INFO "%s: More than 100 file sections ?!?, aborting...\n"
- "isofs_read_level3_size: inode=%lu\n",
+ pr_info("%s: More than 100 file sections ?!?, aborting...\n isofs_read_level3_size: inode=%lu\n",
__func__, inode->i_ino);
goto out;
}
@@ -1289,7 +1285,7 @@ static int isofs_read_inode(struct inode *inode)
tmpde = kmalloc(de_len, GFP_KERNEL);
if (tmpde == NULL) {
- printk(KERN_INFO "%s: out of memory\n", __func__);
+ pr_info("%s: out of memory\n", __func__);
ret = -ENOMEM;
goto fail;
}
@@ -1364,24 +1360,23 @@ static int isofs_read_inode(struct inode *inode)
inode->i_size &= 0x00ffffff;
if (de->interleave[0]) {
- printk(KERN_DEBUG "ISOFS: Interleaved files not (yet) supported.\n");
+ pr_debug("Interleaved files not (yet) supported.\n");
inode->i_size = 0;
}
/* I have no idea what file_unit_size is used for, so
we will flag it for now */
if (de->file_unit_size[0] != 0) {
- printk(KERN_DEBUG "ISOFS: File unit size != 0 for ISO file (%ld).\n",
- inode->i_ino);
+ pr_debug("File unit size != 0 for ISO file (%ld).\n",
+ inode->i_ino);
}
/* I have no idea what other flag bits are used for, so
we will flag it for now */
-#ifdef DEBUG
+#ifdef DEBUG_FLAGS
if((de->flags[-high_sierra] & ~2)!= 0){
- printk(KERN_DEBUG "ISOFS: Unusual flag settings for ISO file "
- "(%ld %x).\n",
- inode->i_ino, de->flags[-high_sierra]);
+ pr_debug("Unusual flag settings for ISO file (%ld %x).\n",
+ inode->i_ino, de->flags[-high_sierra]);
}
#endif
@@ -1450,7 +1445,7 @@ out:
return ret;
out_badread:
- printk(KERN_WARNING "ISOFS: unable to read i-node block\n");
+ pr_warn("unable to read i-node block\n");
fail:
goto out;
}
@@ -1541,6 +1536,7 @@ MODULE_ALIAS("iso9660");
static int __init init_iso9660_fs(void)
{
int err = init_inodecache();
+
if (err)
goto out;
#ifdef CONFIG_ZISOFS
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 95295640d9c8..c5ed09733112 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -113,9 +113,8 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
dpnt = de->name;
/* Basic sanity check, whether name doesn't exceed dir entry */
if (de_len < dlen + sizeof(struct iso_directory_record)) {
- printk(KERN_NOTICE "iso9660: Corrupted directory entry"
- " in block %lu of inode %lu\n", block,
- dir->i_ino);
+ pr_notice("Corrupted directory entry in block %lu of inode %lu\n",
+ block, dir->i_ino);
return 0;
}
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index c0bf42472e40..b13119556e5d 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -5,6 +5,8 @@
*
* Rock Ridge Extensions to iso9660
*/
+#define DEBUG
+#define pr_fmt(fmt) "ISOFS: rock: " fmt
#include <linux/slab.h>
#include <linux/pagemap.h>
@@ -89,9 +91,8 @@ static int rock_continue(struct rock_state *rs)
if ((unsigned)rs->cont_offset > blocksize - min_de_size ||
(unsigned)rs->cont_size > blocksize ||
(unsigned)(rs->cont_offset + rs->cont_size) > blocksize) {
- printk(KERN_NOTICE "rock: corrupted directory entry. "
- "extent=%d, offset=%d, size=%d\n",
- rs->cont_extent, rs->cont_offset, rs->cont_size);
+ pr_notice("corrupted directory entry. extent=%d, offset=%d, size=%d\n",
+ rs->cont_extent, rs->cont_offset, rs->cont_size);
ret = -EIO;
goto out;
}
@@ -117,7 +118,7 @@ static int rock_continue(struct rock_state *rs)
rs->cont_offset = 0;
return 0;
}
- printk("Unable to read rock-ridge attributes\n");
+ pr_warn("Unable to read rock-ridge attributes\n");
}
out:
kfree(rs->buffer);
@@ -176,10 +177,9 @@ static int rock_check_overflow(struct rock_state *rs, int sig)
}
len += offsetof(struct rock_ridge, u);
if (len > rs->len) {
- printk(KERN_NOTICE "rock: directory entry would overflow "
- "storage\n");
- printk(KERN_NOTICE "rock: sig=0x%02x, size=%d, remaining=%d\n",
- sig, len, rs->len);
+ pr_notice("directory entry would overflow storage\n");
+ pr_notice("sig=0x%02x, size=%d, remaining=%d\n",
+ sig, len, rs->len);
return -EIO;
}
return 0;
@@ -257,7 +257,7 @@ repeat:
break;
if (rr->u.NM.flags & ~1) {
- printk("Unsupported NM flag settings (%d)\n",
+ pr_warn("Unsupported NM flag settings (%d)\n",
rr->u.NM.flags);
break;
}
@@ -353,13 +353,13 @@ repeat:
break;
case SIG('E', 'R'):
ISOFS_SB(inode->i_sb)->s_rock = 1;
- printk(KERN_DEBUG "ISO 9660 Extensions: ");
+ pr_debug("ISO 9660 Extensions: ");
{
int p;
for (p = 0; p < rr->u.ER.len_id; p++)
- printk("%c", rr->u.ER.data[p]);
+ pr_warn("%c", rr->u.ER.data[p]);
}
- printk("\n");
+ pr_warn("\n");
break;
case SIG('P', 'X'):
inode->i_mode = isonum_733(rr->u.PX.mode);
@@ -450,8 +450,7 @@ repeat:
inode->i_size += 1;
break;
default:
- printk("Symlink component flag "
- "not implemented\n");
+ pr_warn("Symlink component flag not implemented\n");
}
slen -= slp->len + 2;
oldslp = slp;
@@ -481,8 +480,7 @@ repeat:
symlink_len = inode->i_size;
break;
case SIG('R', 'E'):
- printk(KERN_WARNING "Attempt to read inode for "
- "relocated directory\n");
+ pr_warn("Attempt to read inode for relocated directory\n");
goto out;
case SIG('C', 'L'):
ISOFS_I(inode)->i_first_extent =
@@ -518,9 +516,7 @@ repeat:
int block_shift =
isonum_711(&rr->u.ZF.parms[1]);
if (block_shift > 17) {
- printk(KERN_WARNING "isofs: "
- "Can't handle ZF block "
- "size of 2^%d\n",
+ pr_warn("Can't handle ZF block size of 2^%d\n",
block_shift);
} else {
/*
@@ -543,9 +539,7 @@ repeat:
real_size);
}
} else {
- printk(KERN_WARNING
- "isofs: Unknown ZF compression "
- "algorithm: %c%c\n",
+ pr_warn("Unknown ZF compression algorithm: %c%c\n",
rr->u.ZF.algorithm[0],
rr->u.ZF.algorithm[1]);
}
@@ -604,7 +598,7 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
*rpnt++ = '/';
break;
default:
- printk("Symlink component flag not implemented (%d)\n",
+ pr_warn("Symlink component flag not implemented (%d)\n",
slp->flags);
}
slen -= slp->len + 2;
@@ -757,10 +751,10 @@ out:
kfree(rs.buffer);
goto fail;
out_noread:
- printk("unable to read i-node block");
+ pr_warn("unable to read i-node block");
goto fail;
out_bad_span:
- printk("symlink spans iso9660 blocks\n");
+ pr_warn("symlink spans iso9660 blocks\n");
fail:
brelse(bh);
error:
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 06fe11e0abfa..26ebba7d8bc3 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -810,7 +810,8 @@ journal_t * journal_init_dev(struct block_device *bdev,
journal->j_blocksize = blocksize;
n = journal->j_blocksize / sizeof(journal_block_tag_t);
journal->j_wbufsize = n;
- journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
+ journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
+ GFP_KERNEL);
if (!journal->j_wbuf) {
printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
__func__);
@@ -871,7 +872,8 @@ journal_t * journal_init_inode (struct inode *inode)
/* journal descriptor can store up to n blocks -bzzz */
n = journal->j_blocksize / sizeof(journal_block_tag_t);
journal->j_wbufsize = n;
- journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
+ journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
+ GFP_KERNEL);
if (!journal->j_wbuf) {
printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
__func__);
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 8898bbd2b61e..7374a57f2853 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -241,7 +241,7 @@ static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
table->hash_size = hash_size;
table->hash_shift = ilog2(hash_size);
table->hash_table =
- kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
+ kmalloc_array(hash_size, sizeof(struct list_head), GFP_KERNEL);
if (!table->hash_table) {
kmem_cache_free(revoke_table_cache, table);
table = NULL;
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 0b9a1e44e833..5698dae5d92d 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -94,11 +94,12 @@ static int jffs2_zlib_compress(unsigned char *data_in,
while (def_strm.total_out < *dstlen - STREAM_END_SPACE && def_strm.total_in < *sourcelen) {
def_strm.avail_out = *dstlen - (def_strm.total_out + STREAM_END_SPACE);
- def_strm.avail_in = min((unsigned)(*sourcelen-def_strm.total_in), def_strm.avail_out);
- jffs2_dbg(1, "calling deflate with avail_in %d, avail_out %d\n",
+ def_strm.avail_in = min_t(unsigned long,
+ (*sourcelen-def_strm.total_in), def_strm.avail_out);
+ jffs2_dbg(1, "calling deflate with avail_in %ld, avail_out %ld\n",
def_strm.avail_in, def_strm.avail_out);
ret = zlib_deflate(&def_strm, Z_PARTIAL_FLUSH);
- jffs2_dbg(1, "deflate returned with avail_in %d, avail_out %d, total_in %ld, total_out %ld\n",
+ jffs2_dbg(1, "deflate returned with avail_in %ld, avail_out %ld, total_in %ld, total_out %ld\n",
def_strm.avail_in, def_strm.avail_out,
def_strm.total_in, def_strm.total_out);
if (ret != Z_OK) {
diff --git a/fs/mpage.c b/fs/mpage.c
index 5f9ed622274f..003f6fe3cdb6 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -480,6 +480,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
struct buffer_head map_bh;
loff_t i_size = i_size_read(inode);
int ret = 0;
+ int wr = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
if (page_has_buffers(page)) {
struct buffer_head *head = page_buffers(page);
@@ -588,7 +589,7 @@ page_is_mapped:
* This page will go to BIO. Do we need to send this BIO off first?
*/
if (bio && mpd->last_block_in_bio != blocks[0] - 1)
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
alloc_new:
if (bio == NULL) {
@@ -612,7 +613,7 @@ alloc_new:
*/
length = first_unmapped << blkbits;
if (bio_add_page(bio, page, length, 0) < length) {
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
goto alloc_new;
}
@@ -622,7 +623,7 @@ alloc_new:
set_page_writeback(page);
unlock_page(page);
if (boundary || (first_unmapped != blocks_per_page)) {
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
if (boundary_block) {
write_boundary_block(boundary_bdev,
boundary_block, 1 << blkbits);
@@ -634,7 +635,7 @@ alloc_new:
confused:
if (bio)
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
if (mpd->use_writepage) {
ret = mapping->a_ops->writepage(page, wbc);
@@ -690,8 +691,11 @@ mpage_writepages(struct address_space *mapping,
};
ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
- if (mpd.bio)
- mpage_bio_submit(WRITE, mpd.bio);
+ if (mpd.bio) {
+ int wr = (wbc->sync_mode == WB_SYNC_ALL ?
+ WRITE_SYNC : WRITE);
+ mpage_bio_submit(wr, mpd.bio);
+ }
}
blk_finish_plug(&plug);
return ret;
@@ -708,8 +712,11 @@ int mpage_writepage(struct page *page, get_block_t get_block,
.use_writepage = 0,
};
int ret = __mpage_writepage(page, wbc, &mpd);
- if (mpd.bio)
- mpage_bio_submit(WRITE, mpd.bio);
+ if (mpd.bio) {
+ int wr = (wbc->sync_mode == WB_SYNC_ALL ?
+ WRITE_SYNC : WRITE);
+ mpage_bio_submit(wr, mpd.bio);
+ }
return ret;
}
EXPORT_SYMBOL(mpage_writepage);
diff --git a/fs/namespace.c b/fs/namespace.c
index b10db3d69943..019ff81536a2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -845,7 +845,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
list_splice(&head, n->list.prev);
if (shadows)
- hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash);
+ hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
else
hlist_add_head_rcu(&mnt->mnt_hash,
m_hash(&parent->mnt, mnt->mnt_mountpoint));
diff --git a/fs/nilfs2/Makefile b/fs/nilfs2/Makefile
index 85c98737a146..fc603e0431bb 100644
--- a/fs/nilfs2/Makefile
+++ b/fs/nilfs2/Makefile
@@ -2,4 +2,4 @@ obj-$(CONFIG_NILFS2_FS) += nilfs2.o
nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \
btnode.o bmap.o btree.o direct.o dat.o recovery.o \
the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \
- ifile.o alloc.o gcinode.o ioctl.o
+ ifile.o alloc.o gcinode.o ioctl.o sysfs.o
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 9bc72dec3fa6..0696161bf59d 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -320,6 +320,14 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *);
int nilfs_init_gcinode(struct inode *inode);
void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs);
+/* sysfs.c */
+int __init nilfs_sysfs_init(void);
+void nilfs_sysfs_exit(void);
+int nilfs_sysfs_create_device_group(struct super_block *);
+void nilfs_sysfs_delete_device_group(struct the_nilfs *);
+int nilfs_sysfs_create_snapshot_group(struct nilfs_root *);
+void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *);
+
/*
* Inodes and files operations
*/
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 8c532b2ca3ab..48c9ce8c983d 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1014,7 +1014,7 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
struct dentry *dentry;
int ret;
- if (cno < 0 || cno > nilfs->ns_cno)
+ if (cno > nilfs->ns_cno)
return false;
if (cno >= nilfs_last_cno(nilfs))
@@ -1452,13 +1452,19 @@ static int __init init_nilfs_fs(void)
if (err)
goto fail;
- err = register_filesystem(&nilfs_fs_type);
+ err = nilfs_sysfs_init();
if (err)
goto free_cachep;
+ err = register_filesystem(&nilfs_fs_type);
+ if (err)
+ goto deinit_sysfs_entry;
+
printk(KERN_INFO "NILFS version 2 loaded\n");
return 0;
+deinit_sysfs_entry:
+ nilfs_sysfs_exit();
free_cachep:
nilfs_destroy_cachep();
fail:
@@ -1468,6 +1474,7 @@ fail:
static void __exit exit_nilfs_fs(void)
{
nilfs_destroy_cachep();
+ nilfs_sysfs_exit();
unregister_filesystem(&nilfs_fs_type);
}
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
new file mode 100644
index 000000000000..0f6148c8121a
--- /dev/null
+++ b/fs/nilfs2/sysfs.c
@@ -0,0 +1,1137 @@
+/*
+ * sysfs.c - sysfs support implementation.
+ *
+ * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation.
+ * Copyright (C) 2014 HGST, Inc., a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com>
+ */
+
+#include <linux/kobject.h>
+
+#include "nilfs.h"
+#include "mdt.h"
+#include "sufile.h"
+#include "cpfile.h"
+#include "sysfs.h"
+
+/* /sys/fs/<nilfs>/ */
+static struct kset *nilfs_kset;
+
+#define NILFS_SHOW_TIME(time_t_val, buf) ({ \
+ struct tm res; \
+ int count = 0; \
+ time_to_tm(time_t_val, 0, &res); \
+ res.tm_year += 1900; \
+ res.tm_mon += 1; \
+ count = scnprintf(buf, PAGE_SIZE, \
+ "%ld-%.2d-%.2d %.2d:%.2d:%.2d\n", \
+ res.tm_year, res.tm_mon, res.tm_mday, \
+ res.tm_hour, res.tm_min, res.tm_sec);\
+ count; \
+})
+
+#define NILFS_DEV_INT_GROUP_OPS(name, parent_name) \
+static ssize_t nilfs_##name##_attr_show(struct kobject *kobj, \
+ struct attribute *attr, char *buf) \
+{ \
+ struct the_nilfs *nilfs = container_of(kobj->parent, \
+ struct the_nilfs, \
+ ns_##parent_name##_kobj); \
+ struct nilfs_##name##_attr *a = container_of(attr, \
+ struct nilfs_##name##_attr, \
+ attr); \
+ return a->show ? a->show(a, nilfs, buf) : 0; \
+} \
+static ssize_t nilfs_##name##_attr_store(struct kobject *kobj, \
+ struct attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ struct the_nilfs *nilfs = container_of(kobj->parent, \
+ struct the_nilfs, \
+ ns_##parent_name##_kobj); \
+ struct nilfs_##name##_attr *a = container_of(attr, \
+ struct nilfs_##name##_attr, \
+ attr); \
+ return a->store ? a->store(a, nilfs, buf, len) : 0; \
+} \
+static const struct sysfs_ops nilfs_##name##_attr_ops = { \
+ .show = nilfs_##name##_attr_show, \
+ .store = nilfs_##name##_attr_store, \
+};
+
+#define NILFS_DEV_INT_GROUP_TYPE(name, parent_name) \
+static void nilfs_##name##_attr_release(struct kobject *kobj) \
+{ \
+ struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \
+ struct the_nilfs *nilfs = container_of(kobj->parent, \
+ struct the_nilfs, \
+ ns_##parent_name##_kobj); \
+ subgroups = nilfs->ns_##parent_name##_subgroups; \
+ complete(&subgroups->sg_##name##_kobj_unregister); \
+} \
+static struct kobj_type nilfs_##name##_ktype = { \
+ .default_attrs = nilfs_##name##_attrs, \
+ .sysfs_ops = &nilfs_##name##_attr_ops, \
+ .release = nilfs_##name##_attr_release, \
+};
+
+#define NILFS_DEV_INT_GROUP_FNS(name, parent_name) \
+int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \
+{ \
+ struct kobject *parent; \
+ struct kobject *kobj; \
+ struct completion *kobj_unregister; \
+ struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \
+ int err; \
+ subgroups = nilfs->ns_##parent_name##_subgroups; \
+ kobj = &subgroups->sg_##name##_kobj; \
+ kobj_unregister = &subgroups->sg_##name##_kobj_unregister; \
+ parent = &nilfs->ns_##parent_name##_kobj; \
+ kobj->kset = nilfs_kset; \
+ init_completion(kobj_unregister); \
+ err = kobject_init_and_add(kobj, &nilfs_##name##_ktype, parent, \
+ #name); \
+ if (err) \
+ return err; \
+ return 0; \
+} \
+void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \
+{ \
+ kobject_del(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \
+}
+
+/************************************************************************
+ * NILFS snapshot attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_snapshot_inodes_count_show(struct nilfs_snapshot_attr *attr,
+ struct nilfs_root *root, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)atomic64_read(&root->inodes_count));
+}
+
+static ssize_t
+nilfs_snapshot_blocks_count_show(struct nilfs_snapshot_attr *attr,
+ struct nilfs_root *root, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)atomic64_read(&root->blocks_count));
+}
+
+static const char snapshot_readme_str[] =
+ "The group contains details about mounted snapshot.\n\n"
+ "(1) inodes_count\n\tshow number of inodes for snapshot.\n\n"
+ "(2) blocks_count\n\tshow number of blocks for snapshot.\n\n";
+
+static ssize_t
+nilfs_snapshot_README_show(struct nilfs_snapshot_attr *attr,
+ struct nilfs_root *root, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, snapshot_readme_str);
+}
+
+NILFS_SNAPSHOT_RO_ATTR(inodes_count);
+NILFS_SNAPSHOT_RO_ATTR(blocks_count);
+NILFS_SNAPSHOT_RO_ATTR(README);
+
+static struct attribute *nilfs_snapshot_attrs[] = {
+ NILFS_SNAPSHOT_ATTR_LIST(inodes_count),
+ NILFS_SNAPSHOT_ATTR_LIST(blocks_count),
+ NILFS_SNAPSHOT_ATTR_LIST(README),
+ NULL,
+};
+
+static ssize_t nilfs_snapshot_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct nilfs_root *root =
+ container_of(kobj, struct nilfs_root, snapshot_kobj);
+ struct nilfs_snapshot_attr *a =
+ container_of(attr, struct nilfs_snapshot_attr, attr);
+
+ return a->show ? a->show(a, root, buf) : 0;
+}
+
+static ssize_t nilfs_snapshot_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct nilfs_root *root =
+ container_of(kobj, struct nilfs_root, snapshot_kobj);
+ struct nilfs_snapshot_attr *a =
+ container_of(attr, struct nilfs_snapshot_attr, attr);
+
+ return a->store ? a->store(a, root, buf, len) : 0;
+}
+
+static void nilfs_snapshot_attr_release(struct kobject *kobj)
+{
+ struct nilfs_root *root = container_of(kobj, struct nilfs_root,
+ snapshot_kobj);
+ complete(&root->snapshot_kobj_unregister);
+}
+
+static const struct sysfs_ops nilfs_snapshot_attr_ops = {
+ .show = nilfs_snapshot_attr_show,
+ .store = nilfs_snapshot_attr_store,
+};
+
+static struct kobj_type nilfs_snapshot_ktype = {
+ .default_attrs = nilfs_snapshot_attrs,
+ .sysfs_ops = &nilfs_snapshot_attr_ops,
+ .release = nilfs_snapshot_attr_release,
+};
+
+int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root)
+{
+ struct the_nilfs *nilfs;
+ struct kobject *parent;
+ int err;
+
+ nilfs = root->nilfs;
+ parent = &nilfs->ns_dev_subgroups->sg_mounted_snapshots_kobj;
+ root->snapshot_kobj.kset = nilfs_kset;
+ init_completion(&root->snapshot_kobj_unregister);
+
+ if (root->cno == NILFS_CPTREE_CURRENT_CNO) {
+ err = kobject_init_and_add(&root->snapshot_kobj,
+ &nilfs_snapshot_ktype,
+ &nilfs->ns_dev_kobj,
+ "current_checkpoint");
+ } else {
+ err = kobject_init_and_add(&root->snapshot_kobj,
+ &nilfs_snapshot_ktype,
+ parent,
+ "%llu", root->cno);
+ }
+
+ if (err)
+ return err;
+
+ return 0;
+}
+
+void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root)
+{
+ kobject_del(&root->snapshot_kobj);
+}
+
+/************************************************************************
+ * NILFS mounted snapshots attrs *
+ ************************************************************************/
+
+static const char mounted_snapshots_readme_str[] =
+ "The mounted_snapshots group contains group for\n"
+ "every mounted snapshot.\n";
+
+static ssize_t
+nilfs_mounted_snapshots_README_show(struct nilfs_mounted_snapshots_attr *attr,
+ struct the_nilfs *nilfs, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, mounted_snapshots_readme_str);
+}
+
+NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(README);
+
+static struct attribute *nilfs_mounted_snapshots_attrs[] = {
+ NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(mounted_snapshots, dev);
+NILFS_DEV_INT_GROUP_TYPE(mounted_snapshots, dev);
+NILFS_DEV_INT_GROUP_FNS(mounted_snapshots, dev);
+
+/************************************************************************
+ * NILFS checkpoints attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_checkpoints_checkpoints_number_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 ncheckpoints;
+ struct nilfs_cpstat cpstat;
+ int err;
+
+ down_read(&nilfs->ns_segctor_sem);
+ err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat);
+ up_read(&nilfs->ns_segctor_sem);
+ if (err < 0) {
+ printk(KERN_ERR "NILFS: unable to get checkpoint stat: err=%d\n",
+ err);
+ return err;
+ }
+
+ ncheckpoints = cpstat.cs_ncps;
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", ncheckpoints);
+}
+
+static ssize_t
+nilfs_checkpoints_snapshots_number_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 nsnapshots;
+ struct nilfs_cpstat cpstat;
+ int err;
+
+ down_read(&nilfs->ns_segctor_sem);
+ err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat);
+ up_read(&nilfs->ns_segctor_sem);
+ if (err < 0) {
+ printk(KERN_ERR "NILFS: unable to get checkpoint stat: err=%d\n",
+ err);
+ return err;
+ }
+
+ nsnapshots = cpstat.cs_nsss;
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", nsnapshots);
+}
+
+static ssize_t
+nilfs_checkpoints_last_seg_checkpoint_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 last_cno;
+
+ spin_lock(&nilfs->ns_last_segment_lock);
+ last_cno = nilfs->ns_last_cno;
+ spin_unlock(&nilfs->ns_last_segment_lock);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", last_cno);
+}
+
+static ssize_t
+nilfs_checkpoints_next_checkpoint_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 cno;
+
+ down_read(&nilfs->ns_sem);
+ cno = nilfs->ns_cno;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", cno);
+}
+
+static const char checkpoints_readme_str[] =
+ "The checkpoints group contains attributes that describe\n"
+ "details about volume's checkpoints.\n\n"
+ "(1) checkpoints_number\n\tshow number of checkpoints on volume.\n\n"
+ "(2) snapshots_number\n\tshow number of snapshots on volume.\n\n"
+ "(3) last_seg_checkpoint\n"
+ "\tshow checkpoint number of the latest segment.\n\n"
+ "(4) next_checkpoint\n\tshow next checkpoint number.\n\n";
+
+static ssize_t
+nilfs_checkpoints_README_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, checkpoints_readme_str);
+}
+
+NILFS_CHECKPOINTS_RO_ATTR(checkpoints_number);
+NILFS_CHECKPOINTS_RO_ATTR(snapshots_number);
+NILFS_CHECKPOINTS_RO_ATTR(last_seg_checkpoint);
+NILFS_CHECKPOINTS_RO_ATTR(next_checkpoint);
+NILFS_CHECKPOINTS_RO_ATTR(README);
+
+static struct attribute *nilfs_checkpoints_attrs[] = {
+ NILFS_CHECKPOINTS_ATTR_LIST(checkpoints_number),
+ NILFS_CHECKPOINTS_ATTR_LIST(snapshots_number),
+ NILFS_CHECKPOINTS_ATTR_LIST(last_seg_checkpoint),
+ NILFS_CHECKPOINTS_ATTR_LIST(next_checkpoint),
+ NILFS_CHECKPOINTS_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(checkpoints, dev);
+NILFS_DEV_INT_GROUP_TYPE(checkpoints, dev);
+NILFS_DEV_INT_GROUP_FNS(checkpoints, dev);
+
+/************************************************************************
+ * NILFS segments attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_segments_segments_number_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%lu\n", nilfs->ns_nsegments);
+}
+
+static ssize_t
+nilfs_segments_blocks_per_segment_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%lu\n", nilfs->ns_blocks_per_segment);
+}
+
+static ssize_t
+nilfs_segments_clean_segments_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ unsigned long ncleansegs;
+
+ down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+ ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
+ up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%lu\n", ncleansegs);
+}
+
+static ssize_t
+nilfs_segments_dirty_segments_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_sustat sustat;
+ int err;
+
+ down_read(&nilfs->ns_segctor_sem);
+ err = nilfs_sufile_get_stat(nilfs->ns_sufile, &sustat);
+ up_read(&nilfs->ns_segctor_sem);
+ if (err < 0) {
+ printk(KERN_ERR "NILFS: unable to get segment stat: err=%d\n",
+ err);
+ return err;
+ }
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", sustat.ss_ndirtysegs);
+}
+
+static const char segments_readme_str[] =
+ "The segments group contains attributes that describe\n"
+ "details about volume's segments.\n\n"
+ "(1) segments_number\n\tshow number of segments on volume.\n\n"
+ "(2) blocks_per_segment\n\tshow number of blocks in segment.\n\n"
+ "(3) clean_segments\n\tshow count of clean segments.\n\n"
+ "(4) dirty_segments\n\tshow count of dirty segments.\n\n";
+
+static ssize_t
+nilfs_segments_README_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, segments_readme_str);
+}
+
+NILFS_SEGMENTS_RO_ATTR(segments_number);
+NILFS_SEGMENTS_RO_ATTR(blocks_per_segment);
+NILFS_SEGMENTS_RO_ATTR(clean_segments);
+NILFS_SEGMENTS_RO_ATTR(dirty_segments);
+NILFS_SEGMENTS_RO_ATTR(README);
+
+static struct attribute *nilfs_segments_attrs[] = {
+ NILFS_SEGMENTS_ATTR_LIST(segments_number),
+ NILFS_SEGMENTS_ATTR_LIST(blocks_per_segment),
+ NILFS_SEGMENTS_ATTR_LIST(clean_segments),
+ NILFS_SEGMENTS_ATTR_LIST(dirty_segments),
+ NILFS_SEGMENTS_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(segments, dev);
+NILFS_DEV_INT_GROUP_TYPE(segments, dev);
+NILFS_DEV_INT_GROUP_FNS(segments, dev);
+
+/************************************************************************
+ * NILFS segctor attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_segctor_last_pseg_block_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ sector_t last_pseg;
+
+ spin_lock(&nilfs->ns_last_segment_lock);
+ last_pseg = nilfs->ns_last_pseg;
+ spin_unlock(&nilfs->ns_last_segment_lock);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)last_pseg);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_sequence_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ u64 last_seq;
+
+ spin_lock(&nilfs->ns_last_segment_lock);
+ last_seq = nilfs->ns_last_seq;
+ spin_unlock(&nilfs->ns_last_segment_lock);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", last_seq);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_checkpoint_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 last_cno;
+
+ spin_lock(&nilfs->ns_last_segment_lock);
+ last_cno = nilfs->ns_last_cno;
+ spin_unlock(&nilfs->ns_last_segment_lock);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", last_cno);
+}
+
+static ssize_t
+nilfs_segctor_current_seg_sequence_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ u64 seg_seq;
+
+ down_read(&nilfs->ns_sem);
+ seg_seq = nilfs->ns_seg_seq;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", seg_seq);
+}
+
+static ssize_t
+nilfs_segctor_current_last_full_seg_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 segnum;
+
+ down_read(&nilfs->ns_sem);
+ segnum = nilfs->ns_segnum;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", segnum);
+}
+
+static ssize_t
+nilfs_segctor_next_full_seg_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 nextnum;
+
+ down_read(&nilfs->ns_sem);
+ nextnum = nilfs->ns_nextnum;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", nextnum);
+}
+
+static ssize_t
+nilfs_segctor_next_pseg_offset_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ unsigned long pseg_offset;
+
+ down_read(&nilfs->ns_sem);
+ pseg_offset = nilfs->ns_pseg_offset;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%lu\n", pseg_offset);
+}
+
+static ssize_t
+nilfs_segctor_next_checkpoint_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 cno;
+
+ down_read(&nilfs->ns_sem);
+ cno = nilfs->ns_cno;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", cno);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_write_time_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t ctime;
+
+ down_read(&nilfs->ns_sem);
+ ctime = nilfs->ns_ctime;
+ up_read(&nilfs->ns_sem);
+
+ return NILFS_SHOW_TIME(ctime, buf);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_write_time_secs_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t ctime;
+
+ down_read(&nilfs->ns_sem);
+ ctime = nilfs->ns_ctime;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)ctime);
+}
+
+static ssize_t
+nilfs_segctor_last_nongc_write_time_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t nongc_ctime;
+
+ down_read(&nilfs->ns_sem);
+ nongc_ctime = nilfs->ns_nongc_ctime;
+ up_read(&nilfs->ns_sem);
+
+ return NILFS_SHOW_TIME(nongc_ctime, buf);
+}
+
+static ssize_t
+nilfs_segctor_last_nongc_write_time_secs_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t nongc_ctime;
+
+ down_read(&nilfs->ns_sem);
+ nongc_ctime = nilfs->ns_nongc_ctime;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)nongc_ctime);
+}
+
+static ssize_t
+nilfs_segctor_dirty_data_blocks_count_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ u32 ndirtyblks;
+
+ down_read(&nilfs->ns_sem);
+ ndirtyblks = atomic_read(&nilfs->ns_ndirtyblks);
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", ndirtyblks);
+}
+
+static const char segctor_readme_str[] =
+ "The segctor group contains attributes that describe\n"
+ "segctor thread activity details.\n\n"
+ "(1) last_pseg_block\n"
+ "\tshow start block number of the latest segment.\n\n"
+ "(2) last_seg_sequence\n"
+ "\tshow sequence value of the latest segment.\n\n"
+ "(3) last_seg_checkpoint\n"
+ "\tshow checkpoint number of the latest segment.\n\n"
+ "(4) current_seg_sequence\n\tshow segment sequence counter.\n\n"
+ "(5) current_last_full_seg\n"
+ "\tshow index number of the latest full segment.\n\n"
+ "(6) next_full_seg\n"
+ "\tshow index number of the full segment index to be used next.\n\n"
+ "(7) next_pseg_offset\n"
+ "\tshow offset of next partial segment in the current full segment.\n\n"
+ "(8) next_checkpoint\n\tshow next checkpoint number.\n\n"
+ "(9) last_seg_write_time\n"
+ "\tshow write time of the last segment in human-readable format.\n\n"
+ "(10) last_seg_write_time_secs\n"
+ "\tshow write time of the last segment in seconds.\n\n"
+ "(11) last_nongc_write_time\n"
+ "\tshow write time of the last segment not for cleaner operation "
+ "in human-readable format.\n\n"
+ "(12) last_nongc_write_time_secs\n"
+ "\tshow write time of the last segment not for cleaner operation "
+ "in seconds.\n\n"
+ "(13) dirty_data_blocks_count\n"
+ "\tshow number of dirty data blocks.\n\n";
+
+static ssize_t
+nilfs_segctor_README_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, segctor_readme_str);
+}
+
+NILFS_SEGCTOR_RO_ATTR(last_pseg_block);
+NILFS_SEGCTOR_RO_ATTR(last_seg_sequence);
+NILFS_SEGCTOR_RO_ATTR(last_seg_checkpoint);
+NILFS_SEGCTOR_RO_ATTR(current_seg_sequence);
+NILFS_SEGCTOR_RO_ATTR(current_last_full_seg);
+NILFS_SEGCTOR_RO_ATTR(next_full_seg);
+NILFS_SEGCTOR_RO_ATTR(next_pseg_offset);
+NILFS_SEGCTOR_RO_ATTR(next_checkpoint);
+NILFS_SEGCTOR_RO_ATTR(last_seg_write_time);
+NILFS_SEGCTOR_RO_ATTR(last_seg_write_time_secs);
+NILFS_SEGCTOR_RO_ATTR(last_nongc_write_time);
+NILFS_SEGCTOR_RO_ATTR(last_nongc_write_time_secs);
+NILFS_SEGCTOR_RO_ATTR(dirty_data_blocks_count);
+NILFS_SEGCTOR_RO_ATTR(README);
+
+static struct attribute *nilfs_segctor_attrs[] = {
+ NILFS_SEGCTOR_ATTR_LIST(last_pseg_block),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_sequence),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_checkpoint),
+ NILFS_SEGCTOR_ATTR_LIST(current_seg_sequence),
+ NILFS_SEGCTOR_ATTR_LIST(current_last_full_seg),
+ NILFS_SEGCTOR_ATTR_LIST(next_full_seg),
+ NILFS_SEGCTOR_ATTR_LIST(next_pseg_offset),
+ NILFS_SEGCTOR_ATTR_LIST(next_checkpoint),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_write_time),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_write_time_secs),
+ NILFS_SEGCTOR_ATTR_LIST(last_nongc_write_time),
+ NILFS_SEGCTOR_ATTR_LIST(last_nongc_write_time_secs),
+ NILFS_SEGCTOR_ATTR_LIST(dirty_data_blocks_count),
+ NILFS_SEGCTOR_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(segctor, dev);
+NILFS_DEV_INT_GROUP_TYPE(segctor, dev);
+NILFS_DEV_INT_GROUP_FNS(segctor, dev);
+
+/************************************************************************
+ * NILFS superblock attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_superblock_sb_write_time_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t sbwtime;
+
+ down_read(&nilfs->ns_sem);
+ sbwtime = nilfs->ns_sbwtime;
+ up_read(&nilfs->ns_sem);
+
+ return NILFS_SHOW_TIME(sbwtime, buf);
+}
+
+static ssize_t
+nilfs_superblock_sb_write_time_secs_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t sbwtime;
+
+ down_read(&nilfs->ns_sem);
+ sbwtime = nilfs->ns_sbwtime;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)sbwtime);
+}
+
+static ssize_t
+nilfs_superblock_sb_write_count_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ unsigned sbwcount;
+
+ down_read(&nilfs->ns_sem);
+ sbwcount = nilfs->ns_sbwcount;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", sbwcount);
+}
+
+static ssize_t
+nilfs_superblock_sb_update_frequency_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ unsigned sb_update_freq;
+
+ down_read(&nilfs->ns_sem);
+ sb_update_freq = nilfs->ns_sb_update_freq;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", sb_update_freq);
+}
+
+static ssize_t
+nilfs_superblock_sb_update_frequency_store(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ const char *buf, size_t count)
+{
+ unsigned val;
+ int err;
+
+ err = kstrtouint(skip_spaces(buf), 0, &val);
+ if (err) {
+ printk(KERN_ERR "NILFS: unable to convert string: err=%d\n",
+ err);
+ return err;
+ }
+
+ if (val < NILFS_SB_FREQ) {
+ val = NILFS_SB_FREQ;
+ printk(KERN_WARNING "NILFS: superblock update frequency cannot be lesser than 10 seconds\n");
+ }
+
+ down_write(&nilfs->ns_sem);
+ nilfs->ns_sb_update_freq = val;
+ up_write(&nilfs->ns_sem);
+
+ return count;
+}
+
+static const char sb_readme_str[] =
+ "The superblock group contains attributes that describe\n"
+ "superblock's details.\n\n"
+ "(1) sb_write_time\n\tshow previous write time of super block "
+ "in human-readable format.\n\n"
+ "(2) sb_write_time_secs\n\tshow previous write time of super block "
+ "in seconds.\n\n"
+ "(3) sb_write_count\n\tshow write count of super block.\n\n"
+ "(4) sb_update_frequency\n"
+ "\tshow/set interval of periodical update of superblock (in seconds).\n\n"
+ "\tYou can set preferable frequency of superblock update by command:\n\n"
+ "\t'echo <val> > /sys/fs/<nilfs>/<dev>/superblock/sb_update_frequency'\n";
+
+static ssize_t
+nilfs_superblock_README_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, sb_readme_str);
+}
+
+NILFS_SUPERBLOCK_RO_ATTR(sb_write_time);
+NILFS_SUPERBLOCK_RO_ATTR(sb_write_time_secs);
+NILFS_SUPERBLOCK_RO_ATTR(sb_write_count);
+NILFS_SUPERBLOCK_RW_ATTR(sb_update_frequency);
+NILFS_SUPERBLOCK_RO_ATTR(README);
+
+static struct attribute *nilfs_superblock_attrs[] = {
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_write_time),
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_write_time_secs),
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_write_count),
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_update_frequency),
+ NILFS_SUPERBLOCK_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(superblock, dev);
+NILFS_DEV_INT_GROUP_TYPE(superblock, dev);
+NILFS_DEV_INT_GROUP_FNS(superblock, dev);
+
+/************************************************************************
+ * NILFS device attrs *
+ ************************************************************************/
+
+static
+ssize_t nilfs_dev_revision_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_super_block **sbp = nilfs->ns_sbp;
+ u32 major = le32_to_cpu(sbp[0]->s_rev_level);
+ u16 minor = le16_to_cpu(sbp[0]->s_minor_rev_level);
+
+ return snprintf(buf, PAGE_SIZE, "%d.%d\n", major, minor);
+}
+
+static
+ssize_t nilfs_dev_blocksize_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", nilfs->ns_blocksize);
+}
+
+static
+ssize_t nilfs_dev_device_size_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_super_block **sbp = nilfs->ns_sbp;
+ u64 dev_size = le64_to_cpu(sbp[0]->s_dev_size);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", dev_size);
+}
+
+static
+ssize_t nilfs_dev_free_blocks_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ sector_t free_blocks = 0;
+
+ nilfs_count_free_blocks(nilfs, &free_blocks);
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)free_blocks);
+}
+
+static
+ssize_t nilfs_dev_uuid_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_super_block **sbp = nilfs->ns_sbp;
+
+ return snprintf(buf, PAGE_SIZE, "%pUb\n", sbp[0]->s_uuid);
+}
+
+static
+ssize_t nilfs_dev_volume_name_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_super_block **sbp = nilfs->ns_sbp;
+
+ return scnprintf(buf, sizeof(sbp[0]->s_volume_name), "%s\n",
+ sbp[0]->s_volume_name);
+}
+
+static const char dev_readme_str[] =
+ "The <device> group contains attributes that describe file system\n"
+ "partition's details.\n\n"
+ "(1) revision\n\tshow NILFS file system revision.\n\n"
+ "(2) blocksize\n\tshow volume block size in bytes.\n\n"
+ "(3) device_size\n\tshow volume size in bytes.\n\n"
+ "(4) free_blocks\n\tshow count of free blocks on volume.\n\n"
+ "(5) uuid\n\tshow volume's UUID.\n\n"
+ "(6) volume_name\n\tshow volume's name.\n\n";
+
+static ssize_t nilfs_dev_README_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, dev_readme_str);
+}
+
+NILFS_DEV_RO_ATTR(revision);
+NILFS_DEV_RO_ATTR(blocksize);
+NILFS_DEV_RO_ATTR(device_size);
+NILFS_DEV_RO_ATTR(free_blocks);
+NILFS_DEV_RO_ATTR(uuid);
+NILFS_DEV_RO_ATTR(volume_name);
+NILFS_DEV_RO_ATTR(README);
+
+static struct attribute *nilfs_dev_attrs[] = {
+ NILFS_DEV_ATTR_LIST(revision),
+ NILFS_DEV_ATTR_LIST(blocksize),
+ NILFS_DEV_ATTR_LIST(device_size),
+ NILFS_DEV_ATTR_LIST(free_blocks),
+ NILFS_DEV_ATTR_LIST(uuid),
+ NILFS_DEV_ATTR_LIST(volume_name),
+ NILFS_DEV_ATTR_LIST(README),
+ NULL,
+};
+
+static ssize_t nilfs_dev_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
+ ns_dev_kobj);
+ struct nilfs_dev_attr *a = container_of(attr, struct nilfs_dev_attr,
+ attr);
+
+ return a->show ? a->show(a, nilfs, buf) : 0;
+}
+
+static ssize_t nilfs_dev_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
+ ns_dev_kobj);
+ struct nilfs_dev_attr *a = container_of(attr, struct nilfs_dev_attr,
+ attr);
+
+ return a->store ? a->store(a, nilfs, buf, len) : 0;
+}
+
+static void nilfs_dev_attr_release(struct kobject *kobj)
+{
+ struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
+ ns_dev_kobj);
+ complete(&nilfs->ns_dev_kobj_unregister);
+}
+
+static const struct sysfs_ops nilfs_dev_attr_ops = {
+ .show = nilfs_dev_attr_show,
+ .store = nilfs_dev_attr_store,
+};
+
+static struct kobj_type nilfs_dev_ktype = {
+ .default_attrs = nilfs_dev_attrs,
+ .sysfs_ops = &nilfs_dev_attr_ops,
+ .release = nilfs_dev_attr_release,
+};
+
+int nilfs_sysfs_create_device_group(struct super_block *sb)
+{
+ struct the_nilfs *nilfs = sb->s_fs_info;
+ size_t devgrp_size = sizeof(struct nilfs_sysfs_dev_subgroups);
+ int err;
+
+ nilfs->ns_dev_subgroups = kzalloc(devgrp_size, GFP_KERNEL);
+ if (unlikely(!nilfs->ns_dev_subgroups)) {
+ err = -ENOMEM;
+ printk(KERN_ERR "NILFS: unable to allocate memory for device group\n");
+ goto failed_create_device_group;
+ }
+
+ nilfs->ns_dev_kobj.kset = nilfs_kset;
+ init_completion(&nilfs->ns_dev_kobj_unregister);
+ err = kobject_init_and_add(&nilfs->ns_dev_kobj, &nilfs_dev_ktype, NULL,
+ "%s", sb->s_id);
+ if (err)
+ goto free_dev_subgroups;
+
+ err = nilfs_sysfs_create_mounted_snapshots_group(nilfs);
+ if (err)
+ goto cleanup_dev_kobject;
+
+ err = nilfs_sysfs_create_checkpoints_group(nilfs);
+ if (err)
+ goto delete_mounted_snapshots_group;
+
+ err = nilfs_sysfs_create_segments_group(nilfs);
+ if (err)
+ goto delete_checkpoints_group;
+
+ err = nilfs_sysfs_create_superblock_group(nilfs);
+ if (err)
+ goto delete_segments_group;
+
+ err = nilfs_sysfs_create_segctor_group(nilfs);
+ if (err)
+ goto delete_superblock_group;
+
+ return 0;
+
+delete_superblock_group:
+ nilfs_sysfs_delete_superblock_group(nilfs);
+
+delete_segments_group:
+ nilfs_sysfs_delete_segments_group(nilfs);
+
+delete_checkpoints_group:
+ nilfs_sysfs_delete_checkpoints_group(nilfs);
+
+delete_mounted_snapshots_group:
+ nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
+
+cleanup_dev_kobject:
+ kobject_del(&nilfs->ns_dev_kobj);
+
+free_dev_subgroups:
+ kfree(nilfs->ns_dev_subgroups);
+
+failed_create_device_group:
+ return err;
+}
+
+void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs)
+{
+ nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
+ nilfs_sysfs_delete_checkpoints_group(nilfs);
+ nilfs_sysfs_delete_segments_group(nilfs);
+ nilfs_sysfs_delete_superblock_group(nilfs);
+ nilfs_sysfs_delete_segctor_group(nilfs);
+ kobject_del(&nilfs->ns_dev_kobj);
+ kfree(nilfs->ns_dev_subgroups);
+}
+
+/************************************************************************
+ * NILFS feature attrs *
+ ************************************************************************/
+
+static ssize_t nilfs_feature_revision_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d.%d\n",
+ NILFS_CURRENT_REV, NILFS_MINOR_REV);
+}
+
+static const char features_readme_str[] =
+ "The features group contains attributes that describe NILFS file\n"
+ "system driver features.\n\n"
+ "(1) revision\n\tshow current revision of NILFS file system driver.\n";
+
+static ssize_t nilfs_feature_README_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, features_readme_str);
+}
+
+NILFS_FEATURE_RO_ATTR(revision);
+NILFS_FEATURE_RO_ATTR(README);
+
+static struct attribute *nilfs_feature_attrs[] = {
+ NILFS_FEATURE_ATTR_LIST(revision),
+ NILFS_FEATURE_ATTR_LIST(README),
+ NULL,
+};
+
+static const struct attribute_group nilfs_feature_attr_group = {
+ .name = "features",
+ .attrs = nilfs_feature_attrs,
+};
+
+int __init nilfs_sysfs_init(void)
+{
+ int err;
+
+ nilfs_kset = kset_create_and_add(NILFS_ROOT_GROUP_NAME, NULL, fs_kobj);
+ if (!nilfs_kset) {
+ err = -ENOMEM;
+ printk(KERN_ERR "NILFS: unable to create sysfs entry: err %d\n",
+ err);
+ goto failed_sysfs_init;
+ }
+
+ err = sysfs_create_group(&nilfs_kset->kobj, &nilfs_feature_attr_group);
+ if (unlikely(err)) {
+ printk(KERN_ERR "NILFS: unable to create feature group: err %d\n",
+ err);
+ goto cleanup_sysfs_init;
+ }
+
+ return 0;
+
+cleanup_sysfs_init:
+ kset_unregister(nilfs_kset);
+
+failed_sysfs_init:
+ return err;
+}
+
+void nilfs_sysfs_exit(void)
+{
+ sysfs_remove_group(&nilfs_kset->kobj, &nilfs_feature_attr_group);
+ kset_unregister(nilfs_kset);
+}
diff --git a/fs/nilfs2/sysfs.h b/fs/nilfs2/sysfs.h
new file mode 100644
index 000000000000..677e3a1a8370
--- /dev/null
+++ b/fs/nilfs2/sysfs.h
@@ -0,0 +1,176 @@
+/*
+ * sysfs.h - sysfs support declarations.
+ *
+ * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation.
+ * Copyright (C) 2014 HGST, Inc., a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com>
+ */
+
+#ifndef _NILFS_SYSFS_H
+#define _NILFS_SYSFS_H
+
+#include <linux/sysfs.h>
+
+#define NILFS_ROOT_GROUP_NAME "nilfs2"
+
+/*
+ * struct nilfs_sysfs_dev_subgroups - device subgroup kernel objects
+ * @sg_superblock_kobj: /sys/fs/<nilfs>/<device>/superblock
+ * @sg_superblock_kobj_unregister: completion state
+ * @sg_segctor_kobj: /sys/fs/<nilfs>/<device>/segctor
+ * @sg_segctor_kobj_unregister: completion state
+ * @sg_mounted_snapshots_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots
+ * @sg_mounted_snapshots_kobj_unregister: completion state
+ * @sg_checkpoints_kobj: /sys/fs/<nilfs>/<device>/checkpoints
+ * @sg_checkpoints_kobj_unregister: completion state
+ * @sg_segments_kobj: /sys/fs/<nilfs>/<device>/segments
+ * @sg_segments_kobj_unregister: completion state
+ */
+struct nilfs_sysfs_dev_subgroups {
+ /* /sys/fs/<nilfs>/<device>/superblock */
+ struct kobject sg_superblock_kobj;
+ struct completion sg_superblock_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/segctor */
+ struct kobject sg_segctor_kobj;
+ struct completion sg_segctor_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/mounted_snapshots */
+ struct kobject sg_mounted_snapshots_kobj;
+ struct completion sg_mounted_snapshots_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/checkpoints */
+ struct kobject sg_checkpoints_kobj;
+ struct completion sg_checkpoints_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/segments */
+ struct kobject sg_segments_kobj;
+ struct completion sg_segments_kobj_unregister;
+};
+
+#define NILFS_COMMON_ATTR_STRUCT(name) \
+struct nilfs_##name##_attr { \
+ struct attribute attr; \
+ ssize_t (*show)(struct kobject *, struct attribute *, \
+ char *); \
+ ssize_t (*store)(struct kobject *, struct attribute *, \
+ const char *, size_t); \
+};
+
+NILFS_COMMON_ATTR_STRUCT(feature);
+
+#define NILFS_DEV_ATTR_STRUCT(name) \
+struct nilfs_##name##_attr { \
+ struct attribute attr; \
+ ssize_t (*show)(struct nilfs_##name##_attr *, struct the_nilfs *, \
+ char *); \
+ ssize_t (*store)(struct nilfs_##name##_attr *, struct the_nilfs *, \
+ const char *, size_t); \
+};
+
+NILFS_DEV_ATTR_STRUCT(dev);
+NILFS_DEV_ATTR_STRUCT(segments);
+NILFS_DEV_ATTR_STRUCT(mounted_snapshots);
+NILFS_DEV_ATTR_STRUCT(checkpoints);
+NILFS_DEV_ATTR_STRUCT(superblock);
+NILFS_DEV_ATTR_STRUCT(segctor);
+
+#define NILFS_CP_ATTR_STRUCT(name) \
+struct nilfs_##name##_attr { \
+ struct attribute attr; \
+ ssize_t (*show)(struct nilfs_##name##_attr *, struct nilfs_root *, \
+ char *); \
+ ssize_t (*store)(struct nilfs_##name##_attr *, struct nilfs_root *, \
+ const char *, size_t); \
+};
+
+NILFS_CP_ATTR_STRUCT(snapshot);
+
+#define NILFS_ATTR(type, name, mode, show, store) \
+ static struct nilfs_##type##_attr nilfs_##type##_attr_##name = \
+ __ATTR(name, mode, show, store)
+
+#define NILFS_INFO_ATTR(type, name) \
+ NILFS_ATTR(type, name, 0444, NULL, NULL)
+#define NILFS_RO_ATTR(type, name) \
+ NILFS_ATTR(type, name, 0444, nilfs_##type##_##name##_show, NULL)
+#define NILFS_RW_ATTR(type, name) \
+ NILFS_ATTR(type, name, 0644, \
+ nilfs_##type##_##name##_show, \
+ nilfs_##type##_##name##_store)
+
+#define NILFS_FEATURE_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(feature, name)
+#define NILFS_FEATURE_RO_ATTR(name) \
+ NILFS_RO_ATTR(feature, name)
+#define NILFS_FEATURE_RW_ATTR(name) \
+ NILFS_RW_ATTR(feature, name)
+
+#define NILFS_DEV_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(dev, name)
+#define NILFS_DEV_RO_ATTR(name) \
+ NILFS_RO_ATTR(dev, name)
+#define NILFS_DEV_RW_ATTR(name) \
+ NILFS_RW_ATTR(dev, name)
+
+#define NILFS_SEGMENTS_RO_ATTR(name) \
+ NILFS_RO_ATTR(segments, name)
+#define NILFS_SEGMENTS_RW_ATTR(name) \
+ NILFS_RW_ATTR(segs_info, name)
+
+#define NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(name) \
+ NILFS_RO_ATTR(mounted_snapshots, name)
+
+#define NILFS_CHECKPOINTS_RO_ATTR(name) \
+ NILFS_RO_ATTR(checkpoints, name)
+#define NILFS_CHECKPOINTS_RW_ATTR(name) \
+ NILFS_RW_ATTR(checkpoints, name)
+
+#define NILFS_SNAPSHOT_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(snapshot, name)
+#define NILFS_SNAPSHOT_RO_ATTR(name) \
+ NILFS_RO_ATTR(snapshot, name)
+#define NILFS_SNAPSHOT_RW_ATTR(name) \
+ NILFS_RW_ATTR(snapshot, name)
+
+#define NILFS_SUPERBLOCK_RO_ATTR(name) \
+ NILFS_RO_ATTR(superblock, name)
+#define NILFS_SUPERBLOCK_RW_ATTR(name) \
+ NILFS_RW_ATTR(superblock, name)
+
+#define NILFS_SEGCTOR_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(segctor, name)
+#define NILFS_SEGCTOR_RO_ATTR(name) \
+ NILFS_RO_ATTR(segctor, name)
+#define NILFS_SEGCTOR_RW_ATTR(name) \
+ NILFS_RW_ATTR(segctor, name)
+
+#define NILFS_FEATURE_ATTR_LIST(name) \
+ (&nilfs_feature_attr_##name.attr)
+#define NILFS_DEV_ATTR_LIST(name) \
+ (&nilfs_dev_attr_##name.attr)
+#define NILFS_SEGMENTS_ATTR_LIST(name) \
+ (&nilfs_segments_attr_##name.attr)
+#define NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(name) \
+ (&nilfs_mounted_snapshots_attr_##name.attr)
+#define NILFS_CHECKPOINTS_ATTR_LIST(name) \
+ (&nilfs_checkpoints_attr_##name.attr)
+#define NILFS_SNAPSHOT_ATTR_LIST(name) \
+ (&nilfs_snapshot_attr_##name.attr)
+#define NILFS_SUPERBLOCK_ATTR_LIST(name) \
+ (&nilfs_superblock_attr_##name.attr)
+#define NILFS_SEGCTOR_ATTR_LIST(name) \
+ (&nilfs_segctor_attr_##name.attr)
+
+#endif /* _NILFS_SYSFS_H */
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 8ba8229ba076..9da25fe9ea61 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -85,6 +85,7 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
nilfs->ns_cptree = RB_ROOT;
spin_lock_init(&nilfs->ns_cptree_lock);
init_rwsem(&nilfs->ns_segctor_sem);
+ nilfs->ns_sb_update_freq = NILFS_SB_FREQ;
return nilfs;
}
@@ -97,6 +98,7 @@ void destroy_nilfs(struct the_nilfs *nilfs)
{
might_sleep();
if (nilfs_init(nilfs)) {
+ nilfs_sysfs_delete_device_group(nilfs);
brelse(nilfs->ns_sbh[0]);
brelse(nilfs->ns_sbh[1]);
}
@@ -640,6 +642,10 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
if (err)
goto failed_sbh;
+ err = nilfs_sysfs_create_device_group(sb);
+ if (err)
+ goto failed_sbh;
+
set_nilfs_init(nilfs);
err = 0;
out:
@@ -740,12 +746,13 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
{
struct rb_node **p, *parent;
struct nilfs_root *root, *new;
+ int err;
root = nilfs_lookup_root(nilfs, cno);
if (root)
return root;
- new = kmalloc(sizeof(*root), GFP_KERNEL);
+ new = kzalloc(sizeof(*root), GFP_KERNEL);
if (!new)
return NULL;
@@ -782,6 +789,12 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
spin_unlock(&nilfs->ns_cptree_lock);
+ err = nilfs_sysfs_create_snapshot_group(new);
+ if (err) {
+ kfree(new);
+ new = NULL;
+ }
+
return new;
}
@@ -790,6 +803,8 @@ void nilfs_put_root(struct nilfs_root *root)
if (atomic_dec_and_test(&root->count)) {
struct the_nilfs *nilfs = root->nilfs;
+ nilfs_sysfs_delete_snapshot_group(root);
+
spin_lock(&nilfs->ns_cptree_lock);
rb_erase(&root->rb_node, &nilfs->ns_cptree);
spin_unlock(&nilfs->ns_cptree_lock);
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index de8cc53b4a5c..d01ead1bea9a 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -33,6 +33,7 @@
#include <linux/slab.h>
struct nilfs_sc_info;
+struct nilfs_sysfs_dev_subgroups;
/* the_nilfs struct */
enum {
@@ -54,6 +55,7 @@ enum {
* @ns_sbwcount: write count of super block
* @ns_sbsize: size of valid data in super block
* @ns_mount_state: file system state
+ * @ns_sb_update_freq: interval of periodical update of superblocks (in seconds)
* @ns_seg_seq: segment sequence counter
* @ns_segnum: index number of the latest full segment.
* @ns_nextnum: index number of the full segment index to be used next
@@ -95,6 +97,9 @@ enum {
* @ns_inode_size: size of on-disk inode
* @ns_first_ino: first not-special inode number
* @ns_crc_seed: seed value of CRC32 calculation
+ * @ns_dev_kobj: /sys/fs/<nilfs>/<device>
+ * @ns_dev_kobj_unregister: completion state
+ * @ns_dev_subgroups: <device> subgroups pointer
*/
struct the_nilfs {
unsigned long ns_flags;
@@ -114,6 +119,7 @@ struct the_nilfs {
unsigned ns_sbwcount;
unsigned ns_sbsize;
unsigned ns_mount_state;
+ unsigned ns_sb_update_freq;
/*
* Following fields are dedicated to a writable FS-instance.
@@ -188,6 +194,11 @@ struct the_nilfs {
int ns_inode_size;
int ns_first_ino;
u32 ns_crc_seed;
+
+ /* /sys/fs/<nilfs>/<device> */
+ struct kobject ns_dev_kobj;
+ struct completion ns_dev_kobj_unregister;
+ struct nilfs_sysfs_dev_subgroups *ns_dev_subgroups;
};
#define THE_NILFS_FNS(bit, name) \
@@ -232,6 +243,8 @@ THE_NILFS_FNS(SB_DIRTY, sb_dirty)
* @ifile: inode file
* @inodes_count: number of inodes
* @blocks_count: number of blocks
+ * @snapshot_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot>
+ * @snapshot_kobj_unregister: completion state for kernel object
*/
struct nilfs_root {
__u64 cno;
@@ -243,6 +256,10 @@ struct nilfs_root {
atomic64_t inodes_count;
atomic64_t blocks_count;
+
+ /* /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot> */
+ struct kobject snapshot_kobj;
+ struct completion snapshot_kobj_unregister;
};
/* Special checkpoint number */
@@ -254,7 +271,8 @@ struct nilfs_root {
static inline int nilfs_sb_need_update(struct the_nilfs *nilfs)
{
u64 t = get_seconds();
- return t < nilfs->ns_sbwtime || t > nilfs->ns_sbwtime + NILFS_SB_FREQ;
+ return t < nilfs->ns_sbwtime ||
+ t > nilfs->ns_sbwtime + nilfs->ns_sb_update_freq;
}
static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs)
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 74825be65b7b..9ce062218de9 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -232,7 +232,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
BUG_ON(last == NULL);
/* mark should be the last entry. last is the current last entry */
- hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list);
+ hlist_add_behind_rcu(&mark->i.i_list, &last->i.i_list);
out:
fsnotify_recalc_inode_mask_locked(inode);
spin_unlock(&inode->i_lock);
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 68ca5a8704b5..ac851e8376b1 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -191,7 +191,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
BUG_ON(last == NULL);
/* mark should be the last entry. last is the current last entry */
- hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list);
+ hlist_add_behind_rcu(&mark->m.m_list, &last->m.m_list);
out:
fsnotify_recalc_vfsmount_mask_locked(mnt);
spin_unlock(&mnt->mnt_root->d_lock);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9d8fcf2f3b94..a93bf9892256 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4961,6 +4961,15 @@ leftright:
el = path_leaf_el(path);
split_index = ocfs2_search_extent_list(el, cpos);
+ if (split_index == -1) {
+ ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+ "Owner %llu has an extent at cpos %u "
+ "which can no longer be found.\n",
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+ cpos);
+ ret = -EROFS;
+ goto out;
+ }
goto leftright;
}
out:
@@ -5135,7 +5144,7 @@ int ocfs2_change_extent_flag(handle_t *handle,
el = path_leaf_el(left_path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(sb,
"Owner %llu has an extent at cpos %u which can no "
"longer be found.\n",
@@ -5491,7 +5500,7 @@ int ocfs2_remove_extent(handle_t *handle,
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
"Owner %llu has an extent at cpos %u which can no "
"longer be found.\n",
@@ -5557,7 +5566,7 @@ int ocfs2_remove_extent(handle_t *handle,
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
"Owner %llu: split at cpos %u lost record.",
(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 4a231a166cf8..a06b23773384 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1817,16 +1817,6 @@ try_again:
if (ret)
goto out_commit;
}
- /*
- * We don't want this to fail in ocfs2_write_end(), so do it
- * here.
- */
- ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret) {
- mlog_errno(ret);
- goto out_quota;
- }
/*
* Fill our page array first. That way we've grabbed enough so
@@ -1977,7 +1967,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
- int i;
+ int i, ret;
unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1);
struct inode *inode = mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -2027,6 +2017,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
}
}
+ ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ copied = ret;
+ mlog_errno(ret);
+ goto out;
+ }
+
out_write_size:
pos += copied;
if (pos > i_size_read(inode)) {
@@ -2041,6 +2039,7 @@ out_write_size:
ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_journal_dirty(handle, wc->w_di_bh);
+out:
ocfs2_commit_trans(osb, handle);
ocfs2_run_deallocs(osb, &wc->w_dealloc);
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index 1ec141e758d7..62e8ec619b4c 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -160,9 +160,18 @@ static void o2quo_make_decision(struct work_struct *work)
}
out:
- spin_unlock(&qs->qs_lock);
- if (fence)
+ if (fence) {
+ spin_unlock(&qs->qs_lock);
o2quo_fence_self();
+ } else {
+ mlog(ML_NOTICE, "not fencing this node, heartbeating: %d, "
+ "connected: %d, lowest: %d (%sreachable)\n",
+ qs->qs_heartbeating, qs->qs_connected, lowest_hb,
+ lowest_reachable ? "" : "un");
+ spin_unlock(&qs->qs_lock);
+
+ }
+
}
static void o2quo_set_hold(struct o2quo_state *qs, u8 node)
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 681691bc233a..ea34952f9496 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1480,6 +1480,14 @@ static int o2net_set_nodelay(struct socket *sock)
return ret;
}
+static int o2net_set_usertimeout(struct socket *sock)
+{
+ int user_timeout = O2NET_TCP_USER_TIMEOUT;
+
+ return kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
+ (char *)&user_timeout, sizeof(user_timeout));
+}
+
static void o2net_initialize_handshake(void)
{
o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32(
@@ -1536,16 +1544,20 @@ static void o2net_idle_timer(unsigned long data)
#endif
printk(KERN_NOTICE "o2net: Connection to " SC_NODEF_FMT " has been "
- "idle for %lu.%lu secs, shutting it down.\n", SC_NODEF_ARGS(sc),
- msecs / 1000, msecs % 1000);
+ "idle for %lu.%lu secs.\n",
+ SC_NODEF_ARGS(sc), msecs / 1000, msecs % 1000);
- /*
- * Initialize the nn_timeout so that the next connection attempt
- * will continue in o2net_start_connect.
+ /* idle timerout happen, don't shutdown the connection, but
+ * make fence decision. Maybe the connection can recover before
+ * the decision is made.
*/
atomic_set(&nn->nn_timeout, 1);
+ o2quo_conn_err(o2net_num_from_nn(nn));
+ queue_delayed_work(o2net_wq, &nn->nn_still_up,
+ msecs_to_jiffies(O2NET_QUORUM_DELAY_MS));
+
+ o2net_sc_reset_idle_timer(sc);
- o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
}
static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
@@ -1560,6 +1572,15 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
{
+ struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
+
+ /* clear fence decision since the connection recover from timeout*/
+ if (atomic_read(&nn->nn_timeout)) {
+ o2quo_conn_up(o2net_num_from_nn(nn));
+ cancel_delayed_work(&nn->nn_still_up);
+ atomic_set(&nn->nn_timeout, 0);
+ }
+
/* Only push out an existing timer */
if (timer_pending(&sc->sc_idle_timeout))
o2net_sc_reset_idle_timer(sc);
@@ -1650,6 +1671,12 @@ static void o2net_start_connect(struct work_struct *work)
goto out;
}
+ ret = o2net_set_usertimeout(sock);
+ if (ret) {
+ mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
+ goto out;
+ }
+
o2net_register_callbacks(sc->sc_sock->sk, sc);
spin_lock(&nn->nn_lock);
@@ -1831,6 +1858,12 @@ static int o2net_accept_one(struct socket *sock, int *more)
goto out;
}
+ ret = o2net_set_usertimeout(new_sock);
+ if (ret) {
+ mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
+ goto out;
+ }
+
slen = sizeof(sin);
ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin,
&slen, 1);
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h
index 5bada2a69b50..c571e849fda4 100644
--- a/fs/ocfs2/cluster/tcp.h
+++ b/fs/ocfs2/cluster/tcp.h
@@ -63,6 +63,7 @@ typedef void (o2net_post_msg_handler_func)(int status, void *data,
#define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 2000
#define O2NET_IDLE_TIMEOUT_MS_DEFAULT 30000
+#define O2NET_TCP_USER_TIMEOUT 0x7fffffff
/* TODO: figure this out.... */
static inline int o2net_link_down(int err, struct socket *sock)
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 39efc5057a36..3fcf205ee900 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1923,12 +1923,11 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
goto bail;
}
- if (total_backoff >
- msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) {
+ if (total_backoff > DLM_JOIN_TIMEOUT_MSECS) {
status = -ERESTARTSYS;
mlog(ML_NOTICE, "Timed out joining dlm domain "
"%s after %u msecs\n", dlm->name,
- jiffies_to_msecs(total_backoff));
+ total_backoff);
goto bail;
}
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 437de7f768c6..21708cda4b8a 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1192,17 +1192,9 @@ void ocfs2_evict_inode(struct inode *inode)
int ocfs2_drop_inode(struct inode *inode)
{
struct ocfs2_inode_info *oi = OCFS2_I(inode);
- int res;
-
trace_ocfs2_drop_inode((unsigned long long)oi->ip_blkno,
inode->i_nlink, oi->ip_flags);
-
- if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
- res = 1;
- else
- res = generic_drop_inode(inode);
-
- return res;
+ return 1;
}
/*
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 6f66b3751ace..53e6c40ed4c6 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -35,9 +35,8 @@
copy_to_user((typeof(a) __user *)b, &(a), sizeof(a))
/*
- * This call is void because we are already reporting an error that may
- * be -EFAULT. The error will be returned from the ioctl(2) call. It's
- * just a best-effort to tell userspace that this request caused the error.
+ * This is just a best-effort to tell userspace that this request
+ * caused the error.
*/
static inline void o2info_set_request_error(struct ocfs2_info_request *kreq,
struct ocfs2_info_request __user *req)
@@ -146,136 +145,105 @@ bail:
static int ocfs2_info_handle_blocksize(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_blocksize oib;
if (o2info_from_user(oib, req))
- goto bail;
+ return -EFAULT;
oib.ib_blocksize = inode->i_sb->s_blocksize;
o2info_set_request_filled(&oib.ib_req);
if (o2info_to_user(oib, req))
- goto bail;
-
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oib.ib_req, req);
+ return -EFAULT;
- return status;
+ return 0;
}
static int ocfs2_info_handle_clustersize(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_clustersize oic;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oic, req))
- goto bail;
+ return -EFAULT;
oic.ic_clustersize = osb->s_clustersize;
o2info_set_request_filled(&oic.ic_req);
if (o2info_to_user(oic, req))
- goto bail;
-
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oic.ic_req, req);
+ return -EFAULT;
- return status;
+ return 0;
}
static int ocfs2_info_handle_maxslots(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_maxslots oim;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oim, req))
- goto bail;
+ return -EFAULT;
oim.im_max_slots = osb->max_slots;
o2info_set_request_filled(&oim.im_req);
if (o2info_to_user(oim, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oim.im_req, req);
-
- return status;
+ return 0;
}
static int ocfs2_info_handle_label(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_label oil;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oil, req))
- goto bail;
+ return -EFAULT;
memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN);
o2info_set_request_filled(&oil.il_req);
if (o2info_to_user(oil, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oil.il_req, req);
-
- return status;
+ return 0;
}
static int ocfs2_info_handle_uuid(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_uuid oiu;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oiu, req))
- goto bail;
+ return -EFAULT;
memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1);
o2info_set_request_filled(&oiu.iu_req);
if (o2info_to_user(oiu, req))
- goto bail;
-
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oiu.iu_req, req);
+ return -EFAULT;
- return status;
+ return 0;
}
static int ocfs2_info_handle_fs_features(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_fs_features oif;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oif, req))
- goto bail;
+ return -EFAULT;
oif.if_compat_features = osb->s_feature_compat;
oif.if_incompat_features = osb->s_feature_incompat;
@@ -284,39 +252,28 @@ static int ocfs2_info_handle_fs_features(struct inode *inode,
o2info_set_request_filled(&oif.if_req);
if (o2info_to_user(oif, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oif.if_req, req);
-
- return status;
+ return 0;
}
static int ocfs2_info_handle_journal_size(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_journal_size oij;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oij, req))
- goto bail;
+ return -EFAULT;
oij.ij_journal_size = i_size_read(osb->journal->j_inode);
o2info_set_request_filled(&oij.ij_req);
if (o2info_to_user(oij, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oij.ij_req, req);
-
- return status;
+ return 0;
}
static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
@@ -373,7 +330,7 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
u32 i;
u64 blkno = -1;
char namebuf[40];
- int status = -EFAULT, type = INODE_ALLOC_SYSTEM_INODE;
+ int status, type = INODE_ALLOC_SYSTEM_INODE;
struct ocfs2_info_freeinode *oifi = NULL;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct inode *inode_alloc = NULL;
@@ -385,8 +342,10 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
goto out_err;
}
- if (o2info_from_user(*oifi, req))
- goto bail;
+ if (o2info_from_user(*oifi, req)) {
+ status = -EFAULT;
+ goto out_free;
+ }
oifi->ifi_slotnum = osb->max_slots;
@@ -424,14 +383,16 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
o2info_set_request_filled(&oifi->ifi_req);
- if (o2info_to_user(*oifi, req))
- goto bail;
+ if (o2info_to_user(*oifi, req)) {
+ status = -EFAULT;
+ goto out_free;
+ }
status = 0;
bail:
if (status)
o2info_set_request_error(&oifi->ifi_req, req);
-
+out_free:
kfree(oifi);
out_err:
return status;
@@ -658,7 +619,7 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
{
u64 blkno = -1;
char namebuf[40];
- int status = -EFAULT, type = GLOBAL_BITMAP_SYSTEM_INODE;
+ int status, type = GLOBAL_BITMAP_SYSTEM_INODE;
struct ocfs2_info_freefrag *oiff;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -671,8 +632,10 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
goto out_err;
}
- if (o2info_from_user(*oiff, req))
- goto bail;
+ if (o2info_from_user(*oiff, req)) {
+ status = -EFAULT;
+ goto out_free;
+ }
/*
* chunksize from userspace should be power of 2.
*/
@@ -711,14 +674,14 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
if (o2info_to_user(*oiff, req)) {
status = -EFAULT;
- goto bail;
+ goto out_free;
}
status = 0;
bail:
if (status)
o2info_set_request_error(&oiff->iff_req, req);
-
+out_free:
kfree(oiff);
out_err:
return status;
@@ -727,23 +690,17 @@ out_err:
static int ocfs2_info_handle_unknown(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_request oir;
if (o2info_from_user(oir, req))
- goto bail;
+ return -EFAULT;
o2info_clear_request_filled(&oir);
if (o2info_to_user(oir, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oir, req);
-
- return status;
+ return 0;
}
/*
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 599eb4c4c8be..6219aaadeb08 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -98,7 +98,7 @@ static int __ocfs2_move_extent(handle_t *handle,
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(inode->i_sb,
"Inode %llu has an extent at cpos %u which can no "
"longer be found.\n",
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 636aab69ead5..d81f6e2a97f5 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3109,7 +3109,7 @@ static int ocfs2_clear_ext_refcount(handle_t *handle,
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(sb,
"Inode %llu has an extent at cpos %u which can no "
"longer be found.\n",
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 1424c151cccc..a88b2a4fcc85 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -382,7 +382,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);
- si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks,
+ si->si_bh = kcalloc(si->si_blocks, sizeof(struct buffer_head *),
GFP_KERNEL);
if (!si->si_bh) {
status = -ENOMEM;
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index ec58c7659183..ba8819702c56 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -321,7 +321,7 @@ static int omfs_get_imap(struct super_block *sb)
goto out;
sbi->s_imap_size = array_size;
- sbi->s_imap = kzalloc(array_size * sizeof(unsigned long *), GFP_KERNEL);
+ sbi->s_imap = kcalloc(array_size, sizeof(unsigned long *), GFP_KERNEL);
if (!sbi->s_imap)
goto nomem;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 39e6ef32f0bd..6df8d0722c97 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -172,7 +172,7 @@ get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
- end = ALIGN(end, PAGE_SIZE);
+ end = PAGE_ALIGN(end);
/* overlap check (because we have to align page */
list_for_each_entry(tmp, head, list) {
if (tmp->type != KCORE_VMEMMAP)
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 71290463a1d3..f92d5dd578a4 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -632,7 +632,7 @@ out:
return ret;
}
-static int scan(struct ctl_table_header *head, ctl_table *table,
+static int scan(struct ctl_table_header *head, struct ctl_table *table,
unsigned long *pos, struct file *file,
struct dir_context *ctx)
{
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 9d231e9e5f0e..bf2d03f8fd3e 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -184,29 +184,11 @@ static int show_stat(struct seq_file *p, void *v)
static int stat_open(struct inode *inode, struct file *file)
{
- size_t size = 1024 + 128 * num_possible_cpus();
- char *buf;
- struct seq_file *m;
- int res;
+ size_t size = 1024 + 128 * num_online_cpus();
/* minimum size to display an interrupt count : 2 bytes */
size += 2 * nr_irqs;
-
- /* don't ask for more than the kmalloc() max size */
- if (size > KMALLOC_MAX_SIZE)
- size = KMALLOC_MAX_SIZE;
- buf = kmalloc(size, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- res = single_open(file, show_stat, NULL);
- if (!res) {
- m = file->private_data;
- m->buf = buf;
- m->size = ksize(buf);
- } else
- kfree(buf);
- return res;
+ return single_open_size(file, show_stat, NULL, size);
}
static const struct file_operations proc_stat_operations = {
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 34a1e5aa848c..9d7b9a83699e 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -394,7 +394,7 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size)
prot = pgprot_noncached(PAGE_KERNEL);
- pages = kmalloc(sizeof(struct page *) * page_count, GFP_KERNEL);
+ pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
if (!pages) {
pr_err("%s: Failed to allocate array for %u pages\n",
__func__, page_count);
diff --git a/fs/qnx6/Makefile b/fs/qnx6/Makefile
index 9dd06199afc9..5e6bae6fae50 100644
--- a/fs/qnx6/Makefile
+++ b/fs/qnx6/Makefile
@@ -5,3 +5,4 @@
obj-$(CONFIG_QNX6FS_FS) += qnx6.o
qnx6-objs := inode.o dir.o namei.o super_mmi.o
+ccflags-$(CONFIG_QNX6FS_DEBUG) += -DDEBUG
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
index 15b7d92ed60d..8d64bb5366bf 100644
--- a/fs/qnx6/dir.c
+++ b/fs/qnx6/dir.c
@@ -77,21 +77,20 @@ static int qnx6_dir_longfilename(struct inode *inode,
if (de->de_size != 0xff) {
/* error - long filename entries always have size 0xff
in direntry */
- printk(KERN_ERR "qnx6: invalid direntry size (%i).\n",
- de->de_size);
+ pr_err("invalid direntry size (%i).\n", de->de_size);
return 0;
}
lf = qnx6_longname(s, de, &page);
if (IS_ERR(lf)) {
- printk(KERN_ERR "qnx6:Error reading longname\n");
+ pr_err("Error reading longname\n");
return 0;
}
lf_size = fs16_to_cpu(sbi, lf->lf_size);
if (lf_size > QNX6_LONG_NAME_MAX) {
- QNX6DEBUG((KERN_INFO "file %s\n", lf->lf_fname));
- printk(KERN_ERR "qnx6:Filename too long (%i)\n", lf_size);
+ pr_debug("file %s\n", lf->lf_fname);
+ pr_err("Filename too long (%i)\n", lf_size);
qnx6_put_page(page);
return 0;
}
@@ -100,10 +99,10 @@ static int qnx6_dir_longfilename(struct inode *inode,
mmi 3g filesystem does not have that checksum */
if (!test_opt(s, MMI_FS) && fs32_to_cpu(sbi, de->de_checksum) !=
qnx6_lfile_checksum(lf->lf_fname, lf_size))
- printk(KERN_INFO "qnx6: long filename checksum error.\n");
+ pr_info("long filename checksum error.\n");
- QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s inode:%u\n",
- lf_size, lf->lf_fname, de_inode));
+ pr_debug("qnx6_readdir:%.*s inode:%u\n",
+ lf_size, lf->lf_fname, de_inode);
if (!dir_emit(ctx, lf->lf_fname, lf_size, de_inode, DT_UNKNOWN)) {
qnx6_put_page(page);
return 0;
@@ -136,7 +135,7 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx)
int i = start;
if (IS_ERR(page)) {
- printk(KERN_ERR "qnx6_readdir: read failed\n");
+ pr_err("%s(): read failed\n", __func__);
ctx->pos = (n + 1) << PAGE_CACHE_SHIFT;
return PTR_ERR(page);
}
@@ -159,9 +158,9 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx)
break;
}
} else {
- QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s"
- " inode:%u\n", size, de->de_fname,
- no_inode));
+ pr_debug("%s():%.*s inode:%u\n",
+ __func__, size, de->de_fname,
+ no_inode);
if (!dir_emit(ctx, de->de_fname, size,
no_inode, DT_UNKNOWN)) {
done = true;
@@ -259,8 +258,7 @@ unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
if (ino)
goto found;
} else
- printk(KERN_ERR "qnx6: undefined "
- "filename size in inode.\n");
+ pr_err("undefined filename size in inode.\n");
}
qnx6_put_page(page);
}
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 65cdaab3ed49..44e73923670d 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -73,8 +73,8 @@ static int qnx6_get_block(struct inode *inode, sector_t iblock,
{
unsigned phys;
- QNX6DEBUG((KERN_INFO "qnx6: qnx6_get_block inode=[%ld] iblock=[%ld]\n",
- inode->i_ino, (unsigned long)iblock));
+ pr_debug("qnx6_get_block inode=[%ld] iblock=[%ld]\n",
+ inode->i_ino, (unsigned long)iblock);
phys = qnx6_block_map(inode, iblock);
if (phys) {
@@ -87,7 +87,7 @@ static int qnx6_get_block(struct inode *inode, sector_t iblock,
static int qnx6_check_blockptr(__fs32 ptr)
{
if (ptr == ~(__fs32)0) {
- printk(KERN_ERR "qnx6: hit unused blockpointer.\n");
+ pr_err("hit unused blockpointer.\n");
return 0;
}
return 1;
@@ -127,8 +127,7 @@ static unsigned qnx6_block_map(struct inode *inode, unsigned no)
levelptr = no >> bitdelta;
if (levelptr > QNX6_NO_DIRECT_POINTERS - 1) {
- printk(KERN_ERR "qnx6:Requested file block number (%u) too big.",
- no);
+ pr_err("Requested file block number (%u) too big.", no);
return 0;
}
@@ -137,8 +136,7 @@ static unsigned qnx6_block_map(struct inode *inode, unsigned no)
for (i = 0; i < depth; i++) {
bh = sb_bread(s, block);
if (!bh) {
- printk(KERN_ERR "qnx6:Error reading block (%u)\n",
- block);
+ pr_err("Error reading block (%u)\n", block);
return 0;
}
bitdelta -= ptrbits;
@@ -207,26 +205,16 @@ void qnx6_superblock_debug(struct qnx6_super_block *sb, struct super_block *s)
{
struct qnx6_sb_info *sbi = QNX6_SB(s);
- QNX6DEBUG((KERN_INFO "magic: %08x\n",
- fs32_to_cpu(sbi, sb->sb_magic)));
- QNX6DEBUG((KERN_INFO "checksum: %08x\n",
- fs32_to_cpu(sbi, sb->sb_checksum)));
- QNX6DEBUG((KERN_INFO "serial: %llx\n",
- fs64_to_cpu(sbi, sb->sb_serial)));
- QNX6DEBUG((KERN_INFO "flags: %08x\n",
- fs32_to_cpu(sbi, sb->sb_flags)));
- QNX6DEBUG((KERN_INFO "blocksize: %08x\n",
- fs32_to_cpu(sbi, sb->sb_blocksize)));
- QNX6DEBUG((KERN_INFO "num_inodes: %08x\n",
- fs32_to_cpu(sbi, sb->sb_num_inodes)));
- QNX6DEBUG((KERN_INFO "free_inodes: %08x\n",
- fs32_to_cpu(sbi, sb->sb_free_inodes)));
- QNX6DEBUG((KERN_INFO "num_blocks: %08x\n",
- fs32_to_cpu(sbi, sb->sb_num_blocks)));
- QNX6DEBUG((KERN_INFO "free_blocks: %08x\n",
- fs32_to_cpu(sbi, sb->sb_free_blocks)));
- QNX6DEBUG((KERN_INFO "inode_levels: %02x\n",
- sb->Inode.levels));
+ pr_debug("magic: %08x\n", fs32_to_cpu(sbi, sb->sb_magic));
+ pr_debug("checksum: %08x\n", fs32_to_cpu(sbi, sb->sb_checksum));
+ pr_debug("serial: %llx\n", fs64_to_cpu(sbi, sb->sb_serial));
+ pr_debug("flags: %08x\n", fs32_to_cpu(sbi, sb->sb_flags));
+ pr_debug("blocksize: %08x\n", fs32_to_cpu(sbi, sb->sb_blocksize));
+ pr_debug("num_inodes: %08x\n", fs32_to_cpu(sbi, sb->sb_num_inodes));
+ pr_debug("free_inodes: %08x\n", fs32_to_cpu(sbi, sb->sb_free_inodes));
+ pr_debug("num_blocks: %08x\n", fs32_to_cpu(sbi, sb->sb_num_blocks));
+ pr_debug("free_blocks: %08x\n", fs32_to_cpu(sbi, sb->sb_free_blocks));
+ pr_debug("inode_levels: %02x\n", sb->Inode.levels);
}
#endif
@@ -277,7 +265,7 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
start with the first superblock */
bh = sb_bread(s, offset);
if (!bh) {
- printk(KERN_ERR "qnx6: unable to read the first superblock\n");
+ pr_err("unable to read the first superblock\n");
return NULL;
}
sb = (struct qnx6_super_block *)bh->b_data;
@@ -285,20 +273,16 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
sbi->s_bytesex = BYTESEX_BE;
if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) {
/* we got a big endian fs */
- QNX6DEBUG((KERN_INFO "qnx6: fs got different"
- " endianness.\n"));
+ pr_debug("fs got different endianness.\n");
return bh;
} else
sbi->s_bytesex = BYTESEX_LE;
if (!silent) {
if (offset == 0) {
- printk(KERN_ERR "qnx6: wrong signature (magic)"
- " in superblock #1.\n");
+ pr_err("wrong signature (magic) in superblock #1.\n");
} else {
- printk(KERN_INFO "qnx6: wrong signature (magic)"
- " at position (0x%lx) - will try"
- " alternative position (0x0000).\n",
- offset * s->s_blocksize);
+ pr_info("wrong signature (magic) at position (0x%lx) - will try alternative position (0x0000).\n",
+ offset * s->s_blocksize);
}
}
brelse(bh);
@@ -329,13 +313,13 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
/* Superblock always is 512 Byte long */
if (!sb_set_blocksize(s, QNX6_SUPERBLOCK_SIZE)) {
- printk(KERN_ERR "qnx6: unable to set blocksize\n");
+ pr_err("unable to set blocksize\n");
goto outnobh;
}
/* parse the mount-options */
if (!qnx6_parse_options((char *) data, s)) {
- printk(KERN_ERR "qnx6: invalid mount options.\n");
+ pr_err("invalid mount options.\n");
goto outnobh;
}
if (test_opt(s, MMI_FS)) {
@@ -355,7 +339,7 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
/* try again without bootblock offset */
bh1 = qnx6_check_first_superblock(s, 0, silent);
if (!bh1) {
- printk(KERN_ERR "qnx6: unable to read the first superblock\n");
+ pr_err("unable to read the first superblock\n");
goto outnobh;
}
/* seems that no bootblock at partition start */
@@ -370,13 +354,13 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
/* checksum check - start at byte 8 and end at byte 512 */
if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
- printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
+ pr_err("superblock #1 checksum error\n");
goto out;
}
/* set new blocksize */
if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
- printk(KERN_ERR "qnx6: unable to set blocksize\n");
+ pr_err("unable to set blocksize\n");
goto out;
}
/* blocksize invalidates bh - pull it back in */
@@ -398,21 +382,20 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
/* next the second superblock */
bh2 = sb_bread(s, offset);
if (!bh2) {
- printk(KERN_ERR "qnx6: unable to read the second superblock\n");
+ pr_err("unable to read the second superblock\n");
goto out;
}
sb2 = (struct qnx6_super_block *)bh2->b_data;
if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
if (!silent)
- printk(KERN_ERR "qnx6: wrong signature (magic)"
- " in superblock #2.\n");
+ pr_err("wrong signature (magic) in superblock #2.\n");
goto out;
}
/* checksum check - start at byte 8 and end at byte 512 */
if (fs32_to_cpu(sbi, sb2->sb_checksum) !=
crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
- printk(KERN_ERR "qnx6: superblock #2 checksum error\n");
+ pr_err("superblock #2 checksum error\n");
goto out;
}
@@ -422,25 +405,24 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
sbi->sb_buf = bh1;
sbi->sb = (struct qnx6_super_block *)bh1->b_data;
brelse(bh2);
- printk(KERN_INFO "qnx6: superblock #1 active\n");
+ pr_info("superblock #1 active\n");
} else {
/* superblock #2 active */
sbi->sb_buf = bh2;
sbi->sb = (struct qnx6_super_block *)bh2->b_data;
brelse(bh1);
- printk(KERN_INFO "qnx6: superblock #2 active\n");
+ pr_info("superblock #2 active\n");
}
mmi_success:
/* sanity check - limit maximum indirect pointer levels */
if (sb1->Inode.levels > QNX6_PTR_MAX_LEVELS) {
- printk(KERN_ERR "qnx6: too many inode levels (max %i, sb %i)\n",
- QNX6_PTR_MAX_LEVELS, sb1->Inode.levels);
+ pr_err("too many inode levels (max %i, sb %i)\n",
+ QNX6_PTR_MAX_LEVELS, sb1->Inode.levels);
goto out;
}
if (sb1->Longfile.levels > QNX6_PTR_MAX_LEVELS) {
- printk(KERN_ERR "qnx6: too many longfilename levels"
- " (max %i, sb %i)\n",
- QNX6_PTR_MAX_LEVELS, sb1->Longfile.levels);
+ pr_err("too many longfilename levels (max %i, sb %i)\n",
+ QNX6_PTR_MAX_LEVELS, sb1->Longfile.levels);
goto out;
}
s->s_op = &qnx6_sops;
@@ -460,7 +442,7 @@ mmi_success:
/* prefetch root inode */
root = qnx6_iget(s, QNX6_ROOT_INO);
if (IS_ERR(root)) {
- printk(KERN_ERR "qnx6: get inode failed\n");
+ pr_err("get inode failed\n");
ret = PTR_ERR(root);
goto out2;
}
@@ -474,7 +456,7 @@ mmi_success:
errmsg = qnx6_checkroot(s);
if (errmsg != NULL) {
if (!silent)
- printk(KERN_ERR "qnx6: %s\n", errmsg);
+ pr_err("%s\n", errmsg);
goto out3;
}
return 0;
@@ -555,8 +537,7 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
inode->i_mode = 0;
if (ino == 0) {
- printk(KERN_ERR "qnx6: bad inode number on dev %s: %u is "
- "out of range\n",
+ pr_err("bad inode number on dev %s: %u is out of range\n",
sb->s_id, ino);
iget_failed(inode);
return ERR_PTR(-EIO);
@@ -566,8 +547,8 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
mapping = sbi->inodes->i_mapping;
page = read_mapping_page(mapping, n, NULL);
if (IS_ERR(page)) {
- printk(KERN_ERR "qnx6: major problem: unable to read inode from "
- "dev %s\n", sb->s_id);
+ pr_err("major problem: unable to read inode from dev %s\n",
+ sb->s_id);
iget_failed(inode);
return ERR_CAST(page);
}
@@ -689,7 +670,7 @@ static int __init init_qnx6_fs(void)
return err;
}
- printk(KERN_INFO "QNX6 filesystem 1.0.0 registered.\n");
+ pr_info("QNX6 filesystem 1.0.0 registered.\n");
return 0;
}
diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c
index 0561326a94f5..6c1a323137dd 100644
--- a/fs/qnx6/namei.c
+++ b/fs/qnx6/namei.c
@@ -29,12 +29,12 @@ struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
foundinode = qnx6_iget(dir->i_sb, ino);
qnx6_put_page(page);
if (IS_ERR(foundinode)) {
- QNX6DEBUG((KERN_ERR "qnx6: lookup->iget -> "
- " error %ld\n", PTR_ERR(foundinode)));
+ pr_debug("lookup->iget -> error %ld\n",
+ PTR_ERR(foundinode));
return ERR_CAST(foundinode);
}
} else {
- QNX6DEBUG((KERN_INFO "qnx6_lookup: not found %s\n", name));
+ pr_debug("%s(): not found %s\n", __func__, name);
return NULL;
}
d_add(dentry, foundinode);
diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h
index b00fcc960d37..d3fb2b698800 100644
--- a/fs/qnx6/qnx6.h
+++ b/fs/qnx6/qnx6.h
@@ -10,6 +10,12 @@
*
*/
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/fs.h>
#include <linux/pagemap.h>
@@ -19,12 +25,6 @@ typedef __u64 __bitwise __fs64;
#include <linux/qnx6_fs.h>
-#ifdef CONFIG_QNX6FS_DEBUG
-#define QNX6DEBUG(X) printk X
-#else
-#define QNX6DEBUG(X) (void) 0
-#endif
-
struct qnx6_sb_info {
struct buffer_head *sb_buf; /* superblock buffer */
struct qnx6_super_block *sb; /* our superblock */
diff --git a/fs/qnx6/super_mmi.c b/fs/qnx6/super_mmi.c
index 29c32cba62d6..62aaf3e3126a 100644
--- a/fs/qnx6/super_mmi.c
+++ b/fs/qnx6/super_mmi.c
@@ -44,15 +44,14 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
start with the first superblock */
bh1 = sb_bread(s, 0);
if (!bh1) {
- printk(KERN_ERR "qnx6: Unable to read first mmi superblock\n");
+ pr_err("Unable to read first mmi superblock\n");
return NULL;
}
sb1 = (struct qnx6_mmi_super_block *)bh1->b_data;
sbi = QNX6_SB(s);
if (fs32_to_cpu(sbi, sb1->sb_magic) != QNX6_SUPER_MAGIC) {
if (!silent) {
- printk(KERN_ERR "qnx6: wrong signature (magic) in"
- " superblock #1.\n");
+ pr_err("wrong signature (magic) in superblock #1.\n");
goto out;
}
}
@@ -60,7 +59,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
/* checksum check - start at byte 8 and end at byte 512 */
if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
- printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
+ pr_err("superblock #1 checksum error\n");
goto out;
}
@@ -70,7 +69,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
/* set new blocksize */
if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
- printk(KERN_ERR "qnx6: unable to set blocksize\n");
+ pr_err("unable to set blocksize\n");
goto out;
}
/* blocksize invalidates bh - pull it back in */
@@ -83,27 +82,26 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
/* read second superblock */
bh2 = sb_bread(s, offset);
if (!bh2) {
- printk(KERN_ERR "qnx6: unable to read the second superblock\n");
+ pr_err("unable to read the second superblock\n");
goto out;
}
sb2 = (struct qnx6_mmi_super_block *)bh2->b_data;
if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
if (!silent)
- printk(KERN_ERR "qnx6: wrong signature (magic) in"
- " superblock #2.\n");
+ pr_err("wrong signature (magic) in superblock #2.\n");
goto out;
}
/* checksum check - start at byte 8 and end at byte 512 */
if (fs32_to_cpu(sbi, sb2->sb_checksum)
!= crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
- printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
+ pr_err("superblock #1 checksum error\n");
goto out;
}
qsb = kmalloc(sizeof(*qsb), GFP_KERNEL);
if (!qsb) {
- printk(KERN_ERR "qnx6: unable to allocate memory.\n");
+ pr_err("unable to allocate memory.\n");
goto out;
}
@@ -119,7 +117,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
sbi->sb_buf = bh1;
sbi->sb = (struct qnx6_super_block *)bh1->b_data;
brelse(bh2);
- printk(KERN_INFO "qnx6: superblock #1 active\n");
+ pr_info("superblock #1 active\n");
} else {
/* superblock #2 active */
qnx6_mmi_copy_sb(qsb, sb2);
@@ -131,7 +129,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
sbi->sb_buf = bh2;
sbi->sb = (struct qnx6_super_block *)bh2->b_data;
brelse(bh1);
- printk(KERN_INFO "qnx6: superblock #2 active\n");
+ pr_info("superblock #2 active\n");
}
kfree(qsb);
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index dda012ad4208..bbafbde3471a 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -222,7 +222,7 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
/* gang-find the pages */
ret = -ENOMEM;
- pages = kzalloc(lpages * sizeof(struct page *), GFP_KERNEL);
+ pages = kcalloc(lpages, sizeof(struct page *), GFP_KERNEL);
if (!pages)
goto out_free;
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index d9f5a60dd59b..0a7dc941aaf4 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -9,7 +9,7 @@
#include <linux/stat.h>
#include <linux/buffer_head.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
extern const struct reiserfs_key MIN_KEY;
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 54fdf196bfb2..5739cb99de7b 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -10,7 +10,7 @@
* and using buffers obtained after all above.
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/time.h>
#include "reiserfs.h"
#include <linux/buffer_head.h>
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index db9e80ba53a0..751dd3f4346b 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -6,7 +6,7 @@
#include "reiserfs.h"
#include "acl.h"
#include "xattr.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/writeback.h>
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index 73231b1ebdbe..b751eea32e20 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c
@@ -2,7 +2,7 @@
* Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/string.h>
#include <linux/time.h>
#include "reiserfs.h"
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 63b2b0ec49e6..a7eec9888f10 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -11,7 +11,7 @@
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 501ed6811a2b..6ec8a30a0911 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -7,7 +7,7 @@
#include <linux/mount.h>
#include "reiserfs.h"
#include <linux/time.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/pagemap.h>
#include <linux/compat.h>
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index cfaee912ee09..aca73dd73906 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -54,7 +54,7 @@ static void sd_print_item(struct item_head *ih, char *item)
} else {
struct stat_data *sd = (struct stat_data *)item;
- printk("\t0%-6o | %6Lu | %2u | %d | %s\n", sd_v2_mode(sd),
+ printk("\t0%-6o | %6llu | %2u | %d | %s\n", sd_v2_mode(sd),
(unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd),
sd_v2_rdev(sd), print_time(sd_v2_mtime(sd)));
}
@@ -408,7 +408,7 @@ static void direntry_print_item(struct item_head *ih, char *item)
namebuf[namelen + 2] = 0;
}
- printk("%d: %-15s%-15d%-15d%-15Ld%-15Ld(%s)\n",
+ printk("%d: %-15s%-15d%-15d%-15lld%-15lld(%s)\n",
i, namebuf,
deh_dir_id(deh), deh_objectid(deh),
GET_HASH_VALUE(deh_offset(deh)),
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index d6744c8b24e1..814dda3ec998 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -2,7 +2,7 @@
* Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/string.h>
#include <linux/time.h>
#include "reiserfs.h"
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index c9b47e91baf8..ae1dc841db3a 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -17,7 +17,7 @@ static char off_buf[80];
static char *reiserfs_cpu_offset(struct cpu_key *key)
{
if (cpu_key_k_type(key) == TYPE_DIRENTRY)
- sprintf(off_buf, "%Lu(%Lu)",
+ sprintf(off_buf, "%llu(%llu)",
(unsigned long long)
GET_HASH_VALUE(cpu_key_k_offset(key)),
(unsigned long long)
@@ -34,7 +34,7 @@ static char *le_offset(struct reiserfs_key *key)
version = le_key_version(key);
if (le_key_k_type(version, key) == TYPE_DIRENTRY)
- sprintf(off_buf, "%Lu(%Lu)",
+ sprintf(off_buf, "%llu(%llu)",
(unsigned long long)
GET_HASH_VALUE(le_key_k_offset(version, key)),
(unsigned long long)
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 02b0b7d0f7d5..621b9f381fe1 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -11,7 +11,7 @@
#include <linux/module.h>
#include <linux/time.h>
#include <linux/seq_file.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "reiserfs.h"
#include <linux/init.h>
#include <linux/proc_fs.h>
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index dd44468edc2b..24cbe013240f 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -2006,7 +2006,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
&s_search_path) == POSITION_FOUND);
RFALSE(file_size > ROUND_UP(new_file_size),
- "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
+ "PAP-5680: truncate did not finish: new_file_size %lld, current %lld, oid %d",
new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
update_and_out:
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index a392cef6acc6..709ea92d716f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -15,7 +15,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/time.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "reiserfs.h"
#include "acl.h"
#include "xattr.h"
@@ -331,7 +331,7 @@ static int finish_unfinished(struct super_block *s)
* not completed truncate found. New size was
* committed together with "save" link
*/
- reiserfs_info(s, "Truncating %k to %Ld ..",
+ reiserfs_info(s, "Truncating %k to %lld ..",
INODE_PKEY(inode), inode->i_size);
/* don't update modification time */
@@ -1577,7 +1577,7 @@ static int read_super_block(struct super_block *s, int offset)
rs = (struct reiserfs_super_block *)bh->b_data;
if (sb_blocksize(rs) != s->s_blocksize) {
reiserfs_warning(s, "sh-2011", "can't find a reiserfs "
- "filesystem on (dev %s, block %Lu, size %lu)",
+ "filesystem on (dev %s, block %llu, size %lu)",
s->s_id,
(unsigned long long)bh->b_blocknr,
s->s_blocksize);
@@ -2441,8 +2441,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
struct buffer_head tmp_bh, *bh;
if (!current->journal_info) {
- printk(KERN_WARNING "reiserfs: Quota write (off=%Lu, len=%Lu)"
- " cancelled because transaction is not started.\n",
+ printk(KERN_WARNING "reiserfs: Quota write (off=%llu, len=%llu) cancelled because transaction is not started.\n",
(unsigned long long)off, (unsigned long long)len);
return -EIO;
}
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index ca416d099e7d..7c36898af402 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -45,7 +45,7 @@
#include <linux/xattr.h>
#include "xattr.h"
#include "acl.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <net/checksum.h>
#include <linux/stat.h>
#include <linux/quotaops.h>
@@ -84,6 +84,7 @@ static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
static int xattr_unlink(struct inode *dir, struct dentry *dentry)
{
int error;
+
BUG_ON(!mutex_is_locked(&dir->i_mutex));
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
@@ -98,6 +99,7 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
+
BUG_ON(!mutex_is_locked(&dir->i_mutex));
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
@@ -117,6 +119,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
{
struct dentry *privroot = REISERFS_SB(sb)->priv_root;
struct dentry *xaroot;
+
if (!privroot->d_inode)
return ERR_PTR(-ENODATA);
@@ -127,6 +130,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
xaroot = ERR_PTR(-ENODATA);
else if (!xaroot->d_inode) {
int err = -ENODATA;
+
if (xattr_may_create(flags))
err = xattr_mkdir(privroot->d_inode, xaroot, 0700);
if (err) {
@@ -157,6 +161,7 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
if (!IS_ERR(xadir) && !xadir->d_inode) {
int err = -ENODATA;
+
if (xattr_may_create(flags))
err = xattr_mkdir(xaroot->d_inode, xadir, 0700);
if (err) {
@@ -188,6 +193,7 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset,
{
struct reiserfs_dentry_buf *dbuf = buf;
struct dentry *dentry;
+
WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex));
if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
@@ -218,6 +224,7 @@ static void
cleanup_dentry_buf(struct reiserfs_dentry_buf *buf)
{
int i;
+
for (i = 0; i < buf->count; i++)
if (buf->dentries[i])
dput(buf->dentries[i]);
@@ -283,11 +290,13 @@ static int reiserfs_for_each_xattr(struct inode *inode,
int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 +
4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
struct reiserfs_transaction_handle th;
+
reiserfs_write_lock(inode->i_sb);
err = journal_begin(&th, inode->i_sb, blocks);
reiserfs_write_unlock(inode->i_sb);
if (!err) {
int jerror;
+
mutex_lock_nested(&dir->d_parent->d_inode->i_mutex,
I_MUTEX_XATTR);
err = action(dir, data);
@@ -340,6 +349,7 @@ static int chown_one_xattr(struct dentry *dentry, void *data)
int reiserfs_delete_xattrs(struct inode *inode)
{
int err = reiserfs_for_each_xattr(inode, delete_one_xattr, NULL);
+
if (err)
reiserfs_warning(inode->i_sb, "jdm-20004",
"Couldn't delete all xattrs (%d)\n", err);
@@ -350,6 +360,7 @@ int reiserfs_delete_xattrs(struct inode *inode)
int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
{
int err = reiserfs_for_each_xattr(inode, chown_one_xattr, attrs);
+
if (err)
reiserfs_warning(inode->i_sb, "jdm-20007",
"Couldn't chown all xattrs (%d)\n", err);
@@ -439,6 +450,7 @@ int reiserfs_commit_write(struct file *f, struct page *page,
static void update_ctime(struct inode *inode)
{
struct timespec now = current_fs_time(inode->i_sb);
+
if (inode_unhashed(inode) || !inode->i_nlink ||
timespec_equal(&inode->i_ctime, &now))
return;
@@ -514,6 +526,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
size_t chunk;
size_t skip = 0;
size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1));
+
if (buffer_size - buffer_pos > PAGE_CACHE_SIZE)
chunk = PAGE_CACHE_SIZE;
else
@@ -530,6 +543,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
if (file_pos == 0) {
struct reiserfs_xattr_header *rxh;
+
skip = file_pos = sizeof(struct reiserfs_xattr_header);
if (chunk + skip > PAGE_CACHE_SIZE)
chunk = PAGE_CACHE_SIZE - skip;
@@ -659,6 +673,7 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
size_t chunk;
char *data;
size_t skip = 0;
+
if (isize - file_pos > PAGE_CACHE_SIZE)
chunk = PAGE_CACHE_SIZE;
else
@@ -792,6 +807,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
int reiserfs_removexattr(struct dentry *dentry, const char *name)
{
const struct xattr_handler *handler;
+
handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
@@ -813,9 +829,11 @@ static int listxattr_filler(void *buf, const char *name, int namelen,
{
struct listxattr_buf *b = (struct listxattr_buf *)buf;
size_t size;
+
if (name[0] != '.' ||
(namelen != 1 && (name[1] != '.' || namelen != 2))) {
const struct xattr_handler *handler;
+
handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
name);
if (!handler) /* Unsupported xattr name */
@@ -885,6 +903,7 @@ static int create_privroot(struct dentry *dentry)
{
int err;
struct inode *inode = dentry->d_parent->d_inode;
+
WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
err = xattr_mkdir(inode, dentry, 0700);
@@ -1015,6 +1034,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
mutex_lock(&privroot->d_inode->i_mutex);
if (!REISERFS_SB(s)->xattr_root) {
struct dentry *dentry;
+
dentry = lookup_one_len(XAROOT_NAME, privroot,
strlen(XAROOT_NAME));
if (!IS_ERR(dentry))
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 44503e293790..4b34b9dc03dd 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -9,7 +9,7 @@
#include <linux/posix_acl_xattr.h>
#include "xattr.h"
#include "acl.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
static int __reiserfs_set_acl(struct reiserfs_transaction_handle *th,
struct inode *inode, int type,
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 800a3cef6f62..e7f8939a4cb5 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -6,7 +6,7 @@
#include <linux/slab.h>
#include "xattr.h"
#include <linux/security.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
static int
security_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index a0035719f66b..5eeb0c48ba46 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -5,7 +5,7 @@
#include <linux/pagemap.h>
#include <linux/xattr.h>
#include "xattr.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
static int
trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 8667491ae7c3..e50eab046471 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -4,7 +4,7 @@
#include <linux/pagemap.h>
#include <linux/xattr.h>
#include "xattr.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
static int
user_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index ef90e8bca95a..e98dd88197d5 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -56,6 +56,8 @@
* 2 of the Licence, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/string.h>
#include <linux/fs.h>
@@ -380,7 +382,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
eio:
ret = -EIO;
error:
- printk(KERN_ERR "ROMFS: read error for inode 0x%lx\n", pos);
+ pr_err("read error for inode 0x%lx\n", pos);
return ERR_PTR(ret);
}
@@ -390,6 +392,7 @@ error:
static struct inode *romfs_alloc_inode(struct super_block *sb)
{
struct romfs_inode_info *inode;
+
inode = kmem_cache_alloc(romfs_inode_cachep, GFP_KERNEL);
return inode ? &inode->vfs_inode : NULL;
}
@@ -400,6 +403,7 @@ static struct inode *romfs_alloc_inode(struct super_block *sb)
static void romfs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
+
kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
}
@@ -507,15 +511,13 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
if (rsb->word0 != ROMSB_WORD0 || rsb->word1 != ROMSB_WORD1 ||
img_size < ROMFH_SIZE) {
if (!silent)
- printk(KERN_WARNING "VFS:"
- " Can't find a romfs filesystem on dev %s.\n",
+ pr_warn("VFS: Can't find a romfs filesystem on dev %s.\n",
sb->s_id);
goto error_rsb_inval;
}
if (romfs_checksum(rsb, min_t(size_t, img_size, 512))) {
- printk(KERN_ERR "ROMFS: bad initial checksum on dev %s.\n",
- sb->s_id);
+ pr_err("bad initial checksum on dev %s.\n", sb->s_id);
goto error_rsb_inval;
}
@@ -523,8 +525,8 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
len = strnlen(rsb->name, ROMFS_MAXFN);
if (!silent)
- printk(KERN_NOTICE "ROMFS: Mounting image '%*.*s' through %s\n",
- (unsigned) len, (unsigned) len, rsb->name, storage);
+ pr_notice("Mounting image '%*.*s' through %s\n",
+ (unsigned) len, (unsigned) len, rsb->name, storage);
kfree(rsb);
rsb = NULL;
@@ -614,7 +616,7 @@ static int __init init_romfs_fs(void)
{
int ret;
- printk(KERN_INFO "ROMFS MTD (C) 2007 Red Hat, Inc.\n");
+ pr_info("ROMFS MTD (C) 2007 Red Hat, Inc.\n");
romfs_inode_cachep =
kmem_cache_create("romfs_i",
@@ -623,13 +625,12 @@ static int __init init_romfs_fs(void)
romfs_i_init_once);
if (!romfs_inode_cachep) {
- printk(KERN_ERR
- "ROMFS error: Failed to initialise inode cache\n");
+ pr_err("Failed to initialise inode cache\n");
return -ENOMEM;
}
ret = register_filesystem(&romfs_fs_type);
if (ret) {
- printk(KERN_ERR "ROMFS error: Failed to register filesystem\n");
+ pr_err("Failed to register filesystem\n");
goto error_register;
}
return 0;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 1d641bb108d2..3857b720cb1b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -8,8 +8,10 @@
#include <linux/fs.h>
#include <linux/export.h>
#include <linux/seq_file.h>
+#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/cred.h>
+#include <linux/mm.h>
#include <asm/uaccess.h>
#include <asm/page.h>
@@ -30,6 +32,16 @@ static void seq_set_overflow(struct seq_file *m)
m->count = m->size;
}
+static void *seq_buf_alloc(unsigned long size)
+{
+ void *buf;
+
+ buf = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
+ if (!buf && size > PAGE_SIZE)
+ buf = vmalloc(size);
+ return buf;
+}
+
/**
* seq_open - initialize sequential file
* @file: file we initialize
@@ -96,7 +108,7 @@ static int traverse(struct seq_file *m, loff_t offset)
return 0;
}
if (!m->buf) {
- m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
+ m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
if (!m->buf)
return -ENOMEM;
}
@@ -135,9 +147,9 @@ static int traverse(struct seq_file *m, loff_t offset)
Eoverflow:
m->op->stop(m, p);
- kfree(m->buf);
+ kvfree(m->buf);
m->count = 0;
- m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
+ m->buf = seq_buf_alloc(m->size <<= 1);
return !m->buf ? -ENOMEM : -EAGAIN;
}
@@ -192,7 +204,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
/* grab buffer if we didn't have one */
if (!m->buf) {
- m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
+ m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
if (!m->buf)
goto Enomem;
}
@@ -232,9 +244,9 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
if (m->count < m->size)
goto Fill;
m->op->stop(m, p);
- kfree(m->buf);
+ kvfree(m->buf);
m->count = 0;
- m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
+ m->buf = seq_buf_alloc(m->size <<= 1);
if (!m->buf)
goto Enomem;
m->version = 0;
@@ -350,7 +362,7 @@ EXPORT_SYMBOL(seq_lseek);
int seq_release(struct inode *inode, struct file *file)
{
struct seq_file *m = file->private_data;
- kfree(m->buf);
+ kvfree(m->buf);
kfree(m);
return 0;
}
@@ -605,13 +617,13 @@ EXPORT_SYMBOL(single_open);
int single_open_size(struct file *file, int (*show)(struct seq_file *, void *),
void *data, size_t size)
{
- char *buf = kmalloc(size, GFP_KERNEL);
+ char *buf = seq_buf_alloc(size);
int ret;
if (!buf)
return -ENOMEM;
ret = single_open(file, show, data);
if (ret) {
- kfree(buf);
+ kvfree(buf);
return ret;
}
((struct seq_file *)file->private_data)->buf = buf;
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index 62a0de6632e1..43e7a7eddac0 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -44,7 +44,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
pages = end_index - start_index + 1;
- page = kmalloc(sizeof(void *) * pages, GFP_KERNEL);
+ page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL);
if (page == NULL)
return res;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 031c8d67fd51..5056babe00df 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -27,6 +27,8 @@
* the filesystem.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/slab.h>
@@ -448,8 +450,7 @@ static int __init init_squashfs_fs(void)
return err;
}
- printk(KERN_INFO "squashfs: version 4.0 (2009/01/31) "
- "Phillip Lougher\n");
+ pr_info("version 4.0 (2009/01/31) Phillip Lougher\n");
return 0;
}
diff --git a/fs/ufs/Makefile b/fs/ufs/Makefile
index dd39980437fc..4d0e02b022b3 100644
--- a/fs/ufs/Makefile
+++ b/fs/ufs/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_UFS_FS) += ufs.o
ufs-objs := balloc.o cylinder.o dir.o file.o ialloc.o inode.o \
namei.o super.o symlink.o truncate.o util.o
+ccflags-$(CONFIG_UFS_DEBUG) += -DDEBUG
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index b879f1ba3439..da73801301d5 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -65,7 +65,6 @@
* Evgeniy Dushistov <dushistov@mail.ru>, 2007
*/
-
#include <linux/exportfs.h>
#include <linux/module.h>
#include <linux/bitops.h>
@@ -172,73 +171,73 @@ static void ufs_print_super_stuff(struct super_block *sb,
{
u32 magic = fs32_to_cpu(sb, usb3->fs_magic);
- printk("ufs_print_super_stuff\n");
- printk(" magic: 0x%x\n", magic);
+ pr_debug("ufs_print_super_stuff\n");
+ pr_debug(" magic: 0x%x\n", magic);
if (fs32_to_cpu(sb, usb3->fs_magic) == UFS2_MAGIC) {
- printk(" fs_size: %llu\n", (unsigned long long)
- fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size));
- printk(" fs_dsize: %llu\n", (unsigned long long)
- fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize));
- printk(" bsize: %u\n",
- fs32_to_cpu(sb, usb1->fs_bsize));
- printk(" fsize: %u\n",
- fs32_to_cpu(sb, usb1->fs_fsize));
- printk(" fs_volname: %s\n", usb2->fs_un.fs_u2.fs_volname);
- printk(" fs_sblockloc: %llu\n", (unsigned long long)
- fs64_to_cpu(sb, usb2->fs_un.fs_u2.fs_sblockloc));
- printk(" cs_ndir(No of dirs): %llu\n", (unsigned long long)
- fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir));
- printk(" cs_nbfree(No of free blocks): %llu\n",
- (unsigned long long)
- fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree));
- printk(KERN_INFO" cs_nifree(Num of free inodes): %llu\n",
- (unsigned long long)
- fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nifree));
- printk(KERN_INFO" cs_nffree(Num of free frags): %llu\n",
- (unsigned long long)
- fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nffree));
- printk(KERN_INFO" fs_maxsymlinklen: %u\n",
- fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen));
+ pr_debug(" fs_size: %llu\n", (unsigned long long)
+ fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size));
+ pr_debug(" fs_dsize: %llu\n", (unsigned long long)
+ fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize));
+ pr_debug(" bsize: %u\n",
+ fs32_to_cpu(sb, usb1->fs_bsize));
+ pr_debug(" fsize: %u\n",
+ fs32_to_cpu(sb, usb1->fs_fsize));
+ pr_debug(" fs_volname: %s\n", usb2->fs_un.fs_u2.fs_volname);
+ pr_debug(" fs_sblockloc: %llu\n", (unsigned long long)
+ fs64_to_cpu(sb, usb2->fs_un.fs_u2.fs_sblockloc));
+ pr_debug(" cs_ndir(No of dirs): %llu\n", (unsigned long long)
+ fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir));
+ pr_debug(" cs_nbfree(No of free blocks): %llu\n",
+ (unsigned long long)
+ fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree));
+ pr_info(" cs_nifree(Num of free inodes): %llu\n",
+ (unsigned long long)
+ fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nifree));
+ pr_info(" cs_nffree(Num of free frags): %llu\n",
+ (unsigned long long)
+ fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nffree));
+ pr_info(" fs_maxsymlinklen: %u\n",
+ fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen));
} else {
- printk(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno));
- printk(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno));
- printk(" iblkno: %u\n", fs32_to_cpu(sb, usb1->fs_iblkno));
- printk(" dblkno: %u\n", fs32_to_cpu(sb, usb1->fs_dblkno));
- printk(" cgoffset: %u\n",
- fs32_to_cpu(sb, usb1->fs_cgoffset));
- printk(" ~cgmask: 0x%x\n",
- ~fs32_to_cpu(sb, usb1->fs_cgmask));
- printk(" size: %u\n", fs32_to_cpu(sb, usb1->fs_size));
- printk(" dsize: %u\n", fs32_to_cpu(sb, usb1->fs_dsize));
- printk(" ncg: %u\n", fs32_to_cpu(sb, usb1->fs_ncg));
- printk(" bsize: %u\n", fs32_to_cpu(sb, usb1->fs_bsize));
- printk(" fsize: %u\n", fs32_to_cpu(sb, usb1->fs_fsize));
- printk(" frag: %u\n", fs32_to_cpu(sb, usb1->fs_frag));
- printk(" fragshift: %u\n",
- fs32_to_cpu(sb, usb1->fs_fragshift));
- printk(" ~fmask: %u\n", ~fs32_to_cpu(sb, usb1->fs_fmask));
- printk(" fshift: %u\n", fs32_to_cpu(sb, usb1->fs_fshift));
- printk(" sbsize: %u\n", fs32_to_cpu(sb, usb1->fs_sbsize));
- printk(" spc: %u\n", fs32_to_cpu(sb, usb1->fs_spc));
- printk(" cpg: %u\n", fs32_to_cpu(sb, usb1->fs_cpg));
- printk(" ipg: %u\n", fs32_to_cpu(sb, usb1->fs_ipg));
- printk(" fpg: %u\n", fs32_to_cpu(sb, usb1->fs_fpg));
- printk(" csaddr: %u\n", fs32_to_cpu(sb, usb1->fs_csaddr));
- printk(" cssize: %u\n", fs32_to_cpu(sb, usb1->fs_cssize));
- printk(" cgsize: %u\n", fs32_to_cpu(sb, usb1->fs_cgsize));
- printk(" fstodb: %u\n",
- fs32_to_cpu(sb, usb1->fs_fsbtodb));
- printk(" nrpos: %u\n", fs32_to_cpu(sb, usb3->fs_nrpos));
- printk(" ndir %u\n",
- fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir));
- printk(" nifree %u\n",
- fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree));
- printk(" nbfree %u\n",
- fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree));
- printk(" nffree %u\n",
- fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree));
+ pr_debug(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno));
+ pr_debug(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno));
+ pr_debug(" iblkno: %u\n", fs32_to_cpu(sb, usb1->fs_iblkno));
+ pr_debug(" dblkno: %u\n", fs32_to_cpu(sb, usb1->fs_dblkno));
+ pr_debug(" cgoffset: %u\n",
+ fs32_to_cpu(sb, usb1->fs_cgoffset));
+ pr_debug(" ~cgmask: 0x%x\n",
+ ~fs32_to_cpu(sb, usb1->fs_cgmask));
+ pr_debug(" size: %u\n", fs32_to_cpu(sb, usb1->fs_size));
+ pr_debug(" dsize: %u\n", fs32_to_cpu(sb, usb1->fs_dsize));
+ pr_debug(" ncg: %u\n", fs32_to_cpu(sb, usb1->fs_ncg));
+ pr_debug(" bsize: %u\n", fs32_to_cpu(sb, usb1->fs_bsize));
+ pr_debug(" fsize: %u\n", fs32_to_cpu(sb, usb1->fs_fsize));
+ pr_debug(" frag: %u\n", fs32_to_cpu(sb, usb1->fs_frag));
+ pr_debug(" fragshift: %u\n",
+ fs32_to_cpu(sb, usb1->fs_fragshift));
+ pr_debug(" ~fmask: %u\n", ~fs32_to_cpu(sb, usb1->fs_fmask));
+ pr_debug(" fshift: %u\n", fs32_to_cpu(sb, usb1->fs_fshift));
+ pr_debug(" sbsize: %u\n", fs32_to_cpu(sb, usb1->fs_sbsize));
+ pr_debug(" spc: %u\n", fs32_to_cpu(sb, usb1->fs_spc));
+ pr_debug(" cpg: %u\n", fs32_to_cpu(sb, usb1->fs_cpg));
+ pr_debug(" ipg: %u\n", fs32_to_cpu(sb, usb1->fs_ipg));
+ pr_debug(" fpg: %u\n", fs32_to_cpu(sb, usb1->fs_fpg));
+ pr_debug(" csaddr: %u\n", fs32_to_cpu(sb, usb1->fs_csaddr));
+ pr_debug(" cssize: %u\n", fs32_to_cpu(sb, usb1->fs_cssize));
+ pr_debug(" cgsize: %u\n", fs32_to_cpu(sb, usb1->fs_cgsize));
+ pr_debug(" fstodb: %u\n",
+ fs32_to_cpu(sb, usb1->fs_fsbtodb));
+ pr_debug(" nrpos: %u\n", fs32_to_cpu(sb, usb3->fs_nrpos));
+ pr_debug(" ndir %u\n",
+ fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir));
+ pr_debug(" nifree %u\n",
+ fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree));
+ pr_debug(" nbfree %u\n",
+ fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree));
+ pr_debug(" nffree %u\n",
+ fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree));
}
- printk("\n");
+ pr_debug("\n");
}
/*
@@ -247,38 +246,38 @@ static void ufs_print_super_stuff(struct super_block *sb,
static void ufs_print_cylinder_stuff(struct super_block *sb,
struct ufs_cylinder_group *cg)
{
- printk("\nufs_print_cylinder_stuff\n");
- printk("size of ucg: %zu\n", sizeof(struct ufs_cylinder_group));
- printk(" magic: %x\n", fs32_to_cpu(sb, cg->cg_magic));
- printk(" time: %u\n", fs32_to_cpu(sb, cg->cg_time));
- printk(" cgx: %u\n", fs32_to_cpu(sb, cg->cg_cgx));
- printk(" ncyl: %u\n", fs16_to_cpu(sb, cg->cg_ncyl));
- printk(" niblk: %u\n", fs16_to_cpu(sb, cg->cg_niblk));
- printk(" ndblk: %u\n", fs32_to_cpu(sb, cg->cg_ndblk));
- printk(" cs_ndir: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_ndir));
- printk(" cs_nbfree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nbfree));
- printk(" cs_nifree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nifree));
- printk(" cs_nffree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nffree));
- printk(" rotor: %u\n", fs32_to_cpu(sb, cg->cg_rotor));
- printk(" frotor: %u\n", fs32_to_cpu(sb, cg->cg_frotor));
- printk(" irotor: %u\n", fs32_to_cpu(sb, cg->cg_irotor));
- printk(" frsum: %u, %u, %u, %u, %u, %u, %u, %u\n",
+ pr_debug("\nufs_print_cylinder_stuff\n");
+ pr_debug("size of ucg: %zu\n", sizeof(struct ufs_cylinder_group));
+ pr_debug(" magic: %x\n", fs32_to_cpu(sb, cg->cg_magic));
+ pr_debug(" time: %u\n", fs32_to_cpu(sb, cg->cg_time));
+ pr_debug(" cgx: %u\n", fs32_to_cpu(sb, cg->cg_cgx));
+ pr_debug(" ncyl: %u\n", fs16_to_cpu(sb, cg->cg_ncyl));
+ pr_debug(" niblk: %u\n", fs16_to_cpu(sb, cg->cg_niblk));
+ pr_debug(" ndblk: %u\n", fs32_to_cpu(sb, cg->cg_ndblk));
+ pr_debug(" cs_ndir: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_ndir));
+ pr_debug(" cs_nbfree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nbfree));
+ pr_debug(" cs_nifree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nifree));
+ pr_debug(" cs_nffree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nffree));
+ pr_debug(" rotor: %u\n", fs32_to_cpu(sb, cg->cg_rotor));
+ pr_debug(" frotor: %u\n", fs32_to_cpu(sb, cg->cg_frotor));
+ pr_debug(" irotor: %u\n", fs32_to_cpu(sb, cg->cg_irotor));
+ pr_debug(" frsum: %u, %u, %u, %u, %u, %u, %u, %u\n",
fs32_to_cpu(sb, cg->cg_frsum[0]), fs32_to_cpu(sb, cg->cg_frsum[1]),
fs32_to_cpu(sb, cg->cg_frsum[2]), fs32_to_cpu(sb, cg->cg_frsum[3]),
fs32_to_cpu(sb, cg->cg_frsum[4]), fs32_to_cpu(sb, cg->cg_frsum[5]),
fs32_to_cpu(sb, cg->cg_frsum[6]), fs32_to_cpu(sb, cg->cg_frsum[7]));
- printk(" btotoff: %u\n", fs32_to_cpu(sb, cg->cg_btotoff));
- printk(" boff: %u\n", fs32_to_cpu(sb, cg->cg_boff));
- printk(" iuseoff: %u\n", fs32_to_cpu(sb, cg->cg_iusedoff));
- printk(" freeoff: %u\n", fs32_to_cpu(sb, cg->cg_freeoff));
- printk(" nextfreeoff: %u\n", fs32_to_cpu(sb, cg->cg_nextfreeoff));
- printk(" clustersumoff %u\n",
- fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clustersumoff));
- printk(" clusteroff %u\n",
- fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clusteroff));
- printk(" nclusterblks %u\n",
- fs32_to_cpu(sb, cg->cg_u.cg_44.cg_nclusterblks));
- printk("\n");
+ pr_debug(" btotoff: %u\n", fs32_to_cpu(sb, cg->cg_btotoff));
+ pr_debug(" boff: %u\n", fs32_to_cpu(sb, cg->cg_boff));
+ pr_debug(" iuseoff: %u\n", fs32_to_cpu(sb, cg->cg_iusedoff));
+ pr_debug(" freeoff: %u\n", fs32_to_cpu(sb, cg->cg_freeoff));
+ pr_debug(" nextfreeoff: %u\n", fs32_to_cpu(sb, cg->cg_nextfreeoff));
+ pr_debug(" clustersumoff %u\n",
+ fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clustersumoff));
+ pr_debug(" clusteroff %u\n",
+ fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clusteroff));
+ pr_debug(" nclusterblks %u\n",
+ fs32_to_cpu(sb, cg->cg_u.cg_44.cg_nclusterblks));
+ pr_debug("\n");
}
#else
# define ufs_print_super_stuff(sb, usb1, usb2, usb3) /**/
@@ -287,13 +286,12 @@ static void ufs_print_cylinder_stuff(struct super_block *sb,
static const struct super_operations ufs_super_ops;
-static char error_buf[1024];
-
void ufs_error (struct super_block * sb, const char * function,
const char * fmt, ...)
{
struct ufs_sb_private_info * uspi;
struct ufs_super_block_first * usb1;
+ struct va_format vaf;
va_list args;
uspi = UFS_SB(sb)->s_uspi;
@@ -305,20 +303,21 @@ void ufs_error (struct super_block * sb, const char * function,
ufs_mark_sb_dirty(sb);
sb->s_flags |= MS_RDONLY;
}
- va_start (args, fmt);
- vsnprintf (error_buf, sizeof(error_buf), fmt, args);
- va_end (args);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
switch (UFS_SB(sb)->s_mount_opt & UFS_MOUNT_ONERROR) {
case UFS_MOUNT_ONERROR_PANIC:
- panic ("UFS-fs panic (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
+ panic("panic (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
case UFS_MOUNT_ONERROR_LOCK:
case UFS_MOUNT_ONERROR_UMOUNT:
case UFS_MOUNT_ONERROR_REPAIR:
- printk (KERN_CRIT "UFS-fs error (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
- }
+ pr_crit("error (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+ }
+ va_end(args);
}
void ufs_panic (struct super_block * sb, const char * function,
@@ -326,6 +325,7 @@ void ufs_panic (struct super_block * sb, const char * function,
{
struct ufs_sb_private_info * uspi;
struct ufs_super_block_first * usb1;
+ struct va_format vaf;
va_list args;
uspi = UFS_SB(sb)->s_uspi;
@@ -336,24 +336,27 @@ void ufs_panic (struct super_block * sb, const char * function,
ubh_mark_buffer_dirty(USPI_UBH(uspi));
ufs_mark_sb_dirty(sb);
}
- va_start (args, fmt);
- vsnprintf (error_buf, sizeof(error_buf), fmt, args);
- va_end (args);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
sb->s_flags |= MS_RDONLY;
- printk (KERN_CRIT "UFS-fs panic (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
+ pr_crit("panic (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+ va_end(args);
}
void ufs_warning (struct super_block * sb, const char * function,
const char * fmt, ...)
{
+ struct va_format vaf;
va_list args;
- va_start (args, fmt);
- vsnprintf (error_buf, sizeof(error_buf), fmt, args);
- va_end (args);
- printk (KERN_WARNING "UFS-fs warning (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ pr_warn("(device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+ va_end(args);
}
enum {
@@ -464,14 +467,12 @@ static int ufs_parse_options (char * options, unsigned * mount_options)
ufs_set_opt (*mount_options, ONERROR_UMOUNT);
break;
case Opt_onerror_repair:
- printk("UFS-fs: Unable to do repair on error, "
- "will lock lock instead\n");
+ pr_err("Unable to do repair on error, will lock lock instead\n");
ufs_clear_opt (*mount_options, ONERROR);
ufs_set_opt (*mount_options, ONERROR_REPAIR);
break;
default:
- printk("UFS-fs: Invalid option: \"%s\" "
- "or missing value\n", p);
+ pr_err("Invalid option: \"%s\" or missing value\n", p);
return 0;
}
}
@@ -788,8 +789,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
#ifndef CONFIG_UFS_FS_WRITE
if (!(sb->s_flags & MS_RDONLY)) {
- printk("ufs was compiled with read-only support, "
- "can't be mounted as read-write\n");
+ pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n");
return -EROFS;
}
#endif
@@ -812,12 +812,12 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_mount_opt = 0;
ufs_set_opt (sbi->s_mount_opt, ONERROR_LOCK);
if (!ufs_parse_options ((char *) data, &sbi->s_mount_opt)) {
- printk("wrong mount options\n");
+ pr_err("wrong mount options\n");
goto failed;
}
if (!(sbi->s_mount_opt & UFS_MOUNT_UFSTYPE)) {
if (!silent)
- printk("You didn't specify the type of your ufs filesystem\n\n"
+ pr_err("You didn't specify the type of your ufs filesystem\n\n"
"mount -t ufs -o ufstype="
"sun|sunx86|44bsd|ufs2|5xbsd|old|hp|nextstep|nextstep-cd|openstep ...\n\n"
">>>WARNING<<< Wrong ufstype may corrupt your filesystem, "
@@ -868,7 +868,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
break;
case UFS_MOUNT_UFSTYPE_SUNOS:
- UFSD(("ufstype=sunos\n"))
+ UFSD("ufstype=sunos\n");
uspi->s_fsize = block_size = 1024;
uspi->s_fmask = ~(1024 - 1);
uspi->s_fshift = 10;
@@ -900,7 +900,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=old is supported read-only\n");
+ pr_info("ufstype=old is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
@@ -916,7 +916,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=nextstep is supported read-only\n");
+ pr_info("ufstype=nextstep is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
@@ -932,7 +932,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=nextstep-cd is supported read-only\n");
+ pr_info("ufstype=nextstep-cd is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
@@ -948,7 +948,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=openstep is supported read-only\n");
+ pr_info("ufstype=openstep is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
@@ -963,19 +963,19 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=hp is supported read-only\n");
+ pr_info("ufstype=hp is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
default:
if (!silent)
- printk("unknown ufstype\n");
+ pr_err("unknown ufstype\n");
goto failed;
}
again:
if (!sb_set_blocksize(sb, block_size)) {
- printk(KERN_ERR "UFS: failed to set blocksize\n");
+ pr_err("failed to set blocksize\n");
goto failed;
}
@@ -1034,7 +1034,7 @@ again:
goto again;
}
if (!silent)
- printk("ufs_read_super: bad magic number\n");
+ pr_err("%s(): bad magic number\n", __func__);
goto failed;
magic_found:
@@ -1048,33 +1048,33 @@ magic_found:
uspi->s_fshift = fs32_to_cpu(sb, usb1->fs_fshift);
if (!is_power_of_2(uspi->s_fsize)) {
- printk(KERN_ERR "ufs_read_super: fragment size %u is not a power of 2\n",
- uspi->s_fsize);
- goto failed;
+ pr_err("%s(): fragment size %u is not a power of 2\n",
+ __func__, uspi->s_fsize);
+ goto failed;
}
if (uspi->s_fsize < 512) {
- printk(KERN_ERR "ufs_read_super: fragment size %u is too small\n",
- uspi->s_fsize);
+ pr_err("%s(): fragment size %u is too small\n",
+ __func__, uspi->s_fsize);
goto failed;
}
if (uspi->s_fsize > 4096) {
- printk(KERN_ERR "ufs_read_super: fragment size %u is too large\n",
- uspi->s_fsize);
+ pr_err("%s(): fragment size %u is too large\n",
+ __func__, uspi->s_fsize);
goto failed;
}
if (!is_power_of_2(uspi->s_bsize)) {
- printk(KERN_ERR "ufs_read_super: block size %u is not a power of 2\n",
- uspi->s_bsize);
+ pr_err("%s(): block size %u is not a power of 2\n",
+ __func__, uspi->s_bsize);
goto failed;
}
if (uspi->s_bsize < 4096) {
- printk(KERN_ERR "ufs_read_super: block size %u is too small\n",
- uspi->s_bsize);
+ pr_err("%s(): block size %u is too small\n",
+ __func__, uspi->s_bsize);
goto failed;
}
if (uspi->s_bsize / uspi->s_fsize > 8) {
- printk(KERN_ERR "ufs_read_super: too many fragments per block (%u)\n",
- uspi->s_bsize / uspi->s_fsize);
+ pr_err("%s(): too many fragments per block (%u)\n",
+ __func__, uspi->s_bsize / uspi->s_fsize);
goto failed;
}
if (uspi->s_fsize != block_size || uspi->s_sbsize != super_block_size) {
@@ -1113,20 +1113,21 @@ magic_found:
UFSD("fs is DEC OSF/1\n");
break;
case UFS_FSACTIVE:
- printk("ufs_read_super: fs is active\n");
+ pr_err("%s(): fs is active\n", __func__);
sb->s_flags |= MS_RDONLY;
break;
case UFS_FSBAD:
- printk("ufs_read_super: fs is bad\n");
+ pr_err("%s(): fs is bad\n", __func__);
sb->s_flags |= MS_RDONLY;
break;
default:
- printk("ufs_read_super: can't grok fs_clean 0x%x\n", usb1->fs_clean);
+ pr_err("%s(): can't grok fs_clean 0x%x\n",
+ __func__, usb1->fs_clean);
sb->s_flags |= MS_RDONLY;
break;
}
} else {
- printk("ufs_read_super: fs needs fsck\n");
+ pr_err("%s(): fs needs fsck\n", __func__);
sb->s_flags |= MS_RDONLY;
}
@@ -1299,7 +1300,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) {
new_mount_opt |= ufstype;
} else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
- printk("ufstype can't be changed during remount\n");
+ pr_err("ufstype can't be changed during remount\n");
unlock_ufs(sb);
return -EINVAL;
}
@@ -1328,8 +1329,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
* fs was mounted as ro, remounting rw
*/
#ifndef CONFIG_UFS_FS_WRITE
- printk("ufs was compiled with read-only support, "
- "can't be mounted as read-write\n");
+ pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n");
unlock_ufs(sb);
return -EINVAL;
#else
@@ -1338,12 +1338,12 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
ufstype != UFS_MOUNT_UFSTYPE_44BSD &&
ufstype != UFS_MOUNT_UFSTYPE_SUNx86 &&
ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
- printk("this ufstype is read-only supported\n");
+ pr_err("this ufstype is read-only supported\n");
unlock_ufs(sb);
return -EINVAL;
}
if (!ufs_read_cylinder_structures(sb)) {
- printk("failed during remounting\n");
+ pr_err("failed during remounting\n");
unlock_ufs(sb);
return -EPERM;
}
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 343e6fc571e5..2a07396d5f9e 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -1,6 +1,12 @@
#ifndef _UFS_UFS_H
#define _UFS_UFS_H 1
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#define UFS_MAX_GROUP_LOADED 8
#define UFS_CGNO_EMPTY ((unsigned)-1)
@@ -71,9 +77,9 @@ struct ufs_inode_info {
*/
#ifdef CONFIG_UFS_DEBUG
# define UFSD(f, a...) { \
- printk ("UFSD (%s, %d): %s:", \
+ pr_debug("UFSD (%s, %d): %s:", \
__FILE__, __LINE__, __func__); \
- printk (f, ## a); \
+ pr_debug(f, ## a); \
}
#else
# define UFSD(f, a...) /**/
diff --git a/include/linux/cma.h b/include/linux/cma.h
new file mode 100644
index 000000000000..371b93042520
--- /dev/null
+++ b/include/linux/cma.h
@@ -0,0 +1,27 @@
+#ifndef __CMA_H__
+#define __CMA_H__
+
+/*
+ * There is always at least global CMA area and a few optional
+ * areas configured in kernel .config.
+ */
+#ifdef CONFIG_CMA_AREAS
+#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS)
+
+#else
+#define MAX_CMA_AREAS (0)
+
+#endif
+
+struct cma;
+
+extern phys_addr_t cma_get_base(struct cma *cma);
+extern unsigned long cma_get_size(struct cma *cma);
+
+extern int __init cma_declare_contiguous(phys_addr_t size,
+ phys_addr_t base, phys_addr_t limit,
+ phys_addr_t alignment, unsigned int order_per_bit,
+ bool fixed, struct cma **res_cma);
+extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align);
+extern bool cma_release(struct cma *cma, struct page *pages, int count);
+#endif
diff --git a/include/linux/crc64_ecma.h b/include/linux/crc64_ecma.h
new file mode 100644
index 000000000000..bba7a4d692b3
--- /dev/null
+++ b/include/linux/crc64_ecma.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CRC64_ECMA_H_
+#define __CRC64_ECMA_H_
+
+#include <linux/types.h>
+
+
+#define CRC64_DEFAULT_INITVAL 0xFFFFFFFFFFFFFFFFULL
+
+
+/*
+ * crc64_ecma_seed - Initializes the CRC64 ECMA seed.
+ */
+u64 crc64_ecma_seed(void);
+
+/*
+ * crc64_ecma - Computes the 64 bit ECMA CRC.
+ *
+ * @pdata: pointer to the data to compute checksum for.
+ * @nbytes: number of bytes in data buffer.
+ * @seed: CRC seed.
+ */
+u64 crc64_ecma(u8 const *pdata, u32 nbytes, u64 seed);
+
+#endif /* __CRC64_ECMA_H_ */
diff --git a/include/linux/decompress/bunzip2.h b/include/linux/decompress/bunzip2.h
index 115272137a9c..4d683df898e6 100644
--- a/include/linux/decompress/bunzip2.h
+++ b/include/linux/decompress/bunzip2.h
@@ -1,10 +1,10 @@
#ifndef DECOMPRESS_BUNZIP2_H
#define DECOMPRESS_BUNZIP2_H
-int bunzip2(unsigned char *inbuf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+int bunzip2(unsigned char *inbuf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *pos,
+ long *pos,
void(*error)(char *x));
#endif
diff --git a/include/linux/decompress/generic.h b/include/linux/decompress/generic.h
index 0c7111a55a1a..1fcfd64b5076 100644
--- a/include/linux/decompress/generic.h
+++ b/include/linux/decompress/generic.h
@@ -1,11 +1,11 @@
#ifndef DECOMPRESS_GENERIC_H
#define DECOMPRESS_GENERIC_H
-typedef int (*decompress_fn) (unsigned char *inbuf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+typedef int (*decompress_fn) (unsigned char *inbuf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *outbuf,
- int *posp,
+ long *posp,
void(*error)(char *x));
/* inbuf - input buffer
@@ -33,7 +33,7 @@ typedef int (*decompress_fn) (unsigned char *inbuf, int len,
/* Utility routine to detect the decompression method */
-decompress_fn decompress_method(const unsigned char *inbuf, int len,
+decompress_fn decompress_method(const unsigned char *inbuf, long len,
const char **name);
#endif
diff --git a/include/linux/decompress/inflate.h b/include/linux/decompress/inflate.h
index 1d0aedef9822..e4f411fdbd24 100644
--- a/include/linux/decompress/inflate.h
+++ b/include/linux/decompress/inflate.h
@@ -1,10 +1,10 @@
#ifndef LINUX_DECOMPRESS_INFLATE_H
#define LINUX_DECOMPRESS_INFLATE_H
-int gunzip(unsigned char *inbuf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+int gunzip(unsigned char *inbuf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *pos,
+ long *pos,
void(*error_fn)(char *x));
#endif
diff --git a/include/linux/decompress/unlz4.h b/include/linux/decompress/unlz4.h
index d5b68bf3ec92..3273c2f36496 100644
--- a/include/linux/decompress/unlz4.h
+++ b/include/linux/decompress/unlz4.h
@@ -1,10 +1,10 @@
#ifndef DECOMPRESS_UNLZ4_H
#define DECOMPRESS_UNLZ4_H
-int unlz4(unsigned char *inbuf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+int unlz4(unsigned char *inbuf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *pos,
+ long *pos,
void(*error)(char *x));
#endif
diff --git a/include/linux/decompress/unlzma.h b/include/linux/decompress/unlzma.h
index 7796538f1bf4..8a891a193840 100644
--- a/include/linux/decompress/unlzma.h
+++ b/include/linux/decompress/unlzma.h
@@ -1,11 +1,11 @@
#ifndef DECOMPRESS_UNLZMA_H
#define DECOMPRESS_UNLZMA_H
-int unlzma(unsigned char *, int,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+int unlzma(unsigned char *, long,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *posp,
+ long *posp,
void(*error)(char *x)
);
diff --git a/include/linux/decompress/unlzo.h b/include/linux/decompress/unlzo.h
index 987229752519..af18f95d6570 100644
--- a/include/linux/decompress/unlzo.h
+++ b/include/linux/decompress/unlzo.h
@@ -1,10 +1,10 @@
#ifndef DECOMPRESS_UNLZO_H
#define DECOMPRESS_UNLZO_H
-int unlzo(unsigned char *inbuf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+int unlzo(unsigned char *inbuf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *pos,
+ long *pos,
void(*error)(char *x));
#endif
diff --git a/include/linux/decompress/unxz.h b/include/linux/decompress/unxz.h
index 41728fc6c8a1..f764e2a7201e 100644
--- a/include/linux/decompress/unxz.h
+++ b/include/linux/decompress/unxz.h
@@ -10,10 +10,10 @@
#ifndef DECOMPRESS_UNXZ_H
#define DECOMPRESS_UNXZ_H
-int unxz(unsigned char *in, int in_size,
- int (*fill)(void *dest, unsigned int size),
- int (*flush)(void *src, unsigned int size),
- unsigned char *out, int *in_used,
+int unxz(unsigned char *in, long in_size,
+ long (*fill)(void *dest, unsigned long size),
+ long (*flush)(void *src, unsigned long size),
+ unsigned char *out, long *in_used,
void (*error)(char *x));
#endif
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
index 772eab5d524a..569bbd039896 100644
--- a/include/linux/dma-contiguous.h
+++ b/include/linux/dma-contiguous.h
@@ -53,18 +53,13 @@
#ifdef __KERNEL__
+#include <linux/device.h>
+
struct cma;
struct page;
-struct device;
#ifdef CONFIG_DMA_CMA
-/*
- * There is always at least global CMA area and a few optional device
- * private areas configured in kernel .config.
- */
-#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS)
-
extern struct cma *dma_contiguous_default_area;
static inline struct cma *dev_get_cma_area(struct device *dev)
@@ -123,8 +118,6 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
#else
-#define MAX_CMA_AREAS (0)
-
static inline struct cma *dev_get_cma_area(struct device *dev)
{
return NULL;
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 6eb1fb37de9a..5e7219dc0fae 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -360,7 +360,7 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask);
void *alloc_pages_exact(size_t size, gfp_t gfp_mask);
void free_pages_exact(void *virt, size_t size);
/* This is different from alloc_pages_exact_node !!! */
-void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
+void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
#define __get_free_page(gfp_mask) \
__get_free_pages((gfp_mask), 0)
diff --git a/include/linux/glob.h b/include/linux/glob.h
new file mode 100644
index 000000000000..c990b7fbf0a8
--- /dev/null
+++ b/include/linux/glob.h
@@ -0,0 +1,10 @@
+#ifndef _LINUX_GLOB_H
+#define _LINUX_GLOB_H
+
+#include <linux/types.h> /* For bool */
+#include <linux/compiler.h> /* For __pure */
+
+bool __pure glob_match(char const *pat, char const *str);
+
+#endif /* _LINUX_GLOB_H */
+
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 255cd5cc0754..6a836ef54661 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -41,8 +41,6 @@ extern int hugetlb_max_hstate __read_mostly;
struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
void hugepage_put_subpool(struct hugepage_subpool *spool);
-int PageHuge(struct page *page);
-
void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
@@ -109,11 +107,6 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
#else /* !CONFIG_HUGETLB_PAGE */
-static inline int PageHuge(struct page *page)
-{
- return 0;
-}
-
static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
{
}
diff --git a/include/linux/hugetlb_inline.h b/include/linux/hugetlb_inline.h
index 2bb681fbeb35..4d60c82e9fda 100644
--- a/include/linux/hugetlb_inline.h
+++ b/include/linux/hugetlb_inline.h
@@ -10,6 +10,8 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
return !!(vma->vm_flags & VM_HUGETLB);
}
+int PageHuge(struct page *page);
+
#else
static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
@@ -17,6 +19,11 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
return 0;
}
+static inline int PageHuge(struct page *page)
+{
+ return 0;
+}
+
#endif
#endif
diff --git a/include/linux/input.h b/include/linux/input.h
index 82ce323b9986..6453b22372ac 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -79,6 +79,7 @@ struct input_value {
* @led: reflects current state of device's LEDs
* @snd: reflects current state of sound effects
* @sw: reflects current state of device's switches
+ * @leds: leds objects for the device's LEDs
* @open: this method is called when the very first user calls
* input_open_device(). The driver must prepare the device
* to start generating events (start polling thread,
@@ -164,6 +165,8 @@ struct input_dev {
unsigned long snd[BITS_TO_LONGS(SND_CNT)];
unsigned long sw[BITS_TO_LONGS(SW_CNT)];
+ struct led_classdev *leds;
+
int (*open)(struct input_dev *dev);
void (*close)(struct input_dev *dev);
int (*flush)(struct input_dev *dev, struct file *file);
@@ -531,4 +534,22 @@ int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file);
int input_ff_create_memless(struct input_dev *dev, void *data,
int (*play_effect)(struct input_dev *, void *, struct ff_effect *));
+#ifdef CONFIG_INPUT_LEDS
+
+int input_led_connect(struct input_dev *dev);
+void input_led_disconnect(struct input_dev *dev);
+
+#else
+
+static inline int input_led_connect(struct input_dev *dev)
+{
+ return 0;
+}
+
+static inline void input_led_disconnect(struct input_dev *dev)
+{
+}
+
+#endif
+
#endif
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index a9e2268ecccb..e9892041cb41 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -470,6 +470,7 @@ extern enum system_states {
#define TAINT_FIRMWARE_WORKAROUND 11
#define TAINT_OOT_MODULE 12
#define TAINT_UNSIGNED_MODULE 13
+#define TAINT_SOFTLOCKUP 14
extern const char hex_asc[];
#define hex_asc_lo(x) hex_asc[((x) & 0x0f)]
@@ -493,11 +494,6 @@ static inline char *hex_byte_pack_upper(char *buf, u8 byte)
return buf;
}
-static inline char * __deprecated pack_hex_byte(char *buf, u8 byte)
-{
- return hex_byte_pack(buf, byte);
-}
-
extern int hex_to_bin(char ch);
extern int __must_check hex2bin(u8 *dst, const char *src, size_t count);
@@ -719,23 +715,8 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
(void) (&_max1 == &_max2); \
_max1 > _max2 ? _max1 : _max2; })
-#define min3(x, y, z) ({ \
- typeof(x) _min1 = (x); \
- typeof(y) _min2 = (y); \
- typeof(z) _min3 = (z); \
- (void) (&_min1 == &_min2); \
- (void) (&_min1 == &_min3); \
- _min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \
- (_min2 < _min3 ? _min2 : _min3); })
-
-#define max3(x, y, z) ({ \
- typeof(x) _max1 = (x); \
- typeof(y) _max2 = (y); \
- typeof(z) _max3 = (z); \
- (void) (&_max1 == &_max2); \
- (void) (&_max1 == &_max3); \
- _max1 > _max2 ? (_max1 > _max3 ? _max1 : _max3) : \
- (_max2 > _max3 ? _max2 : _max3); })
+#define min3(x, y, z) min((typeof(x))min(x, y), z)
+#define max3(x, y, z) max((typeof(x))max(x, y), z)
/**
* min_not_zero - return the minimum that is _not_ zero, unless both are zero
@@ -750,20 +731,13 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
/**
* clamp - return a value clamped to a given range with strict typechecking
* @val: current value
- * @min: minimum allowable value
- * @max: maximum allowable value
+ * @lo: lowest allowable value
+ * @hi: highest allowable value
*
* This macro does strict typechecking of min/max to make sure they are of the
* same type as val. See the unnecessary pointer comparisons.
*/
-#define clamp(val, min, max) ({ \
- typeof(val) __val = (val); \
- typeof(min) __min = (min); \
- typeof(max) __max = (max); \
- (void) (&__val == &__min); \
- (void) (&__val == &__max); \
- __val = __val < __min ? __min: __val; \
- __val > __max ? __max: __val; })
+#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
/*
* ..and if you can't take the strict
diff --git a/include/linux/klist.h b/include/linux/klist.h
index a370ce57cf1d..61e5b723ae73 100644
--- a/include/linux/klist.h
+++ b/include/linux/klist.h
@@ -44,7 +44,7 @@ struct klist_node {
extern void klist_add_tail(struct klist_node *n, struct klist *k);
extern void klist_add_head(struct klist_node *n, struct klist *k);
-extern void klist_add_after(struct klist_node *n, struct klist_node *pos);
+extern void klist_add_behind(struct klist_node *n, struct klist_node *pos);
extern void klist_add_before(struct klist_node *n, struct klist_node *pos);
extern void klist_del(struct klist_node *n);
diff --git a/include/linux/list.h b/include/linux/list.h
index ef9594171062..cbbb96fcead9 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -654,15 +654,15 @@ static inline void hlist_add_before(struct hlist_node *n,
*(n->pprev) = n;
}
-static inline void hlist_add_after(struct hlist_node *n,
- struct hlist_node *next)
+static inline void hlist_add_behind(struct hlist_node *n,
+ struct hlist_node *prev)
{
- next->next = n->next;
- n->next = next;
- next->pprev = &n->next;
+ n->next = prev->next;
+ prev->next = n;
+ n->pprev = &prev->next;
- if(next->next)
- next->next->pprev = &next->next;
+ if (n->next)
+ n->next->pprev = &n->next;
}
/* after that we'll appear to be on some hlist and hlist_del will work */
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index b660e05b63d4..e8cc45307f8f 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -249,7 +249,7 @@ phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
/*
* Set the allocation direction to bottom-up or top-down.
*/
-static inline void memblock_set_bottom_up(bool enable)
+static inline void __init memblock_set_bottom_up(bool enable)
{
memblock.bottom_up = enable;
}
@@ -264,7 +264,7 @@ static inline bool memblock_bottom_up(void)
return memblock.bottom_up;
}
#else
-static inline void memblock_set_bottom_up(bool enable) {}
+static inline void __init memblock_set_bottom_up(bool enable) {}
static inline bool memblock_bottom_up(void) { return false; }
#endif
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index eb65d29516ca..806b8fa15c5f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -54,39 +54,24 @@ struct mem_cgroup_reclaim_cookie {
};
#ifdef CONFIG_MEMCG
-/*
- * All "charge" functions with gfp_mask should use GFP_KERNEL or
- * (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't
- * alloc memory but reclaims memory from all available zones. So, "where I want
- * memory from" bits of gfp_mask has no meaning. So any bits of that field is
- * available but adding a rule is better. charge functions' gfp_mask should
- * be set to GFP_KERNEL or gfp_mask & GFP_RECLAIM_MASK for avoiding ambiguous
- * codes.
- * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.)
- */
+int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+ gfp_t gfp_mask, struct mem_cgroup **memcgp);
+void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
+ bool lrucare);
+void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg);
+
+void mem_cgroup_uncharge(struct page *page);
-extern int mem_cgroup_charge_anon(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask);
-/* for swap handling */
-extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
- struct page *page, gfp_t mask, struct mem_cgroup **memcgp);
-extern void mem_cgroup_commit_charge_swapin(struct page *page,
- struct mem_cgroup *memcg);
-extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg);
+/* Batched uncharging */
+void mem_cgroup_uncharge_start(void);
+void mem_cgroup_uncharge_end(void);
-extern int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask);
+void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
+ bool lrucare);
struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
-/* For coalescing uncharge for reducing memcg' overhead*/
-extern void mem_cgroup_uncharge_start(void);
-extern void mem_cgroup_uncharge_end(void);
-
-extern void mem_cgroup_uncharge_page(struct page *page);
-extern void mem_cgroup_uncharge_cache_page(struct page *page);
-
bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
struct mem_cgroup *memcg);
bool task_in_mem_cgroup(struct task_struct *task,
@@ -113,12 +98,6 @@ bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg)
extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
-extern void
-mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
- struct mem_cgroup **memcgp);
-extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
- struct page *oldpage, struct page *newpage, bool migration_ok);
-
struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
struct mem_cgroup *,
struct mem_cgroup_reclaim_cookie *);
@@ -133,8 +112,6 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list);
void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int);
extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
struct task_struct *p);
-extern void mem_cgroup_replace_page_cache(struct page *oldpage,
- struct page *newpage);
static inline void mem_cgroup_oom_enable(void)
{
@@ -233,30 +210,26 @@ void mem_cgroup_print_bad_page(struct page *page);
#else /* CONFIG_MEMCG */
struct mem_cgroup;
-static inline int mem_cgroup_charge_anon(struct page *page,
- struct mm_struct *mm, gfp_t gfp_mask)
+static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+ gfp_t gfp_mask,
+ struct mem_cgroup **memcgp)
{
+ *memcgp = NULL;
return 0;
}
-static inline int mem_cgroup_charge_file(struct page *page,
- struct mm_struct *mm, gfp_t gfp_mask)
+static inline void mem_cgroup_commit_charge(struct page *page,
+ struct mem_cgroup *memcg,
+ bool lrucare)
{
- return 0;
-}
-
-static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
- struct page *page, gfp_t gfp_mask, struct mem_cgroup **memcgp)
-{
- return 0;
}
-static inline void mem_cgroup_commit_charge_swapin(struct page *page,
- struct mem_cgroup *memcg)
+static inline void mem_cgroup_cancel_charge(struct page *page,
+ struct mem_cgroup *memcg)
{
}
-static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
+static inline void mem_cgroup_uncharge(struct page *page)
{
}
@@ -268,11 +241,9 @@ static inline void mem_cgroup_uncharge_end(void)
{
}
-static inline void mem_cgroup_uncharge_page(struct page *page)
-{
-}
-
-static inline void mem_cgroup_uncharge_cache_page(struct page *page)
+static inline void mem_cgroup_migrate(struct page *oldpage,
+ struct page *newpage,
+ bool lrucare)
{
}
@@ -311,17 +282,6 @@ static inline struct cgroup_subsys_state
return NULL;
}
-static inline void
-mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
- struct mem_cgroup **memcgp)
-{
-}
-
-static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
- struct page *oldpage, struct page *newpage, bool migration_ok)
-{
-}
-
static inline struct mem_cgroup *
mem_cgroup_iter(struct mem_cgroup *root,
struct mem_cgroup *prev,
@@ -417,10 +377,6 @@ static inline
void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
{
}
-static inline void mem_cgroup_replace_page_cache(struct page *oldpage,
- struct page *newpage)
-{
-}
#endif /* CONFIG_MEMCG */
#if !defined(CONFIG_MEMCG) || !defined(CONFIG_DEBUG_VM)
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 010d125bffbf..79dd9eca054f 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -26,11 +26,12 @@ enum {
MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
};
-/* Types for control the zone type of onlined memory */
+/* Types for control the zone type of onlined and offlined memory */
enum {
- ONLINE_KEEP,
- ONLINE_KERNEL,
- ONLINE_MOVABLE,
+ MMOP_OFFLINE = -1,
+ MMOP_ONLINE_KEEP,
+ MMOP_ONLINE_KERNEL,
+ MMOP_ONLINE_MOVABLE,
};
/*
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 96c5750e3110..21bff4be4379 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -461,6 +461,7 @@ static inline void mm_init_cpumask(struct mm_struct *mm)
#ifdef CONFIG_CPUMASK_OFFSTACK
mm->cpu_vm_mask_var = &mm->cpumask_allocation;
#endif
+ cpumask_clear(mm->cpu_vm_mask_var);
}
/* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6cbd1b6c3d20..559e659288fc 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -872,6 +872,8 @@ static inline int zone_movable_is_highmem(void)
{
#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
return movable_zone == ZONE_HIGHMEM;
+#elif defined(CONFIG_HIGHMEM)
+ return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
#else
return 0;
#endif
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 8304959ad336..e1f5fcd79792 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -171,13 +171,12 @@ static inline int __TestClearPage##uname(struct page *page) \
#define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \
__SETPAGEFLAG(uname, lname) __CLEARPAGEFLAG(uname, lname)
-#define PAGEFLAG_FALSE(uname) \
-static inline int Page##uname(const struct page *page) \
- { return 0; }
-
#define TESTSCFLAG(uname, lname) \
TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname)
+#define TESTPAGEFLAG_FALSE(uname) \
+static inline int Page##uname(const struct page *page) { return 0; }
+
#define SETPAGEFLAG_NOOP(uname) \
static inline void SetPage##uname(struct page *page) { }
@@ -187,12 +186,21 @@ static inline void ClearPage##uname(struct page *page) { }
#define __CLEARPAGEFLAG_NOOP(uname) \
static inline void __ClearPage##uname(struct page *page) { }
+#define TESTSETFLAG_FALSE(uname) \
+static inline int TestSetPage##uname(struct page *page) { return 0; }
+
#define TESTCLEARFLAG_FALSE(uname) \
static inline int TestClearPage##uname(struct page *page) { return 0; }
#define __TESTCLEARFLAG_FALSE(uname) \
static inline int __TestClearPage##uname(struct page *page) { return 0; }
+#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname) \
+ SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname)
+
+#define TESTSCFLAG_FALSE(uname) \
+ TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)
+
struct page; /* forward declaration */
TESTPAGEFLAG(Locked, locked)
@@ -248,7 +256,6 @@ PAGEFLAG_FALSE(HighMem)
PAGEFLAG(SwapCache, swapcache)
#else
PAGEFLAG_FALSE(SwapCache)
- SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache)
#endif
PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
@@ -258,8 +265,8 @@ PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked)
#else
-PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked)
- TESTCLEARFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
+PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked)
+ TESTSCFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
#endif
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 777a524716db..5c831f1eca79 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -3,17 +3,15 @@
enum {
/* flags for mem_cgroup */
- PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */
- PCG_USED, /* this object is in use. */
- PCG_MIGRATION, /* under page migration */
- __NR_PCG_FLAGS,
+ PCG_USED = 0x01, /* This page is charged to a memcg */
+ PCG_MEM = 0x02, /* This page holds a memory charge */
+ PCG_MEMSW = 0x04, /* This page holds a memory+swap charge */
};
-#ifndef __GENERATING_BOUNDS_H
-#include <generated/bounds.h>
+struct pglist_data;
#ifdef CONFIG_MEMCG
-#include <linux/bit_spinlock.h>
+struct mem_cgroup;
/*
* Page Cgroup can be considered as an extended mem_map.
@@ -27,65 +25,30 @@ struct page_cgroup {
struct mem_cgroup *mem_cgroup;
};
-void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
+extern void pgdat_page_cgroup_init(struct pglist_data *pgdat);
#ifdef CONFIG_SPARSEMEM
-static inline void __init page_cgroup_init_flatmem(void)
+static inline void page_cgroup_init_flatmem(void)
{
}
-extern void __init page_cgroup_init(void);
+extern void page_cgroup_init(void);
#else
-void __init page_cgroup_init_flatmem(void);
-static inline void __init page_cgroup_init(void)
+extern void page_cgroup_init_flatmem(void);
+static inline void page_cgroup_init(void)
{
}
#endif
struct page_cgroup *lookup_page_cgroup(struct page *page);
-struct page *lookup_cgroup_page(struct page_cgroup *pc);
-
-#define TESTPCGFLAG(uname, lname) \
-static inline int PageCgroup##uname(struct page_cgroup *pc) \
- { return test_bit(PCG_##lname, &pc->flags); }
-
-#define SETPCGFLAG(uname, lname) \
-static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
- { set_bit(PCG_##lname, &pc->flags); }
-
-#define CLEARPCGFLAG(uname, lname) \
-static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \
- { clear_bit(PCG_##lname, &pc->flags); }
-
-#define TESTCLEARPCGFLAG(uname, lname) \
-static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \
- { return test_and_clear_bit(PCG_##lname, &pc->flags); }
-
-TESTPCGFLAG(Used, USED)
-CLEARPCGFLAG(Used, USED)
-SETPCGFLAG(Used, USED)
-
-SETPCGFLAG(Migration, MIGRATION)
-CLEARPCGFLAG(Migration, MIGRATION)
-TESTPCGFLAG(Migration, MIGRATION)
-static inline void lock_page_cgroup(struct page_cgroup *pc)
+static inline int PageCgroupUsed(struct page_cgroup *pc)
{
- /*
- * Don't take this lock in IRQ context.
- * This lock is for pc->mem_cgroup, USED, MIGRATION
- */
- bit_spin_lock(PCG_LOCK, &pc->flags);
+ return !!(pc->flags & PCG_USED);
}
-
-static inline void unlock_page_cgroup(struct page_cgroup *pc)
-{
- bit_spin_unlock(PCG_LOCK, &pc->flags);
-}
-
-#else /* CONFIG_MEMCG */
+#else /* !CONFIG_MEMCG */
struct page_cgroup;
-static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
+static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
}
@@ -98,10 +61,9 @@ static inline void page_cgroup_init(void)
{
}
-static inline void __init page_cgroup_init_flatmem(void)
+static inline void page_cgroup_init_flatmem(void)
{
}
-
#endif /* CONFIG_MEMCG */
#include <linux/swap.h>
@@ -140,6 +102,4 @@ static inline void swap_cgroup_swapoff(int type)
#endif /* CONFIG_MEMCG_SWAP */
-#endif /* !__GENERATING_BOUNDS_H */
-
#endif /* __LINUX_PAGE_CGROUP_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0a97b583ee8d..c74f8bbef87a 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -411,6 +411,34 @@ static inline loff_t page_file_offset(struct page *page)
return ((loff_t)page_file_index(page)) << PAGE_CACHE_SHIFT;
}
+/*
+ * Get the order of a given page in the context of the pagecache which it
+ * belongs to.
+ *
+ * Pagecache unit size is not a fixed value (hugetlbfs is an example), but the
+ * vma_interval_tree and anon_vma_interval_tree APIs assume that indices are in
+ * PAGE_SIZE units. So this function helps us to get normalized indices.
+ *
+ * page_size_order() should be called only for pagecache pages/hugepages and
+ * anonymous pages/hugepages, because pagecache unit size is irrelevant except
+ * for those pages.
+ */
+static inline unsigned int page_size_order(struct page *page)
+{
+ return unlikely(PageHuge(page)) ?
+ compound_order(compound_head(page)) :
+ (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+}
+
+/*
+ * page->index stores pagecache index whose unit is not always PAGE_SIZE.
+ * This function converts it into PAGE_SIZE offset.
+ */
+static inline pgoff_t page_pgoff(struct page *page)
+{
+ return page->index << page_size_order(page);
+}
+
extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
unsigned long address);
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 8183b46fbaa2..372ad5e0dcb8 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -432,9 +432,9 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
}
/**
- * hlist_add_after_rcu
- * @prev: the existing element to add the new element after.
+ * hlist_add_behind_rcu
* @n: the new element to add to the hash list.
+ * @prev: the existing element to add the new element after.
*
* Description:
* Adds the specified element to the specified hlist
@@ -449,8 +449,8 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
* hlist_for_each_entry_rcu(), used to prevent memory-consistency
* problems on Alpha CPUs.
*/
-static inline void hlist_add_after_rcu(struct hlist_node *prev,
- struct hlist_node *n)
+static inline void hlist_add_behind_rcu(struct hlist_node *n,
+ struct hlist_node *prev)
{
n->next = prev->next;
n->pprev = &prev->next;
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index a964f7285600..4b152c81c5fa 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -136,7 +136,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
struct scatterlist *sgl)
{
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
BUG();
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3cfbc05e66e6..322d4fc8976c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -33,6 +33,7 @@ struct sched_param {
#include <linux/smp.h>
#include <linux/sem.h>
+#include <linux/shm.h>
#include <linux/signal.h>
#include <linux/compiler.h>
#include <linux/completion.h>
@@ -1386,6 +1387,7 @@ struct task_struct {
#ifdef CONFIG_SYSVIPC
/* ipc stuff */
struct sysv_sem sysvsem;
+ struct sysv_shm sysvshm;
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
/* hung task detection */
diff --git a/include/linux/shm.h b/include/linux/shm.h
index 57d77709fbe2..6fb801686ad6 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -1,6 +1,7 @@
#ifndef _LINUX_SHM_H_
#define _LINUX_SHM_H_
+#include <linux/list.h>
#include <asm/page.h>
#include <uapi/linux/shm.h>
#include <asm/shmparam.h>
@@ -20,6 +21,7 @@ struct shmid_kernel /* private to the kernel */
/* The task created the shm object. NULL if the task is dead. */
struct task_struct *shm_creator;
+ struct list_head shm_clist; /* list by creator */
};
/* shm_mode upper byte flags */
@@ -44,11 +46,20 @@ struct shmid_kernel /* private to the kernel */
#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT)
#ifdef CONFIG_SYSVIPC
+struct sysv_shm {
+ struct list_head shm_clist;
+};
+
long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr,
unsigned long shmlba);
-extern int is_file_shm_hugepages(struct file *file);
-extern void exit_shm(struct task_struct *task);
+int is_file_shm_hugepages(struct file *file);
+void exit_shm(struct task_struct *task);
+#define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist)
#else
+struct sysv_shm {
+ /* empty */
+};
+
static inline long do_shmat(int shmid, char __user *shmaddr,
int shmflg, unsigned long *addr,
unsigned long shmlba)
@@ -62,6 +73,9 @@ static inline int is_file_shm_hugepages(struct file *file)
static inline void exit_shm(struct task_struct *task)
{
}
+static inline void shm_init_task(struct task_struct *task)
+{
+}
#endif
#endif /* _LINUX_SHM_H_ */
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 1d9abb7d22a0..68b1feaba9d6 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -526,20 +526,22 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
* @memcg: pointer to the memcg this cache belongs to
* @list: list_head for the list of all caches in this memcg
* @root_cache: pointer to the global, root cache, this cache was derived from
- * @nr_pages: number of pages that belongs to this cache.
+ * @refcnt: reference counter
+ * @dead: set to true when owner memcg is turned offline
+ * @unregister_work: worker to destroy the cache
*/
struct memcg_cache_params {
bool is_root_cache;
+ struct rcu_head rcu_head;
union {
- struct {
- struct rcu_head rcu_head;
- struct kmem_cache *memcg_caches[0];
- };
+ struct kmem_cache *memcg_caches[0];
struct {
struct mem_cgroup *memcg;
struct list_head list;
struct kmem_cache *root_cache;
- atomic_t nr_pages;
+ atomic_long_t refcnt;
+ bool dead;
+ struct work_struct unregister_work;
};
};
};
diff --git a/include/linux/string.h b/include/linux/string.h
index d36977e029af..2663afcc0861 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -117,6 +117,7 @@ void *memchr_inv(const void *s, int c, size_t n);
extern char *kstrdup(const char *s, gfp_t gfp);
extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
+extern char *kstrimdup(const char *s, gfp_t gfp);
extern void *kmemdup(const void *src, size_t len, gfp_t gfp);
extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4bdbee80eede..94fd0b23f3f9 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -321,6 +321,9 @@ extern void swap_setup(void);
extern void add_page_to_unevictable_list(struct page *page);
+extern void lru_cache_add_active_or_unevictable(struct page *page,
+ struct vm_area_struct *vma);
+
/* linux/mm/vmscan.c */
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
gfp_t gfp_mask, nodemask_t *mask);
@@ -379,9 +382,13 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
}
#endif
#ifdef CONFIG_MEMCG_SWAP
-extern void mem_cgroup_uncharge_swap(swp_entry_t ent);
+extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
+extern void mem_cgroup_uncharge_swap(swp_entry_t entry);
#else
-static inline void mem_cgroup_uncharge_swap(swp_entry_t ent)
+static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+{
+}
+static inline void mem_cgroup_uncharge_swap(swp_entry_t entry)
{
}
#endif
@@ -441,7 +448,7 @@ extern void swap_shmem_alloc(swp_entry_t);
extern int swap_duplicate(swp_entry_t);
extern int swapcache_prepare(swp_entry_t);
extern void swap_free(swp_entry_t);
-extern void swapcache_free(swp_entry_t, struct page *page);
+extern void swapcache_free(swp_entry_t);
extern int free_swap_and_cache(swp_entry_t);
extern int swap_type_of(dev_t, sector_t, struct block_device **);
extern unsigned int count_swap_pages(int, int);
@@ -505,7 +512,7 @@ static inline void swap_free(swp_entry_t swp)
{
}
-static inline void swapcache_free(swp_entry_t swp, struct page *page)
+static inline void swapcache_free(swp_entry_t swp)
{
}
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 14a8ff2de11e..b7361f831226 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -34,8 +34,6 @@ struct ctl_table_root;
struct ctl_table_header;
struct ctl_dir;
-typedef struct ctl_table ctl_table;
-
typedef int proc_handler (struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
diff --git a/include/linux/zbud.h b/include/linux/zbud.h
index 13af0d450bf6..1e9cb575734c 100644
--- a/include/linux/zbud.h
+++ b/include/linux/zbud.h
@@ -11,7 +11,7 @@ struct zbud_ops {
struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops);
void zbud_destroy_pool(struct zbud_pool *pool);
-int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp,
+int zbud_alloc(struct zbud_pool *pool, size_t size,
unsigned long *handle);
void zbud_free(struct zbud_pool *pool, unsigned long handle);
int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries);
diff --git a/include/linux/zlib.h b/include/linux/zlib.h
index 9c5a6b4de0a3..9deeeafc6e1f 100644
--- a/include/linux/zlib.h
+++ b/include/linux/zlib.h
@@ -83,11 +83,11 @@ struct internal_state;
typedef struct z_stream_s {
const Byte *next_in; /* next input byte */
- uInt avail_in; /* number of bytes available at next_in */
+ uLong avail_in; /* number of bytes available at next_in */
uLong total_in; /* total nb of input bytes read so far */
Byte *next_out; /* next output byte should be put there */
- uInt avail_out; /* remaining free space at next_out */
+ uLong avail_out; /* remaining free space at next_out */
uLong total_out; /* total nb of bytes output so far */
char *msg; /* last error message, NULL if no error */
diff --git a/include/linux/zpool.h b/include/linux/zpool.h
new file mode 100644
index 000000000000..27b93730d102
--- /dev/null
+++ b/include/linux/zpool.h
@@ -0,0 +1,224 @@
+/*
+ * zpool memory storage api
+ *
+ * Copyright (C) 2014 Dan Streetman
+ *
+ * This is a common frontend for the zbud and zsmalloc memory
+ * storage pool implementations. Typically, this is used to
+ * store compressed memory.
+ */
+
+#ifndef _ZPOOL_H_
+#define _ZPOOL_H_
+
+struct zpool;
+
+struct zpool_ops {
+ int (*evict)(struct zpool *pool, unsigned long handle);
+};
+
+/*
+ * Control how a handle is mapped. It will be ignored if the
+ * implementation does not support it. Its use is optional.
+ * Note that this does not refer to memory protection, it
+ * refers to how the memory will be copied in/out if copying
+ * is necessary during mapping; read-write is the safest as
+ * it copies the existing memory in on map, and copies the
+ * changed memory back out on unmap. Write-only does not copy
+ * in the memory and should only be used for initialization.
+ * If in doubt, use ZPOOL_MM_DEFAULT which is read-write.
+ */
+enum zpool_mapmode {
+ ZPOOL_MM_RW, /* normal read-write mapping */
+ ZPOOL_MM_RO, /* read-only (no copy-out at unmap time) */
+ ZPOOL_MM_WO, /* write-only (no copy-in at map time) */
+
+ ZPOOL_MM_DEFAULT = ZPOOL_MM_RW
+};
+
+/**
+ * zpool_create_pool() - Create a new zpool
+ * @type The type of the zpool to create (e.g. zbud, zsmalloc)
+ * @flags What GFP flags should be used when the zpool allocates memory.
+ * @ops The optional ops callback.
+ *
+ * This creates a new zpool of the specified type. The zpool will use the
+ * given flags when allocating any memory. If the ops param is NULL, then
+ * the created zpool will not be shrinkable.
+ *
+ * Returns: New zpool on success, NULL on failure.
+ */
+struct zpool *zpool_create_pool(char *type, gfp_t flags,
+ struct zpool_ops *ops);
+
+/**
+ * zpool_get_type() - Get the type of the zpool
+ * @pool The zpool to check
+ *
+ * This returns the type of the pool.
+ *
+ * Returns: The type of zpool.
+ */
+char *zpool_get_type(struct zpool *pool);
+
+/**
+ * zpool_destroy_pool() - Destroy a zpool
+ * @pool The zpool to destroy.
+ *
+ * This destroys an existing zpool. The zpool should not be in use.
+ */
+void zpool_destroy_pool(struct zpool *pool);
+
+/**
+ * zpool_malloc() - Allocate memory
+ * @pool The zpool to allocate from.
+ * @size The amount of memory to allocate.
+ * @handle Pointer to the handle to set
+ *
+ * This allocates the requested amount of memory from the pool.
+ * The provided @handle will be set to the allocated object handle.
+ *
+ * Returns: 0 on success, negative value on error.
+ */
+int zpool_malloc(struct zpool *pool, size_t size, unsigned long *handle);
+
+/**
+ * zpool_free() - Free previously allocated memory
+ * @pool The zpool that allocated the memory.
+ * @handle The handle to the memory to free.
+ *
+ * This frees previously allocated memory. This does not guarantee
+ * that the pool will actually free memory, only that the memory
+ * in the pool will become available for use by the pool.
+ */
+void zpool_free(struct zpool *pool, unsigned long handle);
+
+/**
+ * zpool_shrink() - Shrink the pool size
+ * @pool The zpool to shrink.
+ * @pages The number of pages to shrink the pool.
+ * @reclaimed The number of pages successfully evicted.
+ *
+ * This attempts to shrink the actual memory size of the pool
+ * by evicting currently used handle(s). If the pool was
+ * created with no zpool_ops, or the evict call fails for any
+ * of the handles, this will fail. If non-NULL, the @reclaimed
+ * parameter will be set to the number of pages reclaimed,
+ * which may be more than the number of pages requested.
+ *
+ * Returns: 0 on success, negative value on error/failure.
+ */
+int zpool_shrink(struct zpool *pool, unsigned int pages,
+ unsigned int *reclaimed);
+
+/**
+ * zpool_map_handle() - Map a previously allocated handle into memory
+ * @pool The zpool that the handle was allocated from
+ * @handle The handle to map
+ * @mm How the memory should be mapped
+ *
+ * This maps a previously allocated handle into memory. The @mm
+ * param indicates to the implementation how the memory will be
+ * used, i.e. read-only, write-only, read-write. If the
+ * implementation does not support it, the memory will be treated
+ * as read-write.
+ *
+ * This may hold locks, disable interrupts, and/or preemption,
+ * and the zpool_unmap_handle() must be called to undo those
+ * actions. The code that uses the mapped handle should complete
+ * its operatons on the mapped handle memory quickly and unmap
+ * as soon as possible. Multiple handles should not be mapped
+ * concurrently on a cpu.
+ *
+ * Returns: A pointer to the handle's mapped memory area.
+ */
+void *zpool_map_handle(struct zpool *pool, unsigned long handle,
+ enum zpool_mapmode mm);
+
+/**
+ * zpool_unmap_handle() - Unmap a previously mapped handle
+ * @pool The zpool that the handle was allocated from
+ * @handle The handle to unmap
+ *
+ * This unmaps a previously mapped handle. Any locks or other
+ * actions that the implementation took in zpool_map_handle()
+ * will be undone here. The memory area returned from
+ * zpool_map_handle() should no longer be used after this.
+ */
+void zpool_unmap_handle(struct zpool *pool, unsigned long handle);
+
+/**
+ * zpool_get_total_size() - The total size of the pool
+ * @pool The zpool to check
+ *
+ * This returns the total size in bytes of the pool.
+ *
+ * Returns: Total size of the zpool in bytes.
+ */
+u64 zpool_get_total_size(struct zpool *pool);
+
+
+/**
+ * struct zpool_driver - driver implementation for zpool
+ * @type: name of the driver.
+ * @list: entry in the list of zpool drivers.
+ * @create: create a new pool.
+ * @destroy: destroy a pool.
+ * @malloc: allocate mem from a pool.
+ * @free: free mem from a pool.
+ * @shrink: shrink the pool.
+ * @map: map a handle.
+ * @unmap: unmap a handle.
+ * @total_size: get total size of a pool.
+ *
+ * This is created by a zpool implementation and registered
+ * with zpool.
+ */
+struct zpool_driver {
+ char *type;
+ struct module *owner;
+ struct list_head list;
+
+ void *(*create)(gfp_t gfp, struct zpool_ops *ops);
+ void (*destroy)(void *pool);
+
+ int (*malloc)(void *pool, size_t size, unsigned long *handle);
+ void (*free)(void *pool, unsigned long handle);
+
+ int (*shrink)(void *pool, unsigned int pages,
+ unsigned int *reclaimed);
+
+ void *(*map)(void *pool, unsigned long handle,
+ enum zpool_mapmode mm);
+ void (*unmap)(void *pool, unsigned long handle);
+
+ u64 (*total_size)(void *pool);
+};
+
+/**
+ * zpool_register_driver() - register a zpool implementation.
+ * @driver: driver to register
+ */
+void zpool_register_driver(struct zpool_driver *driver);
+
+/**
+ * zpool_unregister_driver() - unregister a zpool implementation.
+ * @driver: driver to unregister.
+ *
+ * Module usage counting is used to prevent using a driver
+ * while/after unloading. Please only call unregister from
+ * module exit function.
+ */
+void zpool_unregister_driver(struct zpool_driver *driver);
+
+/**
+ * zpool_evict() - evict callback from a zpool implementation.
+ * @pool: pool to evict from.
+ * @handle: handle to evict.
+ *
+ * This can be used by zpool implementations to call the
+ * user's evict zpool_ops struct evict callback.
+ */
+int zpool_evict(void *pool, unsigned long handle);
+
+#endif
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index 91e2e4215ba0..4b69139d9839 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -31,7 +31,7 @@ enum scsi_timeouts {
* Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit
* is totally arbitrary, a setting of 2048 will get you at least 8mb ios.
*/
-#ifdef ARCH_HAS_SG_CHAIN
+#ifdef CONFIG_ARCH_HAS_SG_CHAIN
#define SCSI_MAX_SG_CHAIN_SEGMENTS 2048
#else
#define SCSI_MAX_SG_CHAIN_SEGMENTS SCSI_MAX_SG_SEGMENTS
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
index 4e4f2f8b1ac2..dd2b5467d905 100644
--- a/include/trace/events/migrate.h
+++ b/include/trace/events/migrate.h
@@ -17,6 +17,7 @@
{MR_MEMORY_HOTPLUG, "memory_hotplug"}, \
{MR_SYSCALL, "syscall_or_cpuset"}, \
{MR_MEMPOLICY_MBIND, "mempolicy_mbind"}, \
+ {MR_NUMA_MISPLACED, "numa_misplaced"}, \
{MR_CMA, "cma"}
TRACE_EVENT(mm_migrate_pages,
diff --git a/init/Kconfig b/init/Kconfig
index 25ce01c9c298..fe1b6f8918f3 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -807,15 +807,53 @@ config LOG_BUF_SHIFT
range 12 21
default 17
help
- Select kernel log buffer size as a power of 2.
+ Select the minimal kernel log buffer size as a power of 2.
+ The final size is affected by LOG_CPU_MAX_BUF_SHIFT config
+ parameter, see below. Any higher size also might be forced
+ by "log_buf_len" boot parameter.
+
Examples:
- 17 => 128 KB
+ 17 => 128 KB
16 => 64 KB
- 15 => 32 KB
- 14 => 16 KB
+ 15 => 32 KB
+ 14 => 16 KB
13 => 8 KB
12 => 4 KB
+config LOG_CPU_MAX_BUF_SHIFT
+ int "CPU kernel log buffer size contribution (13 => 8 KB, 17 => 128KB)"
+ range 0 21
+ default 12 if !BASE_SMALL
+ default 0 if BASE_SMALL
+ help
+ This option allows to increase the default ring buffer size
+ according to the number of CPUs. The value defines the contribution
+ of each CPU as a power of 2. The used space is typically only few
+ lines however it might be much more when problems are reported,
+ e.g. backtraces.
+
+ The increased size means that a new buffer has to be allocated and
+ the original static one is unused. It makes sense only on systems
+ with more CPUs. Therefore this value is used only when the sum of
+ contributions is greater than the half of the default kernel ring
+ buffer as defined by LOG_BUF_SHIFT. The default values are set
+ so that more than 64 CPUs are needed to trigger the allocation.
+
+ Also this option is ignored when "log_buf_len" kernel parameter is
+ used as it forces an exact (power of two) size of the ring buffer.
+
+ The number of possible CPUs is used for this computation ignoring
+ hotplugging making the compuation optimal for the the worst case
+ scenerio while allowing a simple algorithm to be used from bootup.
+
+ Examples shift values and their meaning:
+ 17 => 128 KB for each CPU
+ 16 => 64 KB for each CPU
+ 15 => 32 KB for each CPU
+ 14 => 16 KB for each CPU
+ 13 => 8 KB for each CPU
+ 12 => 4 KB for each CPU
+
#
# Architectures with an unreliable sched_clock() should select this:
#
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 82f22885c87e..b6237c31b0e2 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -539,12 +539,6 @@ void __init prepare_namespace(void)
{
int is_floppy;
- if (root_delay) {
- printk(KERN_INFO "Waiting %d sec before mounting root device...\n",
- root_delay);
- ssleep(root_delay);
- }
-
/*
* wait for the known devices to complete their probing
*
@@ -571,6 +565,12 @@ void __init prepare_namespace(void)
if (initrd_load())
goto out;
+ if (root_delay) {
+ pr_info("Waiting %d sec before mounting root device...\n",
+ root_delay);
+ ssleep(root_delay);
+ }
+
/* wait for any asynchronous scanning to complete */
if ((ROOT_DEV == 0) && root_wait) {
printk(KERN_INFO "Waiting for root device %s...\n",
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index a8227022e3a0..e5d059e8aa11 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -311,9 +311,9 @@ static int exit_code;
static int decompress_error;
static int crd_infd, crd_outfd;
-static int __init compr_fill(void *buf, unsigned int len)
+static long __init compr_fill(void *buf, unsigned long len)
{
- int r = sys_read(crd_infd, buf, len);
+ long r = sys_read(crd_infd, buf, len);
if (r < 0)
printk(KERN_ERR "RAMDISK: error while reading compressed data");
else if (r == 0)
@@ -321,13 +321,13 @@ static int __init compr_fill(void *buf, unsigned int len)
return r;
}
-static int __init compr_flush(void *window, unsigned int outcnt)
+static long __init compr_flush(void *window, unsigned long outcnt)
{
- int written = sys_write(crd_outfd, window, outcnt);
+ long written = sys_write(crd_outfd, window, outcnt);
if (written != outcnt) {
if (decompress_error == 0)
printk(KERN_ERR
- "RAMDISK: incomplete write (%d != %d)\n",
+ "RAMDISK: incomplete write (%ld != %ld)\n",
written, outcnt);
decompress_error = 1;
return -1;
diff --git a/init/initramfs.c b/init/initramfs.c
index a8497fab1c3d..a7566031242e 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -19,6 +19,29 @@
#include <linux/syscalls.h>
#include <linux/utime.h>
+static ssize_t __init xwrite(int fd, const char *p, size_t count)
+{
+ ssize_t out = 0;
+
+ /* sys_write only can write MAX_RW_COUNT aka 2G-4K bytes at most */
+ while (count) {
+ ssize_t rv = sys_write(fd, p, count);
+
+ if (rv < 0) {
+ if (rv == -EINTR || rv == -EAGAIN)
+ continue;
+ return out ? out : rv;
+ } else if (rv == 0)
+ break;
+
+ p += rv;
+ out += rv;
+ count -= rv;
+ }
+
+ return out;
+}
+
static __initdata char *message;
static void __init error(char *x)
{
@@ -174,7 +197,7 @@ static __initdata enum state {
} state, next_state;
static __initdata char *victim;
-static __initdata unsigned count;
+static unsigned long count __initdata;
static __initdata loff_t this_header, next_header;
static inline void __init eat(unsigned n)
@@ -186,7 +209,7 @@ static inline void __init eat(unsigned n)
static __initdata char *vcollected;
static __initdata char *collected;
-static __initdata int remains;
+static long remains __initdata;
static __initdata char *collect;
static void __init read_into(char *buf, unsigned size, enum state next)
@@ -213,7 +236,7 @@ static int __init do_start(void)
static int __init do_collect(void)
{
- unsigned n = remains;
+ unsigned long n = remains;
if (count < n)
n = count;
memcpy(collect, victim, n);
@@ -346,7 +369,7 @@ static int __init do_name(void)
static int __init do_copy(void)
{
if (count >= body_len) {
- sys_write(wfd, victim, body_len);
+ xwrite(wfd, victim, body_len);
sys_close(wfd);
do_utime(vcollected, mtime);
kfree(vcollected);
@@ -354,7 +377,7 @@ static int __init do_copy(void)
state = SkipIt;
return 0;
} else {
- sys_write(wfd, victim, count);
+ xwrite(wfd, victim, count);
body_len -= count;
eat(count);
return 1;
@@ -384,7 +407,7 @@ static __initdata int (*actions[])(void) = {
[Reset] = do_reset,
};
-static int __init write_buffer(char *buf, unsigned len)
+static long __init write_buffer(char *buf, unsigned long len)
{
count = len;
victim = buf;
@@ -394,11 +417,11 @@ static int __init write_buffer(char *buf, unsigned len)
return len - count;
}
-static int __init flush_buffer(void *bufv, unsigned len)
+static long __init flush_buffer(void *bufv, unsigned long len)
{
char *buf = (char *) bufv;
- int written;
- int origLen = len;
+ long written;
+ long origLen = len;
if (message)
return -1;
while ((written = write_buffer(buf, len)) < len && !message) {
@@ -417,13 +440,13 @@ static int __init flush_buffer(void *bufv, unsigned len)
return origLen;
}
-static unsigned my_inptr; /* index of next byte to be processed in inbuf */
+static unsigned long my_inptr; /* index of next byte to be processed in inbuf */
#include <linux/decompress/generic.h>
-static char * __init unpack_to_rootfs(char *buf, unsigned len)
+static char * __init unpack_to_rootfs(char *buf, unsigned long len)
{
- int written, res;
+ long written;
decompress_fn decompress;
const char *compress_name;
static __initdata char msg_buf[64];
@@ -457,7 +480,7 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len)
decompress = decompress_method(buf, len, &compress_name);
pr_debug("Detected %s compressed data\n", compress_name);
if (decompress) {
- res = decompress(buf, len, NULL, flush_buffer, NULL,
+ int res = decompress(buf, len, NULL, flush_buffer, NULL,
&my_inptr, error);
if (res)
error("decompressor failed");
@@ -603,8 +626,13 @@ static int __init populate_rootfs(void)
fd = sys_open("/initrd.image",
O_WRONLY|O_CREAT, 0700);
if (fd >= 0) {
- sys_write(fd, (char *)initrd_start,
- initrd_end - initrd_start);
+ ssize_t written = xwrite(fd, (char *)initrd_start,
+ initrd_end - initrd_start);
+
+ if (written != initrd_end - initrd_start)
+ pr_err("/initrd.image: incomplete write (%zd != %ld)\n",
+ written, initrd_end - initrd_start);
+
sys_close(fd);
free_initrd();
}
diff --git a/ipc/shm.c b/ipc/shm.c
index 89fc354156cb..7fc9f9f3a26b 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -178,6 +178,7 @@ static void shm_rcu_free(struct rcu_head *head)
static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
{
+ list_del(&s->shm_clist);
ipc_rmid(&shm_ids(ns), &s->shm_perm);
}
@@ -268,37 +269,6 @@ static void shm_close(struct vm_area_struct *vma)
}
/* Called with ns->shm_ids(ns).rwsem locked */
-static int shm_try_destroy_current(int id, void *p, void *data)
-{
- struct ipc_namespace *ns = data;
- struct kern_ipc_perm *ipcp = p;
- struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
-
- if (shp->shm_creator != current)
- return 0;
-
- /*
- * Mark it as orphaned to destroy the segment when
- * kernel.shm_rmid_forced is changed.
- * It is noop if the following shm_may_destroy() returns true.
- */
- shp->shm_creator = NULL;
-
- /*
- * Don't even try to destroy it. If shm_rmid_forced=0 and IPC_RMID
- * is not set, it shouldn't be deleted here.
- */
- if (!ns->shm_rmid_forced)
- return 0;
-
- if (shm_may_destroy(ns, shp)) {
- shm_lock_by_ptr(shp);
- shm_destroy(ns, shp);
- }
- return 0;
-}
-
-/* Called with ns->shm_ids(ns).rwsem locked */
static int shm_try_destroy_orphaned(int id, void *p, void *data)
{
struct ipc_namespace *ns = data;
@@ -329,18 +299,50 @@ void shm_destroy_orphaned(struct ipc_namespace *ns)
up_write(&shm_ids(ns).rwsem);
}
-
+/* Locking assumes this will only be called with task == current */
void exit_shm(struct task_struct *task)
{
struct ipc_namespace *ns = task->nsproxy->ipc_ns;
+ struct shmid_kernel *shp, *n;
- if (shm_ids(ns).in_use == 0)
+ if (list_empty(&task->sysvshm.shm_clist))
return;
- /* Destroy all already created segments, but not mapped yet */
+ /*
+ * If kernel.shm_rmid_forced is not set then only keep track of
+ * which shmids are orphaned, so that a later set of the sysctl
+ * can clean them up.
+ */
+ if (!ns->shm_rmid_forced) {
+ down_read(&shm_ids(ns).rwsem);
+ list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
+ shp->shm_creator = NULL;
+ /*
+ * Only under read lock but we are only called on current
+ * so no entry on the list will be shared.
+ */
+ list_del(&task->sysvshm.shm_clist);
+ up_read(&shm_ids(ns).rwsem);
+ return;
+ }
+
+ /*
+ * Destroy all already created segments, that were not yet mapped,
+ * and mark any mapped as orphan to cover the sysctl toggling.
+ * Destroy is skipped if shm_may_destroy() returns false.
+ */
down_write(&shm_ids(ns).rwsem);
- if (shm_ids(ns).in_use)
- idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
+ list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
+ shp->shm_creator = NULL;
+
+ if (shm_may_destroy(ns, shp)) {
+ shm_lock_by_ptr(shp);
+ shm_destroy(ns, shp);
+ }
+ }
+
+ /* Remove the list head from any segments still attached. */
+ list_del(&task->sysvshm.shm_clist);
up_write(&shm_ids(ns).rwsem);
}
@@ -561,6 +563,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
shp->shm_nattch = 0;
shp->shm_file = file;
shp->shm_creator = current;
+ list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
/*
* shmid gets reported as "inode#" in /proc/pid/maps.
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 8e9bc9c3dbb7..c447cd9848d1 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -106,7 +106,7 @@ static inline struct audit_entry *audit_init_entry(u32 field_count)
if (unlikely(!entry))
return NULL;
- fields = kzalloc(sizeof(*fields) * field_count, GFP_KERNEL);
+ fields = kcalloc(field_count, sizeof(*fields), GFP_KERNEL);
if (unlikely(!fields)) {
kfree(entry);
return NULL;
@@ -160,7 +160,7 @@ static __u32 *classes[AUDIT_SYSCALL_CLASSES];
int __init audit_register_class(int class, unsigned *list)
{
- __u32 *p = kzalloc(AUDIT_BITMASK_SIZE * sizeof(__u32), GFP_KERNEL);
+ __u32 *p = kcalloc(AUDIT_BITMASK_SIZE, sizeof(__u32), GFP_KERNEL);
if (!p)
return -ENOMEM;
while (*list != ~0U) {
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 9fd4246b04b8..e1d1d1952bfa 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -9,7 +9,6 @@
#include <linux/page-flags.h>
#include <linux/mmzone.h>
#include <linux/kbuild.h>
-#include <linux/page_cgroup.h>
#include <linux/log2.h>
#include <linux/spinlock_types.h>
@@ -18,7 +17,6 @@ void foo(void)
/* The enum constants to put into include/generated/bounds.h */
DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
- DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
#ifdef CONFIG_SMP
DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
#endif
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index c445e392e93f..46b7c31537ee 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -167,6 +167,11 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
/* For mmu_notifiers */
const unsigned long mmun_start = addr;
const unsigned long mmun_end = addr + PAGE_SIZE;
+ struct mem_cgroup *memcg;
+
+ err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg);
+ if (err)
+ return err;
/* For try_to_free_swap() and munlock_vma_page() below */
lock_page(page);
@@ -179,6 +184,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
get_page(kpage);
page_add_new_anon_rmap(kpage, vma, addr);
+ mem_cgroup_commit_charge(kpage, memcg, false);
+ lru_cache_add_active_or_unevictable(kpage, vma);
if (!PageAnon(page)) {
dec_mm_counter(mm, MM_FILEPAGES);
@@ -200,6 +207,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
err = 0;
unlock:
+ mem_cgroup_cancel_charge(kpage, memcg);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
unlock_page(page);
return err;
@@ -315,18 +323,11 @@ retry:
if (!new_page)
goto put_old;
- if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
- goto put_new;
-
__SetPageUptodate(new_page);
copy_highpage(new_page, old_page);
copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
ret = __replace_page(vma, vaddr, old_page, new_page);
- if (ret)
- mem_cgroup_uncharge_page(new_page);
-
-put_new:
page_cache_release(new_page);
put_old:
put_page(old_page);
diff --git a/kernel/fork.c b/kernel/fork.c
index 6a13c46cd87d..dd8864fade31 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -365,12 +365,11 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
*/
down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
- mm->locked_vm = 0;
- mm->mmap = NULL;
- mm->vmacache_seqnum = 0;
- mm->map_count = 0;
- cpumask_clear(mm_cpumask(mm));
- mm->mm_rb = RB_ROOT;
+ mm->total_vm = oldmm->total_vm;
+ mm->shared_vm = oldmm->shared_vm;
+ mm->exec_vm = oldmm->exec_vm;
+ mm->stack_vm = oldmm->stack_vm;
+
rb_link = &mm->mm_rb.rb_node;
rb_parent = NULL;
pprev = &mm->mmap;
@@ -529,17 +528,28 @@ static void mm_init_aio(struct mm_struct *mm)
static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
{
+ mm->mmap = NULL;
+ mm->mm_rb = RB_ROOT;
+ mm->vmacache_seqnum = 0;
atomic_set(&mm->mm_users, 1);
atomic_set(&mm->mm_count, 1);
init_rwsem(&mm->mmap_sem);
INIT_LIST_HEAD(&mm->mmlist);
mm->core_state = NULL;
atomic_long_set(&mm->nr_ptes, 0);
+ mm->map_count = 0;
+ mm->locked_vm = 0;
+ mm->pinned_vm = 0;
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
spin_lock_init(&mm->page_table_lock);
+ mm_init_cpumask(mm);
mm_init_aio(mm);
mm_init_owner(mm, p);
+ mmu_notifier_mm_init(mm);
clear_tlb_flush_pending(mm);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
+ mm->pmd_huge_pte = NULL;
+#endif
if (current->mm) {
mm->flags = current->mm->flags & MMF_INIT_MASK;
@@ -549,11 +559,17 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
mm->def_flags = 0;
}
- if (likely(!mm_alloc_pgd(mm))) {
- mmu_notifier_mm_init(mm);
- return mm;
- }
+ if (mm_alloc_pgd(mm))
+ goto fail_nopgd;
+
+ if (init_new_context(p, mm))
+ goto fail_nocontext;
+ return mm;
+
+fail_nocontext:
+ mm_free_pgd(mm);
+fail_nopgd:
free_mm(mm);
return NULL;
}
@@ -587,7 +603,6 @@ struct mm_struct *mm_alloc(void)
return NULL;
memset(mm, 0, sizeof(*mm));
- mm_init_cpumask(mm);
return mm_init(mm, current);
}
@@ -819,17 +834,10 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
goto fail_nomem;
memcpy(mm, oldmm, sizeof(*mm));
- mm_init_cpumask(mm);
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
- mm->pmd_huge_pte = NULL;
-#endif
if (!mm_init(mm, tsk))
goto fail_nomem;
- if (init_new_context(tsk, mm))
- goto fail_nocontext;
-
dup_mm_exe_file(oldmm, mm);
err = dup_mmap(mm, oldmm);
@@ -851,15 +859,6 @@ free_pt:
fail_nomem:
return NULL;
-
-fail_nocontext:
- /*
- * If init_new_context() failed, we cannot use mmput() to free the mm
- * because it calls destroy_context()
- */
- mm_free_pgd(mm);
- free_mm(mm);
- return NULL;
}
static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
@@ -1328,6 +1327,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
if (retval)
goto bad_fork_cleanup_policy;
/* copy all the process information */
+ shm_init_task(p);
retval = copy_semundo(clone_flags, p);
if (retval)
goto bad_fork_cleanup_audit;
@@ -1873,6 +1873,11 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
*/
exit_sem(current);
}
+ if (unshare_flags & CLONE_NEWIPC) {
+ /* Orphan segments in old ns (see sem above). */
+ exit_shm(current);
+ shm_init_task(current);
+ }
if (new_nsproxy)
switch_task_namespaces(current, new_nsproxy);
diff --git a/kernel/panic.c b/kernel/panic.c
index 62e16cef9cc2..d09dc5c32c67 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -224,6 +224,7 @@ static const struct tnt tnts[] = {
{ TAINT_FIRMWARE_WORKAROUND, 'I', ' ' },
{ TAINT_OOT_MODULE, 'O', ' ' },
{ TAINT_UNSIGNED_MODULE, 'E', ' ' },
+ { TAINT_SOFTLOCKUP, 'L', ' ' },
};
/**
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 424c2d4265c9..86535c06a154 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -71,7 +71,7 @@ static DEFINE_SPINLOCK(hash_lock);
* SIGEV values. Here we put out an error if this assumption fails.
*/
#if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \
- ~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD))
+ ~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD))
#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
#endif
@@ -252,7 +252,8 @@ static int posix_get_monotonic_coarse(clockid_t which_clock,
return 0;
}
-static int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp)
+static int posix_get_coarse_res(const clockid_t which_clock,
+ struct timespec *tp)
{
*tp = ktime_to_timespec(KTIME_LOW_RES);
return 0;
@@ -333,14 +334,16 @@ static __init int init_posix_timers(void)
posix_timers_register_clock(CLOCK_REALTIME, &clock_realtime);
posix_timers_register_clock(CLOCK_MONOTONIC, &clock_monotonic);
posix_timers_register_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
- posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
- posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
+ posix_timers_register_clock(CLOCK_REALTIME_COARSE,
+ &clock_realtime_coarse);
+ posix_timers_register_clock(CLOCK_MONOTONIC_COARSE,
+ &clock_monotonic_coarse);
posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime);
posix_timers_register_clock(CLOCK_TAI, &clock_tai);
posix_timers_cache = kmem_cache_create("posix_timers_cache",
- sizeof (struct k_itimer), 0, SLAB_PANIC,
- NULL);
+ sizeof(struct k_itimer), 0,
+ SLAB_PANIC, NULL);
return 0;
}
@@ -494,11 +497,11 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
return ret;
}
-static struct pid *good_sigevent(sigevent_t * event)
+static struct pid *good_sigevent(sigevent_t *event)
{
struct task_struct *rtn = current->group_leader;
- if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
+ if ((event->sigev_notify & SIGEV_THREAD_ID) &&
(!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
!same_thread_group(rtn, current) ||
(event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
@@ -515,18 +518,18 @@ void posix_timers_register_clock(const clockid_t clock_id,
struct k_clock *new_clock)
{
if ((unsigned) clock_id >= MAX_CLOCKS) {
- printk(KERN_WARNING "POSIX clock register failed for clock_id %d\n",
+ pr_warn("POSIX clock register failed for clock_id %d\n",
clock_id);
return;
}
if (!new_clock->clock_get) {
- printk(KERN_WARNING "POSIX clock id %d lacks clock_get()\n",
+ pr_warn("POSIX clock id %d lacks clock_get()\n",
clock_id);
return;
}
if (!new_clock->clock_getres) {
- printk(KERN_WARNING "POSIX clock id %d lacks clock_getres()\n",
+ pr_warn("POSIX clock id %d lacks clock_getres()\n",
clock_id);
return;
}
@@ -535,7 +538,7 @@ void posix_timers_register_clock(const clockid_t clock_id,
}
EXPORT_SYMBOL_GPL(posix_timers_register_clock);
-static struct k_itimer * alloc_posix_timer(void)
+static struct k_itimer *alloc_posix_timer(void)
{
struct k_itimer *tmr;
tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL);
@@ -622,7 +625,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
new_timer->it_overrun = -1;
if (timer_event_spec) {
- if (copy_from_user(&event, timer_event_spec, sizeof (event))) {
+ if (copy_from_user(&event, timer_event_spec, sizeof(event))) {
error = -EFAULT;
goto out;
}
@@ -647,7 +650,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
new_timer->sigq->info.si_code = SI_TIMER;
if (copy_to_user(created_timer_id,
- &new_timer_id, sizeof (new_timer_id))) {
+ &new_timer_id, sizeof(new_timer_id))) {
error = -EFAULT;
goto out;
}
@@ -748,7 +751,8 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
*/
if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING ||
(timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
- timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
+ timr->it_overrun += (unsigned int) hrtimer_forward(timer, now,
+ iv);
remaining = ktime_sub(hrtimer_get_expires(timer), now);
/* Return 0 only, when the timer is expired and not pending */
@@ -785,7 +789,7 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
unlock_timer(timr, flags);
- if (!ret && copy_to_user(setting, &cur_setting, sizeof (cur_setting)))
+ if (!ret && copy_to_user(setting, &cur_setting, sizeof(cur_setting)))
return -EFAULT;
return ret;
@@ -837,7 +841,7 @@ common_timer_set(struct k_itimer *timr, int flags,
if (hrtimer_try_to_cancel(timer) < 0)
return TIMER_RETRY;
- timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
+ timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
~REQUEUE_PENDING;
timr->it_overrun_last = 0;
@@ -857,9 +861,8 @@ common_timer_set(struct k_itimer *timr, int flags,
/* SIGEV_NONE timers are not queued ! See common_timer_get */
if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
/* Setup correct expiry time for relative timers */
- if (mode == HRTIMER_MODE_REL) {
+ if (mode == HRTIMER_MODE_REL)
hrtimer_add_expires(timer, timer->base->get_time());
- }
return 0;
}
@@ -882,7 +885,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
if (!new_setting)
return -EINVAL;
- if (copy_from_user(&new_spec, new_setting, sizeof (new_spec)))
+ if (copy_from_user(&new_spec, new_setting, sizeof(new_spec)))
return -EFAULT;
if (!timespec_valid(&new_spec.it_interval) ||
@@ -901,12 +904,12 @@ retry:
unlock_timer(timr, flag);
if (error == TIMER_RETRY) {
- rtn = NULL; // We already got the old time...
+ rtn = NULL; /* We already got the old time... */
goto retry;
}
if (old_setting && !error &&
- copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
+ copy_to_user(old_setting, &old_spec, sizeof(old_spec)))
error = -EFAULT;
return error;
@@ -1008,14 +1011,14 @@ SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
if (!kc || !kc->clock_set)
return -EINVAL;
- if (copy_from_user(&new_tp, tp, sizeof (*tp)))
+ if (copy_from_user(&new_tp, tp, sizeof(*tp)))
return -EFAULT;
return kc->clock_set(which_clock, &new_tp);
}
SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
- struct timespec __user *,tp)
+ struct timespec __user *, tp)
{
struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec kernel_tp;
@@ -1026,7 +1029,7 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
error = kc->clock_get(which_clock, &kernel_tp);
- if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp)))
+ if (!error && copy_to_user(tp, &kernel_tp, sizeof(kernel_tp)))
error = -EFAULT;
return error;
@@ -1067,7 +1070,7 @@ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
error = kc->clock_getres(which_clock, &rtn_tp);
- if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp)))
+ if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof(rtn_tp)))
error = -EFAULT;
return error;
@@ -1096,7 +1099,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
if (!kc->nsleep)
return -ENANOSLEEP_NOTSUP;
- if (copy_from_user(&t, rqtp, sizeof (struct timespec)))
+ if (copy_from_user(&t, rqtp, sizeof(struct timespec)))
return -EFAULT;
if (!timespec_valid(&t))
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index ea2d5f6962ed..83f7a9524942 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -266,6 +266,7 @@ static u32 clear_idx;
#define LOG_ALIGN __alignof__(struct printk_log)
#endif
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
+#define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT)
static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
static char *log_buf = __log_buf;
static u32 log_buf_len = __LOG_BUF_LEN;
@@ -828,34 +829,74 @@ void log_buf_kexec_setup(void)
/* requested log_buf_len from kernel cmdline */
static unsigned long __initdata new_log_buf_len;
-/* save requested log_buf_len since it's too early to process it */
-static int __init log_buf_len_setup(char *str)
+/* we practice scaling the ring buffer by powers of 2 */
+static void __init log_buf_len_update(unsigned size)
{
- unsigned size = memparse(str, &str);
-
if (size)
size = roundup_pow_of_two(size);
if (size > log_buf_len)
new_log_buf_len = size;
+}
+
+/* save requested log_buf_len since it's too early to process it */
+static int __init log_buf_len_setup(char *str)
+{
+ unsigned size = memparse(str, &str);
+
+ log_buf_len_update(size);
return 0;
}
early_param("log_buf_len", log_buf_len_setup);
+static void __init log_buf_add_cpu(void)
+{
+ unsigned int cpu_extra;
+
+ /*
+ * archs should set up cpu_possible_bits properly with
+ * set_cpu_possible() after setup_arch() but just in
+ * case lets ensure this is valid.
+ */
+ if (num_possible_cpus() == 1)
+ return;
+
+ cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN;
+
+ /* by default this will only continue through for large > 64 CPUs */
+ if (cpu_extra <= __LOG_BUF_LEN / 2)
+ return;
+
+ pr_info("log_buf_len individual max cpu contribution: %d bytes\n",
+ __LOG_CPU_MAX_BUF_LEN);
+ pr_info("log_buf_len total cpu_extra contributions: %d bytes\n",
+ cpu_extra);
+ pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN);
+
+ log_buf_len_update(cpu_extra + __LOG_BUF_LEN);
+}
+
void __init setup_log_buf(int early)
{
unsigned long flags;
char *new_log_buf;
int free;
+ if (log_buf != __log_buf)
+ return;
+
+ if (!early && !new_log_buf_len)
+ log_buf_add_cpu();
+
if (!new_log_buf_len)
return;
if (early) {
new_log_buf =
- memblock_virt_alloc(new_log_buf_len, PAGE_SIZE);
+ memblock_virt_alloc(new_log_buf_len, LOG_ALIGN);
} else {
- new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len, 0);
+ new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len,
+ LOG_ALIGN);
}
if (unlikely(!new_log_buf)) {
@@ -872,7 +913,7 @@ void __init setup_log_buf(int early)
memcpy(log_buf, __log_buf, __LOG_BUF_LEN);
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
- pr_info("log_buf_len: %d\n", log_buf_len);
+ pr_info("log_buf_len: %d bytes\n", log_buf_len);
pr_info("early log buf free: %d(%d%%)\n",
free, (free * 100) / __LOG_BUF_LEN);
}
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c
index 12d6ebbfdd83..0dbab6d1acb4 100644
--- a/kernel/test_kprobes.c
+++ b/kernel/test_kprobes.c
@@ -14,6 +14,8 @@
* the GNU General Public License for more details.
*/
+#define pr_fmt(fmt) "Kprobe smoke test: " fmt
+
#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/random.h>
@@ -41,8 +43,7 @@ static void kp_post_handler(struct kprobe *p, struct pt_regs *regs,
{
if (preh_val != (rand1 / div_factor)) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "incorrect value in post_handler\n");
+ pr_err("incorrect value in post_handler\n");
}
posth_val = preh_val + div_factor;
}
@@ -59,8 +60,7 @@ static int test_kprobe(void)
ret = register_kprobe(&kp);
if (ret < 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "register_kprobe returned %d\n", ret);
+ pr_err("register_kprobe returned %d\n", ret);
return ret;
}
@@ -68,14 +68,12 @@ static int test_kprobe(void)
unregister_kprobe(&kp);
if (preh_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kprobe pre_handler not called\n");
+ pr_err("kprobe pre_handler not called\n");
handler_errors++;
}
if (posth_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kprobe post_handler not called\n");
+ pr_err("kprobe post_handler not called\n");
handler_errors++;
}
@@ -98,8 +96,7 @@ static void kp_post_handler2(struct kprobe *p, struct pt_regs *regs,
{
if (preh_val != (rand1 / div_factor) + 1) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "incorrect value in post_handler2\n");
+ pr_err("incorrect value in post_handler2\n");
}
posth_val = preh_val + div_factor;
}
@@ -120,8 +117,7 @@ static int test_kprobes(void)
kp.flags = 0;
ret = register_kprobes(kps, 2);
if (ret < 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "register_kprobes returned %d\n", ret);
+ pr_err("register_kprobes returned %d\n", ret);
return ret;
}
@@ -130,14 +126,12 @@ static int test_kprobes(void)
ret = target(rand1);
if (preh_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kprobe pre_handler not called\n");
+ pr_err("kprobe pre_handler not called\n");
handler_errors++;
}
if (posth_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kprobe post_handler not called\n");
+ pr_err("kprobe post_handler not called\n");
handler_errors++;
}
@@ -146,14 +140,12 @@ static int test_kprobes(void)
ret = target2(rand1);
if (preh_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kprobe pre_handler2 not called\n");
+ pr_err("kprobe pre_handler2 not called\n");
handler_errors++;
}
if (posth_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kprobe post_handler2 not called\n");
+ pr_err("kprobe post_handler2 not called\n");
handler_errors++;
}
@@ -166,8 +158,7 @@ static u32 j_kprobe_target(u32 value)
{
if (value != rand1) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "incorrect value in jprobe handler\n");
+ pr_err("incorrect value in jprobe handler\n");
}
jph_val = rand1;
@@ -186,16 +177,14 @@ static int test_jprobe(void)
ret = register_jprobe(&jp);
if (ret < 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "register_jprobe returned %d\n", ret);
+ pr_err("register_jprobe returned %d\n", ret);
return ret;
}
ret = target(rand1);
unregister_jprobe(&jp);
if (jph_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "jprobe handler not called\n");
+ pr_err("jprobe handler not called\n");
handler_errors++;
}
@@ -217,24 +206,21 @@ static int test_jprobes(void)
jp.kp.flags = 0;
ret = register_jprobes(jps, 2);
if (ret < 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "register_jprobes returned %d\n", ret);
+ pr_err("register_jprobes returned %d\n", ret);
return ret;
}
jph_val = 0;
ret = target(rand1);
if (jph_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "jprobe handler not called\n");
+ pr_err("jprobe handler not called\n");
handler_errors++;
}
jph_val = 0;
ret = target2(rand1);
if (jph_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "jprobe handler2 not called\n");
+ pr_err("jprobe handler2 not called\n");
handler_errors++;
}
unregister_jprobes(jps, 2);
@@ -256,13 +242,11 @@ static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
if (ret != (rand1 / div_factor)) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "incorrect value in kretprobe handler\n");
+ pr_err("incorrect value in kretprobe handler\n");
}
if (krph_val == 0) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "call to kretprobe entry handler failed\n");
+ pr_err("call to kretprobe entry handler failed\n");
}
krph_val = rand1;
@@ -281,16 +265,14 @@ static int test_kretprobe(void)
ret = register_kretprobe(&rp);
if (ret < 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "register_kretprobe returned %d\n", ret);
+ pr_err("register_kretprobe returned %d\n", ret);
return ret;
}
ret = target(rand1);
unregister_kretprobe(&rp);
if (krph_val != rand1) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kretprobe handler not called\n");
+ pr_err("kretprobe handler not called\n");
handler_errors++;
}
@@ -303,13 +285,11 @@ static int return_handler2(struct kretprobe_instance *ri, struct pt_regs *regs)
if (ret != (rand1 / div_factor) + 1) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "incorrect value in kretprobe handler2\n");
+ pr_err("incorrect value in kretprobe handler2\n");
}
if (krph_val == 0) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "call to kretprobe entry handler failed\n");
+ pr_err("call to kretprobe entry handler failed\n");
}
krph_val = rand1;
@@ -332,24 +312,21 @@ static int test_kretprobes(void)
rp.kp.flags = 0;
ret = register_kretprobes(rps, 2);
if (ret < 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "register_kretprobe returned %d\n", ret);
+ pr_err("register_kretprobe returned %d\n", ret);
return ret;
}
krph_val = 0;
ret = target(rand1);
if (krph_val != rand1) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kretprobe handler not called\n");
+ pr_err("kretprobe handler not called\n");
handler_errors++;
}
krph_val = 0;
ret = target2(rand1);
if (krph_val != rand1) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kretprobe handler2 not called\n");
+ pr_err("kretprobe handler2 not called\n");
handler_errors++;
}
unregister_kretprobes(rps, 2);
@@ -368,7 +345,7 @@ int init_test_probes(void)
rand1 = prandom_u32();
} while (rand1 <= div_factor);
- printk(KERN_INFO "Kprobe smoke test started\n");
+ pr_info("started\n");
num_tests++;
ret = test_kprobe();
if (ret < 0)
@@ -402,13 +379,11 @@ int init_test_probes(void)
#endif /* CONFIG_KRETPROBES */
if (errors)
- printk(KERN_ERR "BUG: Kprobe smoke test: %d out of "
- "%d tests failed\n", errors, num_tests);
+ pr_err("BUG: %d out of %d tests failed\n", errors, num_tests);
else if (handler_errors)
- printk(KERN_ERR "BUG: Kprobe smoke test: %d error(s) "
- "running handlers\n", handler_errors);
+ pr_err("BUG: %d error(s) running handlers\n", handler_errors);
else
- printk(KERN_INFO "Kprobe smoke test passed successfully\n");
+ pr_info("passed successfully\n");
return 0;
}
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index c3319bd1b040..a8d6914030fe 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -260,9 +260,11 @@ static void watchdog_overflow_callback(struct perf_event *event,
return;
if (hardlockup_panic)
- panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+ panic("Watchdog detected hard LOCKUP on cpu %d",
+ this_cpu);
else
- WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+ WARN(1, "Watchdog detected hard LOCKUP on cpu %d",
+ this_cpu);
__this_cpu_write(hard_watchdog_warn, true);
return;
@@ -345,7 +347,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
}
}
- printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
+ pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
smp_processor_id(), duration,
current->comm, task_pid_nr(current));
print_modules();
@@ -366,6 +368,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
smp_mb__after_atomic();
}
+ add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
if (softlockup_panic)
panic("softlockup: hung tasks");
__this_cpu_write(soft_watchdog_warn, true);
@@ -484,7 +487,7 @@ static int watchdog_nmi_enable(unsigned int cpu)
if (PTR_ERR(event) == -EOPNOTSUPP)
pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
else if (PTR_ERR(event) == -ENOENT)
- pr_warning("disabled (cpu%i): hardware events not enabled\n",
+ pr_warn("disabled (cpu%i): hardware events not enabled\n",
cpu);
else
pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
diff --git a/lib/Kconfig b/lib/Kconfig
index 334f7722a999..fdf90f3aa90d 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -177,6 +177,13 @@ config CRC8
when they need to do cyclic redundancy check according CRC8
algorithm. Module will be called crc8.
+config CRC64_ECMA
+ tristate "CRC64 ECMA function"
+ help
+ This option provides CRC64 ECMA function. Drivers may select this
+ when they need to do cyclic redundancy check according to the CRC64
+ ECMA algorithm.
+
config AUDIT_GENERIC
bool
depends on AUDIT && !AUDIT_ARCH
@@ -396,6 +403,39 @@ config CPU_RMAP
config DQL
bool
+config GLOB
+ bool
+# This actually supports modular compilation, but the module overhead
+# is ridiculous for the amount of code involved. Until an out-of-tree
+# driver asks for it, we'll just link it directly it into the kernel
+# when required. Since we're ignoring out-of-tree users, there's also
+# no need bother prompting for a manual decision:
+# prompt "glob_match() function"
+ help
+ This option provides a glob_match function for performing
+ simple text pattern matching. It originated in the ATA code
+ to blacklist particular drive models, but other device drivers
+ may need similar functionality.
+
+ All drivers in the Linux kernel tree that require this function
+ should automatically select this option. Say N unless you
+ are compiling an out-of tree driver which tells you that it
+ depends on this.
+
+config GLOB_SELFTEST
+ bool "glob self-test on init"
+ default n
+ depends on GLOB
+ help
+ This option enables a simple self-test of the glob_match
+ function on startup. It is primarily useful for people
+ working on the code to ensure they haven't introduced any
+ regressions.
+
+ It only adds a little bit of code and slows kernel boot (or
+ module load) by a small amount, so you're welcome to play with
+ it, but you probably don't need it.
+
#
# Netlink attribute parsing support is select'ed if needed
#
@@ -474,4 +514,11 @@ config UCS2_STRING
source "lib/fonts/Kconfig"
+#
+# sg chaining option
+#
+
+config ARCH_HAS_SG_CHAIN
+ def_bool n
+
endmenu
diff --git a/lib/Makefile b/lib/Makefile
index ba967a19edba..e48067c304d7 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_CRC32) += crc32.o
obj-$(CONFIG_CRC7) += crc7.o
obj-$(CONFIG_LIBCRC32C) += libcrc32c.o
obj-$(CONFIG_CRC8) += crc8.o
+obj-$(CONFIG_CRC64_ECMA) += crc64_ecma.o
obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
@@ -136,6 +137,8 @@ obj-$(CONFIG_CORDIC) += cordic.o
obj-$(CONFIG_DQL) += dynamic_queue_limits.o
+obj-$(CONFIG_GLOB) += glob.o
+
obj-$(CONFIG_MPILIB) += mpi/
obj-$(CONFIG_SIGNATURE) += digsig.o
diff --git a/lib/cmdline.c b/lib/cmdline.c
index d4932f745e92..76a712e6e20e 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -121,11 +121,7 @@ EXPORT_SYMBOL(get_options);
* @retptr: (output) Optional pointer to next char after parse completes
*
* Parses a string into a number. The number stored at @ptr is
- * potentially suffixed with %K (for kilobytes, or 1024 bytes),
- * %M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or
- * 1073741824). If the number is suffixed with K, M, or G, then
- * the return value is the number multiplied by one kilobyte, one
- * megabyte, or one gigabyte, respectively.
+ * potentially suffixed with K, M, G, T, P, E.
*/
unsigned long long memparse(const char *ptr, char **retptr)
@@ -135,6 +131,15 @@ unsigned long long memparse(const char *ptr, char **retptr)
unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
switch (*endptr) {
+ case 'E':
+ case 'e':
+ ret <<= 10;
+ case 'P':
+ case 'p':
+ ret <<= 10;
+ case 'T':
+ case 't':
+ ret <<= 10;
case 'G':
case 'g':
ret <<= 10;
diff --git a/lib/crc64_ecma.c b/lib/crc64_ecma.c
new file mode 100644
index 000000000000..41629ea5a60c
--- /dev/null
+++ b/lib/crc64_ecma.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/crc64_ecma.h>
+
+
+#define CRC64_BYTE_MASK 0xFF
+#define CRC64_TABLE_SIZE 256
+
+
+struct crc64_table {
+ u64 seed;
+ u64 table[CRC64_TABLE_SIZE];
+};
+
+
+static struct crc64_table CRC64_ECMA_182 = {
+ CRC64_DEFAULT_INITVAL,
+ {
+ 0x0000000000000000ULL,
+ 0xb32e4cbe03a75f6fULL,
+ 0xf4843657a840a05bULL,
+ 0x47aa7ae9abe7ff34ULL,
+ 0x7bd0c384ff8f5e33ULL,
+ 0xc8fe8f3afc28015cULL,
+ 0x8f54f5d357cffe68ULL,
+ 0x3c7ab96d5468a107ULL,
+ 0xf7a18709ff1ebc66ULL,
+ 0x448fcbb7fcb9e309ULL,
+ 0x0325b15e575e1c3dULL,
+ 0xb00bfde054f94352ULL,
+ 0x8c71448d0091e255ULL,
+ 0x3f5f08330336bd3aULL,
+ 0x78f572daa8d1420eULL,
+ 0xcbdb3e64ab761d61ULL,
+ 0x7d9ba13851336649ULL,
+ 0xceb5ed8652943926ULL,
+ 0x891f976ff973c612ULL,
+ 0x3a31dbd1fad4997dULL,
+ 0x064b62bcaebc387aULL,
+ 0xb5652e02ad1b6715ULL,
+ 0xf2cf54eb06fc9821ULL,
+ 0x41e11855055bc74eULL,
+ 0x8a3a2631ae2dda2fULL,
+ 0x39146a8fad8a8540ULL,
+ 0x7ebe1066066d7a74ULL,
+ 0xcd905cd805ca251bULL,
+ 0xf1eae5b551a2841cULL,
+ 0x42c4a90b5205db73ULL,
+ 0x056ed3e2f9e22447ULL,
+ 0xb6409f5cfa457b28ULL,
+ 0xfb374270a266cc92ULL,
+ 0x48190ecea1c193fdULL,
+ 0x0fb374270a266cc9ULL,
+ 0xbc9d3899098133a6ULL,
+ 0x80e781f45de992a1ULL,
+ 0x33c9cd4a5e4ecdceULL,
+ 0x7463b7a3f5a932faULL,
+ 0xc74dfb1df60e6d95ULL,
+ 0x0c96c5795d7870f4ULL,
+ 0xbfb889c75edf2f9bULL,
+ 0xf812f32ef538d0afULL,
+ 0x4b3cbf90f69f8fc0ULL,
+ 0x774606fda2f72ec7ULL,
+ 0xc4684a43a15071a8ULL,
+ 0x83c230aa0ab78e9cULL,
+ 0x30ec7c140910d1f3ULL,
+ 0x86ace348f355aadbULL,
+ 0x3582aff6f0f2f5b4ULL,
+ 0x7228d51f5b150a80ULL,
+ 0xc10699a158b255efULL,
+ 0xfd7c20cc0cdaf4e8ULL,
+ 0x4e526c720f7dab87ULL,
+ 0x09f8169ba49a54b3ULL,
+ 0xbad65a25a73d0bdcULL,
+ 0x710d64410c4b16bdULL,
+ 0xc22328ff0fec49d2ULL,
+ 0x85895216a40bb6e6ULL,
+ 0x36a71ea8a7ace989ULL,
+ 0x0adda7c5f3c4488eULL,
+ 0xb9f3eb7bf06317e1ULL,
+ 0xfe5991925b84e8d5ULL,
+ 0x4d77dd2c5823b7baULL,
+ 0x64b62bcaebc387a1ULL,
+ 0xd7986774e864d8ceULL,
+ 0x90321d9d438327faULL,
+ 0x231c512340247895ULL,
+ 0x1f66e84e144cd992ULL,
+ 0xac48a4f017eb86fdULL,
+ 0xebe2de19bc0c79c9ULL,
+ 0x58cc92a7bfab26a6ULL,
+ 0x9317acc314dd3bc7ULL,
+ 0x2039e07d177a64a8ULL,
+ 0x67939a94bc9d9b9cULL,
+ 0xd4bdd62abf3ac4f3ULL,
+ 0xe8c76f47eb5265f4ULL,
+ 0x5be923f9e8f53a9bULL,
+ 0x1c4359104312c5afULL,
+ 0xaf6d15ae40b59ac0ULL,
+ 0x192d8af2baf0e1e8ULL,
+ 0xaa03c64cb957be87ULL,
+ 0xeda9bca512b041b3ULL,
+ 0x5e87f01b11171edcULL,
+ 0x62fd4976457fbfdbULL,
+ 0xd1d305c846d8e0b4ULL,
+ 0x96797f21ed3f1f80ULL,
+ 0x2557339fee9840efULL,
+ 0xee8c0dfb45ee5d8eULL,
+ 0x5da24145464902e1ULL,
+ 0x1a083bacedaefdd5ULL,
+ 0xa9267712ee09a2baULL,
+ 0x955cce7fba6103bdULL,
+ 0x267282c1b9c65cd2ULL,
+ 0x61d8f8281221a3e6ULL,
+ 0xd2f6b4961186fc89ULL,
+ 0x9f8169ba49a54b33ULL,
+ 0x2caf25044a02145cULL,
+ 0x6b055fede1e5eb68ULL,
+ 0xd82b1353e242b407ULL,
+ 0xe451aa3eb62a1500ULL,
+ 0x577fe680b58d4a6fULL,
+ 0x10d59c691e6ab55bULL,
+ 0xa3fbd0d71dcdea34ULL,
+ 0x6820eeb3b6bbf755ULL,
+ 0xdb0ea20db51ca83aULL,
+ 0x9ca4d8e41efb570eULL,
+ 0x2f8a945a1d5c0861ULL,
+ 0x13f02d374934a966ULL,
+ 0xa0de61894a93f609ULL,
+ 0xe7741b60e174093dULL,
+ 0x545a57dee2d35652ULL,
+ 0xe21ac88218962d7aULL,
+ 0x5134843c1b317215ULL,
+ 0x169efed5b0d68d21ULL,
+ 0xa5b0b26bb371d24eULL,
+ 0x99ca0b06e7197349ULL,
+ 0x2ae447b8e4be2c26ULL,
+ 0x6d4e3d514f59d312ULL,
+ 0xde6071ef4cfe8c7dULL,
+ 0x15bb4f8be788911cULL,
+ 0xa6950335e42fce73ULL,
+ 0xe13f79dc4fc83147ULL,
+ 0x521135624c6f6e28ULL,
+ 0x6e6b8c0f1807cf2fULL,
+ 0xdd45c0b11ba09040ULL,
+ 0x9aefba58b0476f74ULL,
+ 0x29c1f6e6b3e0301bULL,
+ 0xc96c5795d7870f42ULL,
+ 0x7a421b2bd420502dULL,
+ 0x3de861c27fc7af19ULL,
+ 0x8ec62d7c7c60f076ULL,
+ 0xb2bc941128085171ULL,
+ 0x0192d8af2baf0e1eULL,
+ 0x4638a2468048f12aULL,
+ 0xf516eef883efae45ULL,
+ 0x3ecdd09c2899b324ULL,
+ 0x8de39c222b3eec4bULL,
+ 0xca49e6cb80d9137fULL,
+ 0x7967aa75837e4c10ULL,
+ 0x451d1318d716ed17ULL,
+ 0xf6335fa6d4b1b278ULL,
+ 0xb199254f7f564d4cULL,
+ 0x02b769f17cf11223ULL,
+ 0xb4f7f6ad86b4690bULL,
+ 0x07d9ba1385133664ULL,
+ 0x4073c0fa2ef4c950ULL,
+ 0xf35d8c442d53963fULL,
+ 0xcf273529793b3738ULL,
+ 0x7c0979977a9c6857ULL,
+ 0x3ba3037ed17b9763ULL,
+ 0x888d4fc0d2dcc80cULL,
+ 0x435671a479aad56dULL,
+ 0xf0783d1a7a0d8a02ULL,
+ 0xb7d247f3d1ea7536ULL,
+ 0x04fc0b4dd24d2a59ULL,
+ 0x3886b22086258b5eULL,
+ 0x8ba8fe9e8582d431ULL,
+ 0xcc0284772e652b05ULL,
+ 0x7f2cc8c92dc2746aULL,
+ 0x325b15e575e1c3d0ULL,
+ 0x8175595b76469cbfULL,
+ 0xc6df23b2dda1638bULL,
+ 0x75f16f0cde063ce4ULL,
+ 0x498bd6618a6e9de3ULL,
+ 0xfaa59adf89c9c28cULL,
+ 0xbd0fe036222e3db8ULL,
+ 0x0e21ac88218962d7ULL,
+ 0xc5fa92ec8aff7fb6ULL,
+ 0x76d4de52895820d9ULL,
+ 0x317ea4bb22bfdfedULL,
+ 0x8250e80521188082ULL,
+ 0xbe2a516875702185ULL,
+ 0x0d041dd676d77eeaULL,
+ 0x4aae673fdd3081deULL,
+ 0xf9802b81de97deb1ULL,
+ 0x4fc0b4dd24d2a599ULL,
+ 0xfceef8632775faf6ULL,
+ 0xbb44828a8c9205c2ULL,
+ 0x086ace348f355aadULL,
+ 0x34107759db5dfbaaULL,
+ 0x873e3be7d8faa4c5ULL,
+ 0xc094410e731d5bf1ULL,
+ 0x73ba0db070ba049eULL,
+ 0xb86133d4dbcc19ffULL,
+ 0x0b4f7f6ad86b4690ULL,
+ 0x4ce50583738cb9a4ULL,
+ 0xffcb493d702be6cbULL,
+ 0xc3b1f050244347ccULL,
+ 0x709fbcee27e418a3ULL,
+ 0x3735c6078c03e797ULL,
+ 0x841b8ab98fa4b8f8ULL,
+ 0xadda7c5f3c4488e3ULL,
+ 0x1ef430e13fe3d78cULL,
+ 0x595e4a08940428b8ULL,
+ 0xea7006b697a377d7ULL,
+ 0xd60abfdbc3cbd6d0ULL,
+ 0x6524f365c06c89bfULL,
+ 0x228e898c6b8b768bULL,
+ 0x91a0c532682c29e4ULL,
+ 0x5a7bfb56c35a3485ULL,
+ 0xe955b7e8c0fd6beaULL,
+ 0xaeffcd016b1a94deULL,
+ 0x1dd181bf68bdcbb1ULL,
+ 0x21ab38d23cd56ab6ULL,
+ 0x9285746c3f7235d9ULL,
+ 0xd52f0e859495caedULL,
+ 0x6601423b97329582ULL,
+ 0xd041dd676d77eeaaULL,
+ 0x636f91d96ed0b1c5ULL,
+ 0x24c5eb30c5374ef1ULL,
+ 0x97eba78ec690119eULL,
+ 0xab911ee392f8b099ULL,
+ 0x18bf525d915feff6ULL,
+ 0x5f1528b43ab810c2ULL,
+ 0xec3b640a391f4fadULL,
+ 0x27e05a6e926952ccULL,
+ 0x94ce16d091ce0da3ULL,
+ 0xd3646c393a29f297ULL,
+ 0x604a2087398eadf8ULL,
+ 0x5c3099ea6de60cffULL,
+ 0xef1ed5546e415390ULL,
+ 0xa8b4afbdc5a6aca4ULL,
+ 0x1b9ae303c601f3cbULL,
+ 0x56ed3e2f9e224471ULL,
+ 0xe5c372919d851b1eULL,
+ 0xa26908783662e42aULL,
+ 0x114744c635c5bb45ULL,
+ 0x2d3dfdab61ad1a42ULL,
+ 0x9e13b115620a452dULL,
+ 0xd9b9cbfcc9edba19ULL,
+ 0x6a978742ca4ae576ULL,
+ 0xa14cb926613cf817ULL,
+ 0x1262f598629ba778ULL,
+ 0x55c88f71c97c584cULL,
+ 0xe6e6c3cfcadb0723ULL,
+ 0xda9c7aa29eb3a624ULL,
+ 0x69b2361c9d14f94bULL,
+ 0x2e184cf536f3067fULL,
+ 0x9d36004b35545910ULL,
+ 0x2b769f17cf112238ULL,
+ 0x9858d3a9ccb67d57ULL,
+ 0xdff2a94067518263ULL,
+ 0x6cdce5fe64f6dd0cULL,
+ 0x50a65c93309e7c0bULL,
+ 0xe388102d33392364ULL,
+ 0xa4226ac498dedc50ULL,
+ 0x170c267a9b79833fULL,
+ 0xdcd7181e300f9e5eULL,
+ 0x6ff954a033a8c131ULL,
+ 0x28532e49984f3e05ULL,
+ 0x9b7d62f79be8616aULL,
+ 0xa707db9acf80c06dULL,
+ 0x14299724cc279f02ULL,
+ 0x5383edcd67c06036ULL,
+ 0xe0ada17364673f59ULL
+ }
+};
+
+
+/*
+ * crc64_ecma_seed - Initializes the CRC64 ECMA seed.
+ */
+u64 crc64_ecma_seed(void)
+{
+ return CRC64_ECMA_182.seed;
+}
+EXPORT_SYMBOL(crc64_ecma_seed);
+
+/*
+ * crc64_ecma - Computes the 64 bit ECMA CRC.
+ *
+ * pdata: pointer to the data to compute checksum for.
+ * nbytes: number of bytes in data buffer.
+ * seed: CRC seed.
+ */
+u64 crc64_ecma(u8 const *pdata, u32 nbytes, u64 seed)
+{
+ unsigned int i;
+ u64 crc = seed;
+
+ for (i = 0; i < nbytes; i++)
+ crc = CRC64_ECMA_182.table[(crc ^ pdata[i]) & CRC64_BYTE_MASK] ^
+ (crc >> 8);
+
+ return crc;
+}
+EXPORT_SYMBOL(crc64_ecma);
+
+MODULE_DESCRIPTION("CRC64 ECMA function");
+MODULE_AUTHOR("Freescale Semiconductor Inc.");
+MODULE_LICENSE("GPL");
diff --git a/lib/decompress.c b/lib/decompress.c
index 86069d74c062..37f3c786348f 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -54,7 +54,7 @@ static const struct compress_format compressed_formats[] __initconst = {
{ {0, 0}, NULL, NULL }
};
-decompress_fn __init decompress_method(const unsigned char *inbuf, int len,
+decompress_fn __init decompress_method(const unsigned char *inbuf, long len,
const char **name)
{
const struct compress_format *cf;
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
index 31c5f7675fbf..8290e0bef7ea 100644
--- a/lib/decompress_bunzip2.c
+++ b/lib/decompress_bunzip2.c
@@ -92,8 +92,8 @@ struct bunzip_data {
/* State for interrupting output loop */
int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
/* I/O tracking data (file handles, buffers, positions, etc.) */
- int (*fill)(void*, unsigned int);
- int inbufCount, inbufPos /*, outbufPos*/;
+ long (*fill)(void*, unsigned long);
+ long inbufCount, inbufPos /*, outbufPos*/;
unsigned char *inbuf /*,*outbuf*/;
unsigned int inbufBitCount, inbufBits;
/* The CRC values stored in the block header and calculated from the
@@ -617,7 +617,7 @@ decode_next_byte:
goto decode_next_byte;
}
-static int INIT nofill(void *buf, unsigned int len)
+static long INIT nofill(void *buf, unsigned long len)
{
return -1;
}
@@ -625,8 +625,8 @@ static int INIT nofill(void *buf, unsigned int len)
/* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain
a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are
ignored, and data is read from file handle into temporary buffer. */
-static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
- int (*fill)(void*, unsigned int))
+static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, long len,
+ long (*fill)(void*, unsigned long))
{
struct bunzip_data *bd;
unsigned int i, j, c;
@@ -675,11 +675,11 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data,
not end of file.) */
-STATIC int INIT bunzip2(unsigned char *buf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+STATIC int INIT bunzip2(unsigned char *buf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *outbuf,
- int *pos,
+ long *pos,
void(*error)(char *x))
{
struct bunzip_data *bd;
@@ -743,11 +743,11 @@ exit_0:
}
#ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+STATIC int INIT decompress(unsigned char *buf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *outbuf,
- int *pos,
+ long *pos,
void(*error)(char *x))
{
return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error);
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c
index 0edfd742a154..d4c7891635ec 100644
--- a/lib/decompress_inflate.c
+++ b/lib/decompress_inflate.c
@@ -27,17 +27,17 @@
#define GZIP_IOBUF_SIZE (16*1024)
-static int INIT nofill(void *buffer, unsigned int len)
+static long INIT nofill(void *buffer, unsigned long len)
{
return -1;
}
/* Included from initramfs et al code */
-STATIC int INIT gunzip(unsigned char *buf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+STATIC int INIT gunzip(unsigned char *buf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *out_buf,
- int *pos,
+ long *pos,
void(*error)(char *x)) {
u8 *zbuf;
struct z_stream_s *strm;
@@ -142,7 +142,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
/* Write any data generated */
if (flush && strm->next_out > out_buf) {
- int l = strm->next_out - out_buf;
+ long l = strm->next_out - out_buf;
if (l != flush(out_buf, l)) {
rc = -1;
error("write error");
diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
index 7d1e83caf8ad..40f66ebe57b7 100644
--- a/lib/decompress_unlz4.c
+++ b/lib/decompress_unlz4.c
@@ -31,10 +31,10 @@
#define LZ4_DEFAULT_UNCOMPRESSED_CHUNK_SIZE (8 << 20)
#define ARCHIVE_MAGICNUMBER 0x184C2102
-STATIC inline int INIT unlz4(u8 *input, int in_len,
- int (*fill) (void *, unsigned int),
- int (*flush) (void *, unsigned int),
- u8 *output, int *posp,
+STATIC inline int INIT unlz4(u8 *input, long in_len,
+ long (*fill)(void *, unsigned long),
+ long (*flush)(void *, unsigned long),
+ u8 *output, long *posp,
void (*error) (char *x))
{
int ret = -1;
@@ -43,7 +43,7 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
u8 *inp;
u8 *inp_start;
u8 *outp;
- int size = in_len;
+ long size = in_len;
#ifdef PREBOOT
size_t out_len = get_unaligned_le32(input + in_len);
#endif
@@ -83,13 +83,20 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
if (posp)
*posp = 0;
- if (fill)
- fill(inp, 4);
+ if (fill) {
+ size = fill(inp, 4);
+ if (size < 4) {
+ error("data corrupted");
+ goto exit_2;
+ }
+ }
chunksize = get_unaligned_le32(inp);
if (chunksize == ARCHIVE_MAGICNUMBER) {
- inp += 4;
- size -= 4;
+ if (!fill) {
+ inp += 4;
+ size -= 4;
+ }
} else {
error("invalid header");
goto exit_2;
@@ -100,29 +107,44 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
for (;;) {
- if (fill)
- fill(inp, 4);
+ if (fill) {
+ size = fill(inp, 4);
+ if (size == 0)
+ break;
+ if (size < 4) {
+ error("data corrupted");
+ goto exit_2;
+ }
+ }
chunksize = get_unaligned_le32(inp);
if (chunksize == ARCHIVE_MAGICNUMBER) {
- inp += 4;
- size -= 4;
+ if (!fill) {
+ inp += 4;
+ size -= 4;
+ }
if (posp)
*posp += 4;
continue;
}
- inp += 4;
- size -= 4;
+
if (posp)
*posp += 4;
- if (fill) {
+ if (!fill) {
+ inp += 4;
+ size -= 4;
+ } else {
if (chunksize > lz4_compressbound(uncomp_chunksize)) {
error("chunk length is longer than allocated");
goto exit_2;
}
- fill(inp, chunksize);
+ size = fill(inp, chunksize);
+ if (size < chunksize) {
+ error("data corrupted");
+ goto exit_2;
+ }
}
#ifdef PREBOOT
if (out_len >= uncomp_chunksize) {
@@ -149,18 +171,17 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
if (posp)
*posp += chunksize;
- size -= chunksize;
+ if (!fill) {
+ size -= chunksize;
- if (size == 0)
- break;
- else if (size < 0) {
- error("data corrupted");
- goto exit_2;
+ if (size == 0)
+ break;
+ else if (size < 0) {
+ error("data corrupted");
+ goto exit_2;
+ }
+ inp += chunksize;
}
-
- inp += chunksize;
- if (fill)
- inp = inp_start;
}
ret = 0;
@@ -175,11 +196,11 @@ exit_0:
}
#ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, int in_len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+STATIC int INIT decompress(unsigned char *buf, long in_len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *posp,
+ long *posp,
void(*error)(char *x)
)
{
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c
index 32adb73a9038..0be83af62b88 100644
--- a/lib/decompress_unlzma.c
+++ b/lib/decompress_unlzma.c
@@ -65,11 +65,11 @@ static long long INIT read_int(unsigned char *ptr, int size)
#define LZMA_IOBUF_SIZE 0x10000
struct rc {
- int (*fill)(void*, unsigned int);
+ long (*fill)(void*, unsigned long);
uint8_t *ptr;
uint8_t *buffer;
uint8_t *buffer_end;
- int buffer_size;
+ long buffer_size;
uint32_t code;
uint32_t range;
uint32_t bound;
@@ -82,7 +82,7 @@ struct rc {
#define RC_MODEL_TOTAL_BITS 11
-static int INIT nofill(void *buffer, unsigned int len)
+static long INIT nofill(void *buffer, unsigned long len)
{
return -1;
}
@@ -99,8 +99,8 @@ static void INIT rc_read(struct rc *rc)
/* Called once */
static inline void INIT rc_init(struct rc *rc,
- int (*fill)(void*, unsigned int),
- char *buffer, int buffer_size)
+ long (*fill)(void*, unsigned long),
+ char *buffer, long buffer_size)
{
if (fill)
rc->fill = fill;
@@ -280,7 +280,7 @@ struct writer {
size_t buffer_pos;
int bufsize;
size_t global_pos;
- int(*flush)(void*, unsigned int);
+ long (*flush)(void*, unsigned long);
struct lzma_header *header;
};
@@ -534,11 +534,11 @@ static inline int INIT process_bit1(struct writer *wr, struct rc *rc,
-STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+STATIC inline int INIT unlzma(unsigned char *buf, long in_len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *posp,
+ long *posp,
void(*error)(char *x)
)
{
@@ -667,11 +667,11 @@ exit_0:
}
#ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, int in_len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+STATIC int INIT decompress(unsigned char *buf, long in_len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *posp,
+ long *posp,
void(*error)(char *x)
)
{
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
index 960183d4258f..b94a31bdd87d 100644
--- a/lib/decompress_unlzo.c
+++ b/lib/decompress_unlzo.c
@@ -51,7 +51,7 @@ static const unsigned char lzop_magic[] = {
#define HEADER_SIZE_MIN (9 + 7 + 4 + 8 + 1 + 4)
#define HEADER_SIZE_MAX (9 + 7 + 1 + 8 + 8 + 4 + 1 + 255 + 4)
-STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len)
+STATIC inline long INIT parse_header(u8 *input, long *skip, long in_len)
{
int l;
u8 *parse = input;
@@ -108,14 +108,14 @@ STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len)
return 1;
}
-STATIC inline int INIT unlzo(u8 *input, int in_len,
- int (*fill) (void *, unsigned int),
- int (*flush) (void *, unsigned int),
- u8 *output, int *posp,
+STATIC int INIT unlzo(u8 *input, long in_len,
+ long (*fill)(void *, unsigned long),
+ long (*flush)(void *, unsigned long),
+ u8 *output, long *posp,
void (*error) (char *x))
{
u8 r = 0;
- int skip = 0;
+ long skip = 0;
u32 src_len, dst_len;
size_t tmp;
u8 *in_buf, *in_buf_save, *out_buf;
diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c
index 9f34eb56854d..b07a78340e9d 100644
--- a/lib/decompress_unxz.c
+++ b/lib/decompress_unxz.c
@@ -248,10 +248,10 @@ void *memmove(void *dest, const void *src, size_t size)
* both input and output buffers are available as a single chunk, i.e. when
* fill() and flush() won't be used.
*/
-STATIC int INIT unxz(unsigned char *in, int in_size,
- int (*fill)(void *dest, unsigned int size),
- int (*flush)(void *src, unsigned int size),
- unsigned char *out, int *in_used,
+STATIC int INIT unxz(unsigned char *in, long in_size,
+ long (*fill)(void *dest, unsigned long size),
+ long (*flush)(void *src, unsigned long size),
+ unsigned char *out, long *in_used,
void (*error)(char *x))
{
struct xz_buf b;
@@ -329,7 +329,7 @@ STATIC int INIT unxz(unsigned char *in, int in_size,
* returned by xz_dec_run(), but probably
* it's not too bad.
*/
- if (flush(b.out, b.out_pos) != (int)b.out_pos)
+ if (flush(b.out, b.out_pos) != (long)b.out_pos)
ret = XZ_BUF_ERROR;
b.out_pos = 0;
diff --git a/lib/glob.c b/lib/glob.c
new file mode 100644
index 000000000000..500fc80d23e1
--- /dev/null
+++ b/lib/glob.c
@@ -0,0 +1,287 @@
+#include <linux/module.h>
+#include <linux/glob.h>
+
+/*
+ * The only reason this code can be compiled as a module is because the
+ * ATA code that depends on it can be as well. In practice, they're
+ * both usually compiled in and the module overhead goes away.
+ */
+MODULE_DESCRIPTION("glob(7) matching");
+MODULE_LICENSE("Dual MIT/GPL");
+
+/**
+ * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
+ * @pat: Shell-style pattern to match, e.g. "*.[ch]".
+ * @str: String to match. The pattern must match the entire string.
+ *
+ * Perform shell-style glob matching, returning true (1) if the match
+ * succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0).
+ *
+ * Pattern metacharacters are ?, *, [ and \.
+ * (And, inside character classes, !, - and ].)
+ *
+ * This is small and simple implementation intended for device blacklists
+ * where a string is matched against a number of patterns. Thus, it
+ * does not preprocess the patterns. It is non-recursive, and run-time
+ * is at most quadratic: strlen(@str)*strlen(@pat).
+ *
+ * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
+ * it takes 6 passes over the pattern before matching the string.
+ *
+ * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
+ * treat / or leading . specially; it isn't actually used for pathnames.
+ *
+ * Note that according to glob(7) (and unlike bash), character classes
+ * are complemented by a leading !; this does not support the regex-style
+ * [^a-z] syntax.
+ *
+ * An opening bracket without a matching close is matched literally.
+ */
+bool __pure glob_match(char const *pat, char const *str)
+{
+ /*
+ * Backtrack to previous * on mismatch and retry starting one
+ * character later in the string. Because * matches all characters
+ * (no exception for /), it can be easily proved that there's
+ * never a need to backtrack multiple levels.
+ */
+ char const *back_pat = NULL, *back_str = back_str;
+
+ /*
+ * Loop over each token (character or class) in pat, matching
+ * it against the remaining unmatched tail of str. Return false
+ * on mismatch, or true after matching the trailing nul bytes.
+ */
+ for (;;) {
+ unsigned char c = *str++;
+ unsigned char d = *pat++;
+
+ switch (d) {
+ case '?': /* Wildcard: anything but nul */
+ if (c == '\0')
+ return false;
+ break;
+ case '*': /* Any-length wildcard */
+ if (*pat == '\0') /* Optimize trailing * case */
+ return true;
+ back_pat = pat;
+ back_str = --str; /* Allow zero-length match */
+ break;
+ case '[': { /* Character class */
+ bool match = false, inverted = (*pat == '!');
+ char const *class = pat + inverted;
+ unsigned char a = *class++;
+
+ /*
+ * Iterate over each span in the character class.
+ * A span is either a single character a, or a
+ * range a-b. The first span may begin with ']'.
+ */
+ do {
+ unsigned char b = a;
+
+ if (a == '\0') /* Malformed */
+ goto literal;
+
+ if (class[0] == '-' && class[1] != ']') {
+ b = class[1];
+
+ if (b == '\0')
+ goto literal;
+
+ class += 2;
+ /* Any special action if a > b? */
+ }
+ match |= (a <= c && c <= b);
+ } while ((a = *class++) != ']');
+
+ if (match == inverted)
+ goto backtrack;
+ pat = class;
+ }
+ break;
+ case '\\':
+ d = *pat++;
+ /*FALLTHROUGH*/
+ default: /* Literal character */
+literal:
+ if (c == d) {
+ if (d == '\0')
+ return true;
+ break;
+ }
+backtrack:
+ if (c == '\0' || !back_pat)
+ return false; /* No point continuing */
+ /* Try again from last *, one character later in str. */
+ pat = back_pat;
+ str = ++back_str;
+ break;
+ }
+ }
+}
+EXPORT_SYMBOL(glob_match);
+
+
+#ifdef CONFIG_GLOB_SELFTEST
+
+#include <linux/printk.h>
+#include <linux/moduleparam.h>
+
+/* Boot with "glob.verbose=1" to show successful tests, too */
+static bool verbose = false;
+module_param(verbose, bool, 0);
+
+struct glob_test {
+ char const *pat, *str;
+ bool expected;
+};
+
+static bool __pure __init test(char const *pat, char const *str, bool expected)
+{
+ bool match = glob_match(pat, str);
+ bool success = match == expected;
+
+ /* Can't get string literals into a particular section, so... */
+ static char const msg_error[] __initconst =
+ KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n";
+ static char const msg_ok[] __initconst =
+ KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n";
+ static char const mismatch[] __initconst = "mismatch";
+ char const *message;
+
+ if (!success)
+ message = msg_error;
+ else if (verbose)
+ message = msg_ok;
+ else
+ return success;
+
+ printk(message, pat, str, mismatch + 3*match);
+ return success;
+}
+
+/*
+ * The tests are all jammed together in one array to make it simpler
+ * to place that array in the .init.rodata section. The obvious
+ * "array of structures containing char *" has no way to force the
+ * pointed-to strings to be in a particular section.
+ *
+ * Anyway, a test consists of:
+ * 1. Expected glob_match result: '1' or '0'.
+ * 2. Pattern to match: null-terminated string
+ * 3. String to match against: null-terminated string
+ *
+ * The list of tests is terminated with a final '\0' instead of
+ * a glob_match result character.
+ */
+static char const glob_tests[] __initconst =
+ /* Some basic tests */
+ "1" "a\0" "a\0"
+ "0" "a\0" "b\0"
+ "0" "a\0" "aa\0"
+ "0" "a\0" "\0"
+ "1" "\0" "\0"
+ "0" "\0" "a\0"
+ /* Simple character class tests */
+ "1" "[a]\0" "a\0"
+ "0" "[a]\0" "b\0"
+ "0" "[!a]\0" "a\0"
+ "1" "[!a]\0" "b\0"
+ "1" "[ab]\0" "a\0"
+ "1" "[ab]\0" "b\0"
+ "0" "[ab]\0" "c\0"
+ "1" "[!ab]\0" "c\0"
+ "1" "[a-c]\0" "b\0"
+ "0" "[a-c]\0" "d\0"
+ /* Corner cases in character class parsing */
+ "1" "[a-c-e-g]\0" "-\0"
+ "0" "[a-c-e-g]\0" "d\0"
+ "1" "[a-c-e-g]\0" "f\0"
+ "1" "[]a-ceg-ik[]\0" "a\0"
+ "1" "[]a-ceg-ik[]\0" "]\0"
+ "1" "[]a-ceg-ik[]\0" "[\0"
+ "1" "[]a-ceg-ik[]\0" "h\0"
+ "0" "[]a-ceg-ik[]\0" "f\0"
+ "0" "[!]a-ceg-ik[]\0" "h\0"
+ "0" "[!]a-ceg-ik[]\0" "]\0"
+ "1" "[!]a-ceg-ik[]\0" "f\0"
+ /* Simple wild cards */
+ "1" "?\0" "a\0"
+ "0" "?\0" "aa\0"
+ "0" "??\0" "a\0"
+ "1" "?x?\0" "axb\0"
+ "0" "?x?\0" "abx\0"
+ "0" "?x?\0" "xab\0"
+ /* Asterisk wild cards (backtracking) */
+ "0" "*??\0" "a\0"
+ "1" "*??\0" "ab\0"
+ "1" "*??\0" "abc\0"
+ "1" "*??\0" "abcd\0"
+ "0" "??*\0" "a\0"
+ "1" "??*\0" "ab\0"
+ "1" "??*\0" "abc\0"
+ "1" "??*\0" "abcd\0"
+ "0" "?*?\0" "a\0"
+ "1" "?*?\0" "ab\0"
+ "1" "?*?\0" "abc\0"
+ "1" "?*?\0" "abcd\0"
+ "1" "*b\0" "b\0"
+ "1" "*b\0" "ab\0"
+ "0" "*b\0" "ba\0"
+ "1" "*b\0" "bb\0"
+ "1" "*b\0" "abb\0"
+ "1" "*b\0" "bab\0"
+ "1" "*bc\0" "abbc\0"
+ "1" "*bc\0" "bc\0"
+ "1" "*bc\0" "bbc\0"
+ "1" "*bc\0" "bcbc\0"
+ /* Multiple asterisks (complex backtracking) */
+ "1" "*ac*\0" "abacadaeafag\0"
+ "1" "*ac*ae*ag*\0" "abacadaeafag\0"
+ "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
+ "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
+ "1" "*abcd*\0" "abcabcabcabcdefg\0"
+ "1" "*ab*cd*\0" "abcabcabcabcdefg\0"
+ "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
+ "0" "*abcd*\0" "abcabcabcabcefg\0"
+ "0" "*ab*cd*\0" "abcabcabcabcefg\0";
+
+static int __init glob_init(void)
+{
+ unsigned successes = 0;
+ unsigned n = 0;
+ char const *p = glob_tests;
+ static char const message[] __initconst =
+ KERN_INFO "glob: %u self-tests passed, %u failed\n";
+
+ /*
+ * Tests are jammed together in a string. The first byte is '1'
+ * or '0' to indicate the expected outcome, or '\0' to indicate the
+ * end of the tests. Then come two null-terminated strings: the
+ * pattern and the string to match it against.
+ */
+ while (*p) {
+ bool expected = *p++ & 1;
+ char const *pat = p;
+
+ p += strlen(p) + 1;
+ successes += test(pat, p, expected);
+ p += strlen(p) + 1;
+ n++;
+ }
+
+ n -= successes;
+ printk(message, successes, n);
+
+ /* What's the errno for "kernel bug detected"? Guess... */
+ return n ? -ECANCELED : 0;
+}
+
+/* We need a dummy exit function to allow unload */
+static void __exit glob_fini(void) { }
+
+module_init(glob_init);
+module_exit(glob_fini);
+
+#endif /* CONFIG_GLOB_SELFTEST */
diff --git a/lib/idr.c b/lib/idr.c
index 39158abebad1..50be3fa9b657 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -590,26 +590,27 @@ static void __idr_remove_all(struct idr *idp)
struct idr_layer **paa = &pa[0];
n = idp->layers * IDR_BITS;
- p = idp->top;
+ *paa = idp->top;
RCU_INIT_POINTER(idp->top, NULL);
max = idr_max(idp->layers);
id = 0;
while (id >= 0 && id <= max) {
+ p = *paa;
while (n > IDR_BITS && p) {
n -= IDR_BITS;
- *paa++ = p;
p = p->ary[(id >> n) & IDR_MASK];
+ *++paa = p;
}
bt_mask = id;
id += 1 << n;
/* Get the highest bit that the above add changed from 0->1. */
while (n < fls(id ^ bt_mask)) {
- if (p)
- free_layer(idp, p);
+ if (*paa)
+ free_layer(idp, *paa);
n += IDR_BITS;
- p = *--paa;
+ --paa;
}
}
idp->layers = 0;
@@ -692,15 +693,16 @@ int idr_for_each(struct idr *idp,
struct idr_layer **paa = &pa[0];
n = idp->layers * IDR_BITS;
- p = rcu_dereference_raw(idp->top);
+ *paa = rcu_dereference_raw(idp->top);
max = idr_max(idp->layers);
id = 0;
while (id >= 0 && id <= max) {
+ p = *paa;
while (n > 0 && p) {
n -= IDR_BITS;
- *paa++ = p;
p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
+ *++paa = p;
}
if (p) {
@@ -712,7 +714,7 @@ int idr_for_each(struct idr *idp,
id += 1 << n;
while (n < fls(id)) {
n += IDR_BITS;
- p = *--paa;
+ --paa;
}
}
@@ -740,17 +742,18 @@ void *idr_get_next(struct idr *idp, int *nextidp)
int n, max;
/* find first ent */
- p = rcu_dereference_raw(idp->top);
+ p = *paa = rcu_dereference_raw(idp->top);
if (!p)
return NULL;
n = (p->layer + 1) * IDR_BITS;
max = idr_max(p->layer + 1);
while (id >= 0 && id <= max) {
+ p = *paa;
while (n > 0 && p) {
n -= IDR_BITS;
- *paa++ = p;
p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
+ *++paa = p;
}
if (p) {
@@ -768,7 +771,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
id = round_up(id + 1, 1 << n);
while (n < fls(id)) {
n += IDR_BITS;
- p = *--paa;
+ --paa;
}
}
return NULL;
diff --git a/lib/klist.c b/lib/klist.c
index 358a368a2947..89b485a2a58d 100644
--- a/lib/klist.c
+++ b/lib/klist.c
@@ -140,11 +140,11 @@ void klist_add_tail(struct klist_node *n, struct klist *k)
EXPORT_SYMBOL_GPL(klist_add_tail);
/**
- * klist_add_after - Init a klist_node and add it after an existing node
+ * klist_add_behind - Init a klist_node and add it after an existing node
* @n: node we're adding.
* @pos: node to put @n after
*/
-void klist_add_after(struct klist_node *n, struct klist_node *pos)
+void klist_add_behind(struct klist_node *n, struct klist_node *pos)
{
struct klist *k = knode_klist(pos);
@@ -153,7 +153,7 @@ void klist_add_after(struct klist_node *n, struct klist_node *pos)
list_add(&n->n_node, &pos->n_node);
spin_unlock(&k->k_lock);
}
-EXPORT_SYMBOL_GPL(klist_add_after);
+EXPORT_SYMBOL_GPL(klist_add_behind);
/**
* klist_add_before - Init a klist_node and add it before an existing node
diff --git a/lib/list_sort.c b/lib/list_sort.c
index 1183fa70a44d..12bcba1c8612 100644
--- a/lib/list_sort.c
+++ b/lib/list_sort.c
@@ -1,3 +1,6 @@
+
+#define pr_fmt(fmt) "list_sort_test: " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list_sort.h>
@@ -47,6 +50,7 @@ static void merge_and_restore_back_links(void *priv,
struct list_head *a, struct list_head *b)
{
struct list_head *tail = head;
+ u8 count = 0;
while (a && b) {
/* if equal, take 'a' -- important for sort stability */
@@ -70,7 +74,8 @@ static void merge_and_restore_back_links(void *priv,
* element comparison is needed, so the client's cmp()
* routine can invoke cond_resched() periodically.
*/
- (*cmp)(priv, tail->next, tail->next);
+ if (unlikely(!(++count)))
+ (*cmp)(priv, tail->next, tail->next);
tail->next->prev = tail;
tail = tail->next;
@@ -123,9 +128,7 @@ void list_sort(void *priv, struct list_head *head,
}
if (lev > max_lev) {
if (unlikely(lev >= ARRAY_SIZE(part)-1)) {
- printk_once(KERN_DEBUG "list passed to"
- " list_sort() too long for"
- " efficiency\n");
+ printk_once(KERN_DEBUG "list too long for efficiency\n");
lev--;
}
max_lev = lev;
@@ -168,27 +171,25 @@ static struct debug_el **elts __initdata;
static int __init check(struct debug_el *ela, struct debug_el *elb)
{
if (ela->serial >= TEST_LIST_LEN) {
- printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
- ela->serial);
+ pr_err("error: incorrect serial %d\n", ela->serial);
return -EINVAL;
}
if (elb->serial >= TEST_LIST_LEN) {
- printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
- elb->serial);
+ pr_err("error: incorrect serial %d\n", elb->serial);
return -EINVAL;
}
if (elts[ela->serial] != ela || elts[elb->serial] != elb) {
- printk(KERN_ERR "list_sort_test: error: phantom element\n");
+ pr_err("error: phantom element\n");
return -EINVAL;
}
if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) {
- printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
- ela->poison1, ela->poison2);
+ pr_err("error: bad poison: %#x/%#x\n",
+ ela->poison1, ela->poison2);
return -EINVAL;
}
if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) {
- printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
- elb->poison1, elb->poison2);
+ pr_err("error: bad poison: %#x/%#x\n",
+ elb->poison1, elb->poison2);
return -EINVAL;
}
return 0;
@@ -207,25 +208,23 @@ static int __init cmp(void *priv, struct list_head *a, struct list_head *b)
static int __init list_sort_test(void)
{
- int i, count = 1, err = -EINVAL;
+ int i, count = 1, err = -ENOMEM;
struct debug_el *el;
- struct list_head *cur, *tmp;
+ struct list_head *cur;
LIST_HEAD(head);
- printk(KERN_DEBUG "list_sort_test: start testing list_sort()\n");
+ pr_debug("start testing list_sort()\n");
- elts = kmalloc(sizeof(void *) * TEST_LIST_LEN, GFP_KERNEL);
+ elts = kcalloc(TEST_LIST_LEN, sizeof(*elts), GFP_KERNEL);
if (!elts) {
- printk(KERN_ERR "list_sort_test: error: cannot allocate "
- "memory\n");
- goto exit;
+ pr_err("error: cannot allocate memory\n");
+ return err;
}
for (i = 0; i < TEST_LIST_LEN; i++) {
el = kmalloc(sizeof(*el), GFP_KERNEL);
if (!el) {
- printk(KERN_ERR "list_sort_test: error: cannot "
- "allocate memory\n");
+ pr_err("error: cannot allocate memory\n");
goto exit;
}
/* force some equivalencies */
@@ -239,52 +238,52 @@ static int __init list_sort_test(void)
list_sort(NULL, &head, cmp);
+ err = -EINVAL;
for (cur = head.next; cur->next != &head; cur = cur->next) {
struct debug_el *el1;
int cmp_result;
if (cur->next->prev != cur) {
- printk(KERN_ERR "list_sort_test: error: list is "
- "corrupted\n");
+ pr_err("error: list is corrupted\n");
goto exit;
}
cmp_result = cmp(NULL, cur, cur->next);
if (cmp_result > 0) {
- printk(KERN_ERR "list_sort_test: error: list is not "
- "sorted\n");
+ pr_err("error: list is not sorted\n");
goto exit;
}
el = container_of(cur, struct debug_el, list);
el1 = container_of(cur->next, struct debug_el, list);
if (cmp_result == 0 && el->serial >= el1->serial) {
- printk(KERN_ERR "list_sort_test: error: order of "
- "equivalent elements not preserved\n");
+ pr_err("error: order of equivalent elements not "
+ "preserved\n");
goto exit;
}
if (check(el, el1)) {
- printk(KERN_ERR "list_sort_test: error: element check "
- "failed\n");
+ pr_err("error: element check failed\n");
goto exit;
}
count++;
}
+ if (head.prev != cur) {
+ pr_err("error: list is corrupted\n");
+ goto exit;
+ }
+
if (count != TEST_LIST_LEN) {
- printk(KERN_ERR "list_sort_test: error: bad list length %d",
- count);
+ pr_err("error: bad list length %d", count);
goto exit;
}
err = 0;
exit:
+ for (i = 0; i < TEST_LIST_LEN; i++)
+ kfree(elts[i]);
kfree(elts);
- list_for_each_safe(cur, tmp, &head) {
- list_del(cur);
- kfree(container_of(cur, struct debug_el, list));
- }
return err;
}
module_init(list_sort_test);
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 3a8e8e8fb2a5..4251cbd5becb 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -73,7 +73,7 @@ EXPORT_SYMBOL(sg_nents);
**/
struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
{
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
struct scatterlist *ret = &sgl[nents - 1];
#else
struct scatterlist *sg, *ret = NULL;
@@ -251,7 +251,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
if (nents == 0)
return -EINVAL;
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
if (WARN_ON_ONCE(nents > max_ents))
return -EINVAL;
#endif
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index ed5c1454dd62..29033f319aea 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -25,12 +25,15 @@
int string_get_size(u64 size, const enum string_size_units units,
char *buf, int len)
{
- static const char *units_10[] = { "B", "kB", "MB", "GB", "TB", "PB",
- "EB", "ZB", "YB", NULL};
- static const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB",
- "EiB", "ZiB", "YiB", NULL };
- static const char **units_str[] = {
- [STRING_UNITS_10] = units_10,
+ static const char *const units_10[] = {
+ "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", NULL
+ };
+ static const char *const units_2[] = {
+ "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB",
+ NULL
+ };
+ static const char *const *const units_str[] = {
+ [STRING_UNITS_10] = units_10,
[STRING_UNITS_2] = units_2,
};
static const unsigned int divisor[] = {
diff --git a/lib/test-kstrtox.c b/lib/test-kstrtox.c
index bea3f3fa3f02..4137bca5f8e8 100644
--- a/lib/test-kstrtox.c
+++ b/lib/test-kstrtox.c
@@ -3,7 +3,7 @@
#include <linux/module.h>
#define for_each_test(i, test) \
- for (i = 0; i < sizeof(test) / sizeof(test[0]); i++)
+ for (i = 0; i < ARRAY_SIZE(test); i++)
struct test_fail {
const char *str;
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 6fe2c84eb055..0eced40344dd 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1183,6 +1183,21 @@ char *address_val(char *buf, char *end, const void *addr,
return number(buf, end, num, spec);
}
+static noinline_for_stack
+char *comm_name(char *buf, char *end, struct task_struct *tsk,
+ struct printf_spec spec, const char *fmt)
+{
+ char name[TASK_COMM_LEN];
+
+ /* Caller can pass NULL instead of current. */
+ if (!tsk)
+ tsk = current;
+ /* Not using get_task_comm() in case I'm in IRQ context. */
+ memcpy(name, tsk->comm, TASK_COMM_LEN);
+ name[sizeof(name) - 1] = '\0';
+ return string(buf, end, name, spec);
+}
+
int kptr_restrict __read_mostly;
/*
@@ -1250,6 +1265,7 @@ int kptr_restrict __read_mostly;
* (default assumed to be phys_addr_t, passed by reference)
* - 'd[234]' For a dentry name (optionally 2-4 last components)
* - 'D[234]' Same as 'd' but for a struct file
+ * - 'T' task_struct->comm
*
* Note: The difference between 'S' and 'F' is that on ia64 and ppc64
* function pointers are really function descriptors, which contain a
@@ -1261,7 +1277,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
{
int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0);
- if (!ptr && *fmt != 'K') {
+ if (!ptr && *fmt != 'K' && *fmt != 'T') {
/*
* Print (null) with the same width as a pointer so it makes
* tabular output look nice.
@@ -1389,6 +1405,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
return dentry_name(buf, end,
((const struct file *)ptr)->f_path.dentry,
spec, fmt);
+ case 'T':
+ return comm_name(buf, end, ptr, spec, fmt);
}
spec.flags |= SMALL;
if (spec.field_width == -1) {
diff --git a/mm/Kconfig b/mm/Kconfig
index 3e9977a9d657..886db2158538 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -508,21 +508,34 @@ config CMA_DEBUG
processing calls such as dma_alloc_from_contiguous().
This option does not affect warning and error messages.
-config ZBUD
- tristate
- default n
+config CMA_AREAS
+ int "Maximum count of the CMA areas"
+ depends on CMA
+ default 7
help
- A special purpose allocator for storing compressed pages.
- It is designed to store up to two compressed pages per physical
- page. While this design limits storage density, it has simple and
- deterministic reclaim properties that make it preferable to a higher
- density approach when reclaim will be used.
+ CMA allows to create CMA areas for particular purpose, mainly,
+ used as device private area. This parameter sets the maximum
+ number of CMA area in the system.
+
+ If unsure, leave the default value "7".
+
+config MEM_SOFT_DIRTY
+ bool "Track memory changes"
+ depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
+ select PROC_PAGE_MONITOR
+ help
+ This option enables memory changes tracking by introducing a
+ soft-dirty bit on pte-s. This bit it set when someone writes
+ into a page just as regular dirty bit, but unlike the latter
+ it can be cleared by hands.
+
+ See Documentation/vm/soft-dirty.txt for more details.
config ZSWAP
bool "Compressed cache for swap pages (EXPERIMENTAL)"
depends on FRONTSWAP && CRYPTO=y
select CRYPTO_LZO
- select ZBUD
+ select ZPOOL
default n
help
A lightweight compressed cache for swap pages. It takes
@@ -538,17 +551,22 @@ config ZSWAP
they have not be fully explored on the large set of potential
configurations and workloads that exist.
-config MEM_SOFT_DIRTY
- bool "Track memory changes"
- depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
- select PROC_PAGE_MONITOR
+config ZPOOL
+ tristate "Common API for compressed memory storage"
+ default n
help
- This option enables memory changes tracking by introducing a
- soft-dirty bit on pte-s. This bit it set when someone writes
- into a page just as regular dirty bit, but unlike the latter
- it can be cleared by hands.
+ Compressed memory storage API. This allows using either zbud or
+ zsmalloc.
- See Documentation/vm/soft-dirty.txt for more details.
+config ZBUD
+ tristate "Low density storage for compressed pages"
+ default n
+ help
+ A special purpose allocator for storing compressed pages.
+ It is designed to store up to two compressed pages per physical
+ page. While this design limits storage density, it has simple and
+ deterministic reclaim properties that make it preferable to a higher
+ density approach when reclaim will be used.
config ZSMALLOC
tristate "Memory allocator for compressed pages"
diff --git a/mm/Makefile b/mm/Makefile
index 4064f3ec145e..cb23cd277229 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -60,5 +60,7 @@ obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
obj-$(CONFIG_CLEANCACHE) += cleancache.o
obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
obj-$(CONFIG_ZBUD) += zbud.o
+obj-$(CONFIG_ZPOOL) += zpool.o
obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
+obj-$(CONFIG_CMA) += cma.o
diff --git a/mm/cma.c b/mm/cma.c
new file mode 100644
index 000000000000..c17751c0dcaf
--- /dev/null
+++ b/mm/cma.c
@@ -0,0 +1,335 @@
+/*
+ * Contiguous Memory Allocator
+ *
+ * Copyright (c) 2010-2011 by Samsung Electronics.
+ * Copyright IBM Corporation, 2013
+ * Copyright LG Electronics Inc., 2014
+ * Written by:
+ * Marek Szyprowski <m.szyprowski@samsung.com>
+ * Michal Nazarewicz <mina86@mina86.com>
+ * Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ * Joonsoo Kim <iamjoonsoo.kim@lge.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ */
+
+#define pr_fmt(fmt) "cma: " fmt
+
+#ifdef CONFIG_CMA_DEBUG
+#ifndef DEBUG
+# define DEBUG
+#endif
+#endif
+
+#include <linux/memblock.h>
+#include <linux/err.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/log2.h>
+#include <linux/cma.h>
+
+struct cma {
+ unsigned long base_pfn;
+ unsigned long count;
+ unsigned long *bitmap;
+ unsigned int order_per_bit; /* Order of pages represented by one bit */
+ struct mutex lock;
+};
+
+static struct cma cma_areas[MAX_CMA_AREAS];
+static unsigned cma_area_count;
+static DEFINE_MUTEX(cma_mutex);
+
+phys_addr_t cma_get_base(struct cma *cma)
+{
+ return PFN_PHYS(cma->base_pfn);
+}
+
+unsigned long cma_get_size(struct cma *cma)
+{
+ return cma->count << PAGE_SHIFT;
+}
+
+static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order)
+{
+ return (1UL << (align_order >> cma->order_per_bit)) - 1;
+}
+
+static unsigned long cma_bitmap_maxno(struct cma *cma)
+{
+ return cma->count >> cma->order_per_bit;
+}
+
+static unsigned long cma_bitmap_pages_to_bits(struct cma *cma,
+ unsigned long pages)
+{
+ return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit;
+}
+
+static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, int count)
+{
+ unsigned long bitmap_no, bitmap_count;
+
+ bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit;
+ bitmap_count = cma_bitmap_pages_to_bits(cma, count);
+
+ mutex_lock(&cma->lock);
+ bitmap_clear(cma->bitmap, bitmap_no, bitmap_count);
+ mutex_unlock(&cma->lock);
+}
+
+static int __init cma_activate_area(struct cma *cma)
+{
+ int bitmap_size = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long);
+ unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
+ unsigned i = cma->count >> pageblock_order;
+ struct zone *zone;
+
+ cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+
+ if (!cma->bitmap)
+ return -ENOMEM;
+
+ WARN_ON_ONCE(!pfn_valid(pfn));
+ zone = page_zone(pfn_to_page(pfn));
+
+ do {
+ unsigned j;
+
+ base_pfn = pfn;
+ for (j = pageblock_nr_pages; j; --j, pfn++) {
+ WARN_ON_ONCE(!pfn_valid(pfn));
+ /*
+ * alloc_contig_range requires the pfn range
+ * specified to be in the same zone. Make this
+ * simple by forcing the entire CMA resv range
+ * to be in the same zone.
+ */
+ if (page_zone(pfn_to_page(pfn)) != zone)
+ goto err;
+ }
+ init_cma_reserved_pageblock(pfn_to_page(base_pfn));
+ } while (--i);
+
+ mutex_init(&cma->lock);
+ return 0;
+
+err:
+ kfree(cma->bitmap);
+ return -EINVAL;
+}
+
+static int __init cma_init_reserved_areas(void)
+{
+ int i;
+
+ for (i = 0; i < cma_area_count; i++) {
+ int ret = cma_activate_area(&cma_areas[i]);
+
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+core_initcall(cma_init_reserved_areas);
+
+/**
+ * cma_declare_contiguous() - reserve custom contiguous area
+ * @base: Base address of the reserved area optional, use 0 for any
+ * @size: Size of the reserved area (in bytes),
+ * @limit: End address of the reserved memory (optional, 0 for any).
+ * @alignment: Alignment for the CMA area, should be power of 2 or zero
+ * @order_per_bit: Order of pages represented by one bit on bitmap.
+ * @fixed: hint about where to place the reserved area
+ * @res_cma: Pointer to store the created cma region.
+ *
+ * This function reserves memory from early allocator. It should be
+ * called by arch specific code once the early allocator (memblock or bootmem)
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory. This function allows to create custom reserved areas.
+ *
+ * If @fixed is true, reserve contiguous area at exactly @base. If false,
+ * reserve in range from @base to @limit.
+ */
+int __init cma_declare_contiguous(phys_addr_t base,
+ phys_addr_t size, phys_addr_t limit,
+ phys_addr_t alignment, unsigned int order_per_bit,
+ bool fixed, struct cma **res_cma)
+{
+ struct cma *cma;
+ int ret = 0;
+
+ pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n",
+ __func__, (unsigned long)size, (unsigned long)base,
+ (unsigned long)limit, (unsigned long)alignment);
+
+ if (cma_area_count == ARRAY_SIZE(cma_areas)) {
+ pr_err("Not enough slots for CMA reserved regions!\n");
+ return -ENOSPC;
+ }
+
+ if (!size)
+ return -EINVAL;
+
+ if (alignment && !is_power_of_2(alignment))
+ return -EINVAL;
+
+ /*
+ * Sanitise input arguments.
+ * Pages both ends in CMA area could be merged into adjacent unmovable
+ * migratetype page by page allocator's buddy algorithm. In the case,
+ * you couldn't get a contiguous memory, which is not what we want.
+ */
+ alignment = max(alignment,
+ (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order));
+ base = ALIGN(base, alignment);
+ size = ALIGN(size, alignment);
+ limit &= ~(alignment - 1);
+
+ /* size should be aligned with order_per_bit */
+ if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit))
+ return -EINVAL;
+
+ /* Reserve memory */
+ if (base && fixed) {
+ if (memblock_is_region_reserved(base, size) ||
+ memblock_reserve(base, size) < 0) {
+ ret = -EBUSY;
+ goto err;
+ }
+ } else {
+ phys_addr_t addr = memblock_alloc_range(size, alignment, base,
+ limit);
+ if (!addr) {
+ ret = -ENOMEM;
+ goto err;
+ } else {
+ base = addr;
+ }
+ }
+
+ /*
+ * Each reserved area must be initialised later, when more kernel
+ * subsystems (like slab allocator) are available.
+ */
+ cma = &cma_areas[cma_area_count];
+ cma->base_pfn = PFN_DOWN(base);
+ cma->count = size >> PAGE_SHIFT;
+ cma->order_per_bit = order_per_bit;
+ *res_cma = cma;
+ cma_area_count++;
+
+ pr_info("Reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
+ (unsigned long)base);
+ return 0;
+
+err:
+ pr_err("Failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
+ return ret;
+}
+
+/**
+ * cma_alloc() - allocate pages from contiguous area
+ * @cma: Contiguous memory region for which the allocation is performed.
+ * @count: Requested number of pages.
+ * @align: Requested alignment of pages (in PAGE_SIZE order).
+ *
+ * This function allocates part of contiguous memory on specific
+ * contiguous memory area.
+ */
+struct page *cma_alloc(struct cma *cma, int count, unsigned int align)
+{
+ unsigned long mask, pfn, start = 0;
+ unsigned long bitmap_maxno, bitmap_no, bitmap_count;
+ struct page *page = NULL;
+ int ret;
+
+ if (!cma || !cma->count)
+ return NULL;
+
+ pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
+ count, align);
+
+ if (!count)
+ return NULL;
+
+ mask = cma_bitmap_aligned_mask(cma, align);
+ bitmap_maxno = cma_bitmap_maxno(cma);
+ bitmap_count = cma_bitmap_pages_to_bits(cma, count);
+
+ for (;;) {
+ mutex_lock(&cma->lock);
+ bitmap_no = bitmap_find_next_zero_area(cma->bitmap,
+ bitmap_maxno, start, bitmap_count, mask);
+ if (bitmap_no >= bitmap_maxno) {
+ mutex_unlock(&cma->lock);
+ break;
+ }
+ bitmap_set(cma->bitmap, bitmap_no, bitmap_count);
+ /*
+ * It's safe to drop the lock here. We've marked this region for
+ * our exclusive use. If the migration fails we will take the
+ * lock again and unmark it.
+ */
+ mutex_unlock(&cma->lock);
+
+ pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
+ mutex_lock(&cma_mutex);
+ ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
+ mutex_unlock(&cma_mutex);
+ if (ret == 0) {
+ page = pfn_to_page(pfn);
+ break;
+ }
+
+ cma_clear_bitmap(cma, pfn, count);
+ if (ret != -EBUSY)
+ break;
+
+ pr_debug("%s(): memory range at %p is busy, retrying\n",
+ __func__, pfn_to_page(pfn));
+ /* try again with a bit different memory target */
+ start = bitmap_no + mask + 1;
+ }
+
+ pr_debug("%s(): returned %p\n", __func__, page);
+ return page;
+}
+
+/**
+ * cma_release() - release allocated pages
+ * @cma: Contiguous memory region for which the allocation is performed.
+ * @pages: Allocated pages.
+ * @count: Number of allocated pages.
+ *
+ * This function releases memory allocated by alloc_cma().
+ * It returns false when provided pages do not belong to contiguous area and
+ * true otherwise.
+ */
+bool cma_release(struct cma *cma, struct page *pages, int count)
+{
+ unsigned long pfn;
+
+ if (!cma || !pages)
+ return false;
+
+ pr_debug("%s(page %p)\n", __func__, (void *)pages);
+
+ pfn = page_to_pfn(pages);
+
+ if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
+ return false;
+
+ VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
+
+ free_contig_range(pfn, count);
+ cma_clear_bitmap(cma, pfn, count);
+
+ return true;
+}
diff --git a/mm/compaction.c b/mm/compaction.c
index 21bf292b642a..51750197db11 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -325,14 +325,14 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
/* Found a free page, break it into order-0 pages */
isolated = split_free_page(page);
- total_isolated += isolated;
- for (i = 0; i < isolated; i++) {
- list_add(&page->lru, freelist);
- page++;
- }
-
- /* If a page was split, advance to the end of it */
if (isolated) {
+ total_isolated += isolated;
+ for (i = 0; i < isolated; i++) {
+ list_add(&page->lru, freelist);
+ page++;
+ }
+
+ /* If a page was split, advance to the end of it */
blockpfn += isolated - 1;
cursor += isolated - 1;
continue;
@@ -341,9 +341,6 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
isolate_fail:
if (strict)
break;
- else
- continue;
-
}
trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated);
diff --git a/mm/filemap.c b/mm/filemap.c
index dafb06f70a09..c2f30ed8e95f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -233,7 +233,6 @@ void delete_from_page_cache(struct page *page)
spin_lock_irq(&mapping->tree_lock);
__delete_from_page_cache(page, NULL);
spin_unlock_irq(&mapping->tree_lock);
- mem_cgroup_uncharge_cache_page(page);
if (freepage)
freepage(page);
@@ -501,8 +500,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
if (PageSwapBacked(new))
__inc_zone_page_state(new, NR_SHMEM);
spin_unlock_irq(&mapping->tree_lock);
- /* mem_cgroup codes must not be called under tree_lock */
- mem_cgroup_replace_page_cache(old, new);
+ mem_cgroup_migrate(old, new, true);
radix_tree_preload_end();
if (freepage)
freepage(old);
@@ -560,19 +558,19 @@ static int __add_to_page_cache_locked(struct page *page,
pgoff_t offset, gfp_t gfp_mask,
void **shadowp)
{
+ struct mem_cgroup *memcg;
int error;
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageSwapBacked(page), page);
- error = mem_cgroup_charge_file(page, current->mm,
- gfp_mask & GFP_RECLAIM_MASK);
+ error = mem_cgroup_try_charge(page, current->mm, gfp_mask, &memcg);
if (error)
return error;
error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
if (error) {
- mem_cgroup_uncharge_cache_page(page);
+ mem_cgroup_cancel_charge(page, memcg);
return error;
}
@@ -587,13 +585,14 @@ static int __add_to_page_cache_locked(struct page *page,
goto err_insert;
__inc_zone_page_state(page, NR_FILE_PAGES);
spin_unlock_irq(&mapping->tree_lock);
+ mem_cgroup_commit_charge(page, memcg, false);
trace_mm_filemap_add_to_page_cache(page);
return 0;
err_insert:
page->mapping = NULL;
/* Leave page->index set: truncation relies upon it */
spin_unlock_irq(&mapping->tree_lock);
- mem_cgroup_uncharge_cache_page(page);
+ mem_cgroup_cancel_charge(page, memcg);
page_cache_release(page);
return error;
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 33514d88fef9..5d562a9fe931 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -715,13 +715,20 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
unsigned long haddr, pmd_t *pmd,
struct page *page)
{
+ struct mem_cgroup *memcg;
pgtable_t pgtable;
spinlock_t *ptl;
VM_BUG_ON_PAGE(!PageCompound(page), page);
+
+ if (mem_cgroup_try_charge(page, mm, GFP_TRANSHUGE, &memcg))
+ return VM_FAULT_OOM;
+
pgtable = pte_alloc_one(mm, haddr);
- if (unlikely(!pgtable))
+ if (unlikely(!pgtable)) {
+ mem_cgroup_cancel_charge(page, memcg);
return VM_FAULT_OOM;
+ }
clear_huge_page(page, haddr, HPAGE_PMD_NR);
/*
@@ -734,7 +741,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
ptl = pmd_lock(mm, pmd);
if (unlikely(!pmd_none(*pmd))) {
spin_unlock(ptl);
- mem_cgroup_uncharge_page(page);
+ mem_cgroup_cancel_charge(page, memcg);
put_page(page);
pte_free(mm, pgtable);
} else {
@@ -742,6 +749,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
entry = mk_huge_pmd(page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
page_add_new_anon_rmap(page, vma, haddr);
+ mem_cgroup_commit_charge(page, memcg, false);
+ lru_cache_add_active_or_unevictable(page, vma);
pgtable_trans_huge_deposit(mm, pmd, pgtable);
set_pmd_at(mm, haddr, pmd, entry);
add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
@@ -827,13 +836,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
}
- if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_KERNEL))) {
- put_page(page);
- count_vm_event(THP_FAULT_FALLBACK);
- return VM_FAULT_FALLBACK;
- }
if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
- mem_cgroup_uncharge_page(page);
put_page(page);
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
@@ -979,6 +982,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
struct page *page,
unsigned long haddr)
{
+ struct mem_cgroup *memcg;
spinlock_t *ptl;
pgtable_t pgtable;
pmd_t _pmd;
@@ -999,20 +1003,21 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
__GFP_OTHER_NODE,
vma, address, page_to_nid(page));
if (unlikely(!pages[i] ||
- mem_cgroup_charge_anon(pages[i], mm,
- GFP_KERNEL))) {
+ mem_cgroup_try_charge(pages[i], mm, GFP_KERNEL,
+ &memcg))) {
if (pages[i])
put_page(pages[i]);
- mem_cgroup_uncharge_start();
while (--i >= 0) {
- mem_cgroup_uncharge_page(pages[i]);
+ memcg = (void *)page_private(pages[i]);
+ set_page_private(pages[i], 0);
+ mem_cgroup_cancel_charge(pages[i], memcg);
put_page(pages[i]);
}
- mem_cgroup_uncharge_end();
kfree(pages);
ret |= VM_FAULT_OOM;
goto out;
}
+ set_page_private(pages[i], (unsigned long)memcg);
}
for (i = 0; i < HPAGE_PMD_NR; i++) {
@@ -1041,7 +1046,11 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
pte_t *pte, entry;
entry = mk_pte(pages[i], vma->vm_page_prot);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+ memcg = (void *)page_private(pages[i]);
+ set_page_private(pages[i], 0);
page_add_new_anon_rmap(pages[i], vma, haddr);
+ mem_cgroup_commit_charge(pages[i], memcg, false);
+ lru_cache_add_active_or_unevictable(pages[i], vma);
pte = pte_offset_map(&_pmd, haddr);
VM_BUG_ON(!pte_none(*pte));
set_pte_at(mm, haddr, pte, entry);
@@ -1065,12 +1074,12 @@ out:
out_free_pages:
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
- mem_cgroup_uncharge_start();
for (i = 0; i < HPAGE_PMD_NR; i++) {
- mem_cgroup_uncharge_page(pages[i]);
+ memcg = (void *)page_private(pages[i]);
+ set_page_private(pages[i], 0);
+ mem_cgroup_cancel_charge(pages[i], memcg);
put_page(pages[i]);
}
- mem_cgroup_uncharge_end();
kfree(pages);
goto out;
}
@@ -1081,6 +1090,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
spinlock_t *ptl;
int ret = 0;
struct page *page = NULL, *new_page;
+ struct mem_cgroup *memcg;
unsigned long haddr;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
@@ -1132,7 +1142,8 @@ alloc:
goto out;
}
- if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) {
+ if (unlikely(mem_cgroup_try_charge(new_page, mm,
+ GFP_TRANSHUGE, &memcg))) {
put_page(new_page);
if (page) {
split_huge_page(page);
@@ -1161,7 +1172,7 @@ alloc:
put_user_huge_page(page);
if (unlikely(!pmd_same(*pmd, orig_pmd))) {
spin_unlock(ptl);
- mem_cgroup_uncharge_page(new_page);
+ mem_cgroup_cancel_charge(new_page, memcg);
put_page(new_page);
goto out_mn;
} else {
@@ -1170,6 +1181,8 @@ alloc:
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
pmdp_clear_flush(vma, haddr, pmd);
page_add_new_anon_rmap(new_page, vma, haddr);
+ mem_cgroup_commit_charge(new_page, memcg, false);
+ lru_cache_add_active_or_unevictable(new_page, vma);
set_pmd_at(mm, haddr, pmd, entry);
update_mmu_cache_pmd(vma, address, pmd);
if (!page) {
@@ -1681,7 +1694,7 @@ static void __split_huge_page_refcount(struct page *page,
&page_tail->_count);
/* after clearing PageTail the gup refcount can be released */
- smp_mb();
+ smp_mb__after_atomic();
/*
* retain hwpoison flag of the poisoned tail page:
@@ -1775,6 +1788,8 @@ static int __split_huge_page_map(struct page *page,
if (pmd) {
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
pmd_populate(mm, &_pmd, pgtable);
+ if (pmd_write(*pmd))
+ BUG_ON(page_mapcount(page) != 1);
haddr = address;
for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
@@ -1784,8 +1799,6 @@ static int __split_huge_page_map(struct page *page,
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
if (!pmd_write(*pmd))
entry = pte_wrprotect(entry);
- else
- BUG_ON(page_mapcount(page) != 1);
if (!pmd_young(*pmd))
entry = pte_mkold(entry);
if (pmd_numa(*pmd))
@@ -1838,7 +1851,7 @@ static void __split_huge_page(struct page *page,
struct list_head *list)
{
int mapcount, mapcount2;
- pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ pgoff_t pgoff = page_pgoff(page);
struct anon_vma_chain *avc;
BUG_ON(!PageHead(page));
@@ -2389,6 +2402,7 @@ static void collapse_huge_page(struct mm_struct *mm,
spinlock_t *pmd_ptl, *pte_ptl;
int isolated;
unsigned long hstart, hend;
+ struct mem_cgroup *memcg;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
@@ -2399,7 +2413,8 @@ static void collapse_huge_page(struct mm_struct *mm,
if (!new_page)
return;
- if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)))
+ if (unlikely(mem_cgroup_try_charge(new_page, mm,
+ GFP_TRANSHUGE, &memcg)))
return;
/*
@@ -2486,6 +2501,8 @@ static void collapse_huge_page(struct mm_struct *mm,
spin_lock(pmd_ptl);
BUG_ON(!pmd_none(*pmd));
page_add_new_anon_rmap(new_page, vma, address);
+ mem_cgroup_commit_charge(new_page, memcg, false);
+ lru_cache_add_active_or_unevictable(new_page, vma);
pgtable_trans_huge_deposit(mm, pmd, pgtable);
set_pmd_at(mm, address, pmd, _pmd);
update_mmu_cache_pmd(vma, address, pmd);
@@ -2499,7 +2516,7 @@ out_up_write:
return;
out:
- mem_cgroup_uncharge_page(new_page);
+ mem_cgroup_cancel_charge(new_page, memcg);
goto out_up_write;
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2024bbd573d2..7faab717ba19 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -31,6 +31,7 @@
#include <linux/io.h>
#include <linux/hugetlb.h>
+#include <linux/hugetlb_inline.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/node.h>
#include "internal.h"
diff --git a/mm/internal.h b/mm/internal.h
index 7f22a11fcc66..a1b651b11c5f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -247,7 +247,7 @@ static inline void mlock_migrate_page(struct page *new, struct page *old) { }
static inline struct page *mem_map_offset(struct page *base, int offset)
{
if (unlikely(offset >= MAX_ORDER_NR_PAGES))
- return pfn_to_page(page_to_pfn(base) + offset);
+ return nth_page(base, offset);
return base + offset;
}
diff --git a/mm/madvise.c b/mm/madvise.c
index a402f8fdc68e..0938b30da4ab 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -292,9 +292,6 @@ static long madvise_dontneed(struct vm_area_struct *vma,
/*
* Application wants to free up the pages and associated backing store.
* This is effectively punching a hole into the middle of a file.
- *
- * NOTE: Currently, only shmfs/tmpfs is supported for this operation.
- * Other filesystems return -ENOSYS.
*/
static long madvise_remove(struct vm_area_struct *vma,
struct vm_area_struct **prev,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a2c7bcb0e6eb..6c3ffb02651e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -882,13 +882,6 @@ static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
return val;
}
-static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
- bool charge)
-{
- int val = (charge) ? 1 : -1;
- this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
-}
-
static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
enum mem_cgroup_events_index idx)
{
@@ -909,13 +902,15 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
struct page *page,
- bool anon, int nr_pages)
+ int nr_pages)
{
+ preempt_disable();
+
/*
* Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is
* counted as CACHE even if it's on ANON LRU.
*/
- if (anon)
+ if (PageAnon(page))
__this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS],
nr_pages);
else
@@ -935,6 +930,7 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
}
__this_cpu_add(memcg->stat->nr_page_events, nr_pages);
+ preempt_enable();
}
unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
@@ -1347,20 +1343,6 @@ out:
return lruvec;
}
-/*
- * Following LRU functions are allowed to be used without PCG_LOCK.
- * Operations are called by routine of global LRU independently from memcg.
- * What we have to take care of here is validness of pc->mem_cgroup.
- *
- * Changes to pc->mem_cgroup happens when
- * 1. charge
- * 2. moving account
- * In typical case, "charge" is done before add-to-lru. Exception is SwapCache.
- * It is added to LRU before charge.
- * If PCG_USED bit is not set, page_cgroup is not added to this private LRU.
- * When moving account, the page is not on LRU. It's isolated.
- */
-
/**
* mem_cgroup_page_lruvec - return lruvec for adding an lru page
* @page: the page
@@ -2261,22 +2243,14 @@ cleanup:
*
* Notes: Race condition
*
- * We usually use lock_page_cgroup() for accessing page_cgroup member but
- * it tends to be costly. But considering some conditions, we doesn't need
- * to do so _always_.
- *
- * Considering "charge", lock_page_cgroup() is not required because all
- * file-stat operations happen after a page is attached to radix-tree. There
- * are no race with "charge".
+ * Charging occurs during page instantiation, while the page is
+ * unmapped and locked in page migration, or while the page table is
+ * locked in THP migration. No race is possible.
*
- * Considering "uncharge", we know that memcg doesn't clear pc->mem_cgroup
- * at "uncharge" intentionally. So, we always see valid pc->mem_cgroup even
- * if there are race with "uncharge". Statistics itself is properly handled
- * by flags.
+ * Uncharge happens to pages with zero references, no race possible.
*
- * Considering "move", this is an only case we see a race. To make the race
- * small, we check memcg->moving_account and detect there are possibility
- * of race or not. If there is, we take a lock.
+ * Charge moving between groups is protected by checking mm->moving
+ * account and taking the move_lock in the slowpath.
*/
void __mem_cgroup_begin_update_page_stat(struct page *page,
@@ -2551,55 +2525,63 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
return NOTIFY_OK;
}
-
-/* See mem_cgroup_try_charge() for details */
-enum {
- CHARGE_OK, /* success */
- CHARGE_RETRY, /* need to retry but retry is not bad */
- CHARGE_NOMEM, /* we can't do more. return -ENOMEM */
- CHARGE_WOULDBLOCK, /* GFP_WAIT wasn't set and no enough res. */
-};
-
-static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
- unsigned int nr_pages, unsigned int min_pages,
- bool invoke_oom)
+static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
+ unsigned int nr_pages)
{
- unsigned long csize = nr_pages * PAGE_SIZE;
+ unsigned int batch = max(CHARGE_BATCH, nr_pages);
+ int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
struct mem_cgroup *mem_over_limit;
struct res_counter *fail_res;
+ unsigned long nr_reclaimed;
unsigned long flags = 0;
- int ret;
+ unsigned long long size;
+ int ret = 0;
- ret = res_counter_charge(&memcg->res, csize, &fail_res);
+retry:
+ if (consume_stock(memcg, nr_pages))
+ goto done;
- if (likely(!ret)) {
+ size = batch * PAGE_SIZE;
+ if (!res_counter_charge(&memcg->res, size, &fail_res)) {
if (!do_swap_account)
- return CHARGE_OK;
- ret = res_counter_charge(&memcg->memsw, csize, &fail_res);
- if (likely(!ret))
- return CHARGE_OK;
-
- res_counter_uncharge(&memcg->res, csize);
+ goto done_restock;
+ if (!res_counter_charge(&memcg->memsw, size, &fail_res))
+ goto done_restock;
+ res_counter_uncharge(&memcg->res, size);
mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
flags |= MEM_CGROUP_RECLAIM_NOSWAP;
} else
mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
+
+ if (batch > nr_pages) {
+ batch = nr_pages;
+ goto retry;
+ }
+
/*
- * Never reclaim on behalf of optional batching, retry with a
- * single page instead.
+ * Unlike in global OOM situations, memcg is not in a physical
+ * memory shortage. Allow dying and OOM-killed tasks to
+ * bypass the last charges so that they can exit quickly and
+ * free their memory.
*/
- if (nr_pages > min_pages)
- return CHARGE_RETRY;
+ if (unlikely(test_thread_flag(TIF_MEMDIE) ||
+ fatal_signal_pending(current) ||
+ current->flags & PF_EXITING))
+ goto bypass;
+
+ if (unlikely(task_in_memcg_oom(current)))
+ goto nomem;
if (!(gfp_mask & __GFP_WAIT))
- return CHARGE_WOULDBLOCK;
+ goto nomem;
- if (gfp_mask & __GFP_NORETRY)
- return CHARGE_NOMEM;
+ nr_reclaimed = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
+
+ if (mem_cgroup_margin(mem_over_limit) >= batch)
+ goto retry;
- ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
- if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
- return CHARGE_RETRY;
+ if (gfp_mask & __GFP_NORETRY)
+ goto nomem;
/*
* Even though the limit is exceeded at this point, reclaim
* may have been able to free some pages. Retry the charge
@@ -2609,142 +2591,47 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
* unlikely to succeed so close to the limit, and we fall back
* to regular pages anyway in case of failure.
*/
- if (nr_pages <= (1 << PAGE_ALLOC_COSTLY_ORDER) && ret)
- return CHARGE_RETRY;
-
+ if (nr_reclaimed && batch <= (1 << PAGE_ALLOC_COSTLY_ORDER))
+ goto retry;
/*
* At task move, charge accounts can be doubly counted. So, it's
* better to wait until the end of task_move if something is going on.
*/
if (mem_cgroup_wait_acct_move(mem_over_limit))
- return CHARGE_RETRY;
-
- if (invoke_oom)
- mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(csize));
-
- return CHARGE_NOMEM;
-}
-
-/**
- * mem_cgroup_try_charge - try charging a memcg
- * @memcg: memcg to charge
- * @nr_pages: number of pages to charge
- * @oom: trigger OOM if reclaim fails
- *
- * Returns 0 if @memcg was charged successfully, -EINTR if the charge
- * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
- */
-static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
- gfp_t gfp_mask,
- unsigned int nr_pages,
- bool oom)
-{
- unsigned int batch = max(CHARGE_BATCH, nr_pages);
- int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
- int ret;
-
- if (mem_cgroup_is_root(memcg))
- goto done;
- /*
- * Unlike in global OOM situations, memcg is not in a physical
- * memory shortage. Allow dying and OOM-killed tasks to
- * bypass the last charges so that they can exit quickly and
- * free their memory.
- */
- if (unlikely(test_thread_flag(TIF_MEMDIE) ||
- fatal_signal_pending(current) ||
- current->flags & PF_EXITING))
- goto bypass;
+ goto retry;
- if (unlikely(task_in_memcg_oom(current)))
- goto nomem;
+ if (nr_retries--)
+ goto retry;
if (gfp_mask & __GFP_NOFAIL)
- oom = false;
-again:
- if (consume_stock(memcg, nr_pages))
- goto done;
-
- do {
- bool invoke_oom = oom && !nr_oom_retries;
-
- /* If killed, bypass charge */
- if (fatal_signal_pending(current))
- goto bypass;
+ goto bypass;
- ret = mem_cgroup_do_charge(memcg, gfp_mask, batch,
- nr_pages, invoke_oom);
- switch (ret) {
- case CHARGE_OK:
- break;
- case CHARGE_RETRY: /* not in OOM situation but retry */
- batch = nr_pages;
- goto again;
- case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
- goto nomem;
- case CHARGE_NOMEM: /* OOM routine works */
- if (!oom || invoke_oom)
- goto nomem;
- nr_oom_retries--;
- break;
- }
- } while (ret != CHARGE_OK);
+ if (fatal_signal_pending(current))
+ goto bypass;
- if (batch > nr_pages)
- refill_stock(memcg, batch - nr_pages);
-done:
- return 0;
+ mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(batch));
nomem:
if (!(gfp_mask & __GFP_NOFAIL))
return -ENOMEM;
bypass:
- return -EINTR;
-}
-
-/**
- * mem_cgroup_try_charge_mm - try charging a mm
- * @mm: mm_struct to charge
- * @nr_pages: number of pages to charge
- * @oom: trigger OOM if reclaim fails
- *
- * Returns the charged mem_cgroup associated with the given mm_struct or
- * NULL the charge failed.
- */
-static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
- gfp_t gfp_mask,
- unsigned int nr_pages,
- bool oom)
-
-{
- struct mem_cgroup *memcg;
- int ret;
+ memcg = root_mem_cgroup;
+ ret = -EINTR;
+ goto retry;
- memcg = get_mem_cgroup_from_mm(mm);
- ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom);
- css_put(&memcg->css);
- if (ret == -EINTR)
- memcg = root_mem_cgroup;
- else if (ret)
- memcg = NULL;
-
- return memcg;
+done_restock:
+ if (batch > nr_pages)
+ refill_stock(memcg, batch - nr_pages);
+done:
+ return ret;
}
-/*
- * Somemtimes we have to undo a charge we got by try_charge().
- * This function is for that and do uncharge, put css's refcnt.
- * gotten by try_charge().
- */
-static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
- unsigned int nr_pages)
+static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
{
- if (!mem_cgroup_is_root(memcg)) {
- unsigned long bytes = nr_pages * PAGE_SIZE;
+ unsigned long bytes = nr_pages * PAGE_SIZE;
- res_counter_uncharge(&memcg->res, bytes);
- if (do_swap_account)
- res_counter_uncharge(&memcg->memsw, bytes);
- }
+ res_counter_uncharge(&memcg->res, bytes);
+ if (do_swap_account)
+ res_counter_uncharge(&memcg->memsw, bytes);
}
/*
@@ -2756,9 +2643,6 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
{
unsigned long bytes = nr_pages * PAGE_SIZE;
- if (mem_cgroup_is_root(memcg))
- return;
-
res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes);
if (do_swap_account)
res_counter_uncharge_until(&memcg->memsw,
@@ -2779,6 +2663,16 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
return mem_cgroup_from_id(id);
}
+/*
+ * try_get_mem_cgroup_from_page - look up page's memcg association
+ * @page: the page
+ *
+ * Look up, get a css reference, and return the memcg that owns @page.
+ *
+ * The page must be locked to prevent racing with swap-in and page
+ * cache charges. If coming from an unlocked page table, the caller
+ * must ensure the page is on the LRU or this can race with charging.
+ */
struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
{
struct mem_cgroup *memcg = NULL;
@@ -2789,7 +2683,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
VM_BUG_ON_PAGE(!PageLocked(page), page);
pc = lookup_page_cgroup(page);
- lock_page_cgroup(pc);
if (PageCgroupUsed(pc)) {
memcg = pc->mem_cgroup;
if (memcg && !css_tryget_online(&memcg->css))
@@ -2803,23 +2696,17 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
memcg = NULL;
rcu_read_unlock();
}
- unlock_page_cgroup(pc);
return memcg;
}
-static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
- struct page *page,
- unsigned int nr_pages,
- enum charge_type ctype,
- bool lrucare)
+static void commit_charge(struct page *page, struct mem_cgroup *memcg,
+ unsigned int nr_pages, bool lrucare)
{
struct page_cgroup *pc = lookup_page_cgroup(page);
struct zone *uninitialized_var(zone);
struct lruvec *lruvec;
bool was_on_lru = false;
- bool anon;
- lock_page_cgroup(pc);
VM_BUG_ON_PAGE(PageCgroupUsed(pc), page);
/*
* we don't need page_cgroup_lock about tail pages, becase they are not
@@ -2841,16 +2728,22 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
}
}
- pc->mem_cgroup = memcg;
/*
- * We access a page_cgroup asynchronously without lock_page_cgroup().
- * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
- * is accessed after testing USED bit. To make pc->mem_cgroup visible
- * before USED bit, we need memory barrier here.
- * See mem_cgroup_add_lru_list(), etc.
+ * Nobody should be changing or seriously looking at
+ * pc->mem_cgroup and pc->flags at this point:
+ *
+ * - the page is uncharged
+ *
+ * - the page is off-LRU
+ *
+ * - an anonymous fault has exclusive page access, except for
+ * a locked page table
+ *
+ * - a page cache insertion, a swapin fault, or a migration
+ * have the page locked
*/
- smp_wmb();
- SetPageCgroupUsed(pc);
+ pc->mem_cgroup = memcg;
+ pc->flags = PCG_USED | PCG_MEM | (do_swap_account ? PCG_MEMSW : 0);
if (lrucare) {
if (was_on_lru) {
@@ -2862,14 +2755,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
spin_unlock_irq(&zone->lru_lock);
}
- if (ctype == MEM_CGROUP_CHARGE_TYPE_ANON)
- anon = true;
- else
- anon = false;
-
- mem_cgroup_charge_statistics(memcg, page, anon, nr_pages);
- unlock_page_cgroup(pc);
-
+ mem_cgroup_charge_statistics(memcg, page, nr_pages);
/*
* "charge_statistics" updated event counter. Then, check it.
* Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
@@ -2937,22 +2823,21 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
if (ret)
return ret;
- ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT,
- oom_gfp_allowed(gfp));
+ ret = try_charge(memcg, gfp, size >> PAGE_SHIFT);
if (ret == -EINTR) {
/*
- * mem_cgroup_try_charge() chosed to bypass to root due to
- * OOM kill or fatal signal. Since our only options are to
- * either fail the allocation or charge it to this cgroup, do
- * it as a temporary condition. But we can't fail. From a
- * kmem/slab perspective, the cache has already been selected,
- * by mem_cgroup_kmem_get_cache(), so it is too late to change
+ * try_charge() chose to bypass to root due to OOM kill or
+ * fatal signal. Since our only options are to either fail
+ * the allocation or charge it to this cgroup, do it as a
+ * temporary condition. But we can't fail. From a kmem/slab
+ * perspective, the cache has already been selected, by
+ * mem_cgroup_kmem_get_cache(), so it is too late to change
* our minds.
*
* This condition will only trigger if the task entered
- * memcg_charge_kmem in a sane state, but was OOM-killed during
- * mem_cgroup_try_charge() above. Tasks that were already
- * dying when the allocation triggers should have been already
+ * memcg_charge_kmem in a sane state, but was OOM-killed
+ * during try_charge() above. Tasks that were already dying
+ * when the allocation triggers should have been already
* directed to the root cgroup in memcontrol.h
*/
res_counter_charge_nofail(&memcg->res, size, &fail_res);
@@ -3076,6 +2961,8 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
return 0;
}
+static void memcg_unregister_cache_func(struct work_struct *work);
+
int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
struct kmem_cache *root_cache)
{
@@ -3097,6 +2984,9 @@ int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
if (memcg) {
s->memcg_params->memcg = memcg;
s->memcg_params->root_cache = root_cache;
+ atomic_long_set(&s->memcg_params->refcnt, 1);
+ INIT_WORK(&s->memcg_params->unregister_work,
+ memcg_unregister_cache_func);
css_get(&memcg->css);
} else
s->memcg_params->is_root_cache = true;
@@ -3178,6 +3068,25 @@ static void memcg_unregister_cache(struct kmem_cache *cachep)
kmem_cache_destroy(cachep);
}
+static void memcg_unregister_cache_func(struct work_struct *work)
+{
+ struct memcg_cache_params *params =
+ container_of(work, struct memcg_cache_params, unregister_work);
+ struct kmem_cache *cachep = memcg_params_to_cache(params);
+
+ mutex_lock(&memcg_slab_mutex);
+ memcg_unregister_cache(cachep);
+ mutex_unlock(&memcg_slab_mutex);
+}
+
+static void memcg_unregister_cache_rcu_func(struct rcu_head *rcu)
+{
+ struct memcg_cache_params *params =
+ container_of(rcu, struct memcg_cache_params, rcu_head);
+
+ schedule_work(&params->unregister_work);
+}
+
/*
* During the creation a new cache, we need to disable our accounting mechanism
* altogether. This is true even if we are not creating, but rather just
@@ -3233,6 +3142,7 @@ static void memcg_unregister_all_caches(struct mem_cgroup *memcg)
{
struct kmem_cache *cachep;
struct memcg_cache_params *params, *tmp;
+ LIST_HEAD(empty_caches);
if (!memcg_kmem_is_active(memcg))
return;
@@ -3240,9 +3150,31 @@ static void memcg_unregister_all_caches(struct mem_cgroup *memcg)
mutex_lock(&memcg_slab_mutex);
list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) {
cachep = memcg_params_to_cache(params);
+
+ memcg_cache_mark_dead(cachep);
kmem_cache_shrink(cachep);
- if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
- memcg_unregister_cache(cachep);
+
+ if (atomic_long_dec_and_test(&cachep->memcg_params->refcnt))
+ list_move(&cachep->memcg_params->list, &empty_caches);
+ }
+
+ /*
+ * kmem_cache_free doesn't expect that the cache can be destroyed as
+ * soon as the object is freed, e.g. SLUB's implementation may want to
+ * update cache stats after putting the object to the free list.
+ *
+ * Therefore we should wait for all kmem_cache_free's to finish before
+ * proceeding to cache destruction. Since both SLAB and SLUB versions
+ * of kmem_cache_free are non-preemptable, we wait for rcu-sched grace
+ * period to elapse.
+ */
+ synchronize_sched();
+
+ while (!list_empty(&empty_caches)) {
+ params = list_first_entry(&empty_caches,
+ struct memcg_cache_params, list);
+ cachep = memcg_params_to_cache(params);
+ memcg_unregister_cache(cachep);
}
mutex_unlock(&memcg_slab_mutex);
}
@@ -3315,14 +3247,18 @@ int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order)
res = memcg_charge_kmem(cachep->memcg_params->memcg, gfp,
PAGE_SIZE << order);
if (!res)
- atomic_add(1 << order, &cachep->memcg_params->nr_pages);
+ atomic_long_inc(&cachep->memcg_params->refcnt);
return res;
}
void __memcg_uncharge_slab(struct kmem_cache *cachep, int order)
{
memcg_uncharge_kmem(cachep->memcg_params->memcg, PAGE_SIZE << order);
- atomic_sub(1 << order, &cachep->memcg_params->nr_pages);
+
+ if (unlikely(atomic_long_dec_and_test(&cachep->memcg_params->refcnt)))
+ /* see memcg_unregister_all_caches */
+ call_rcu_sched(&cachep->memcg_params->rcu_head,
+ memcg_unregister_cache_rcu_func);
}
/*
@@ -3463,12 +3399,13 @@ void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
return;
}
-
+ /*
+ * The page is freshly allocated and not visible to any
+ * outside callers yet. Set up pc non-atomically.
+ */
pc = lookup_page_cgroup(page);
- lock_page_cgroup(pc);
pc->mem_cgroup = memcg;
- SetPageCgroupUsed(pc);
- unlock_page_cgroup(pc);
+ pc->flags = PCG_USED;
}
void __memcg_kmem_uncharge_pages(struct page *page, int order)
@@ -3478,19 +3415,11 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
pc = lookup_page_cgroup(page);
- /*
- * Fast unlocked return. Theoretically might have changed, have to
- * check again after locking.
- */
if (!PageCgroupUsed(pc))
return;
- lock_page_cgroup(pc);
- if (PageCgroupUsed(pc)) {
- memcg = pc->mem_cgroup;
- ClearPageCgroupUsed(pc);
- }
- unlock_page_cgroup(pc);
+ memcg = pc->mem_cgroup;
+ pc->flags = 0;
/*
* We trust that only if there is a memcg associated with the page, it
@@ -3510,7 +3439,6 @@ static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION)
/*
* Because tail pages are not marked as "used", set it. We're under
* zone->lru_lock, 'splitting on pmd' and compound_lock.
@@ -3531,8 +3459,7 @@ void mem_cgroup_split_huge_fixup(struct page *head)
for (i = 1; i < HPAGE_PMD_NR; i++) {
pc = head_pc + i;
pc->mem_cgroup = memcg;
- smp_wmb();/* see __commit_charge() */
- pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+ pc->flags = head_pc->flags;
}
__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
HPAGE_PMD_NR);
@@ -3562,7 +3489,6 @@ static int mem_cgroup_move_account(struct page *page,
{
unsigned long flags;
int ret;
- bool anon = PageAnon(page);
VM_BUG_ON(from == to);
VM_BUG_ON_PAGE(PageLRU(page), page);
@@ -3576,15 +3502,13 @@ static int mem_cgroup_move_account(struct page *page,
if (nr_pages > 1 && !PageTransHuge(page))
goto out;
- lock_page_cgroup(pc);
-
ret = -EINVAL;
if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
- goto unlock;
+ goto out;
move_lock_mem_cgroup(from, &flags);
- if (!anon && page_mapped(page)) {
+ if (!PageAnon(page) && page_mapped(page)) {
__this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
nr_pages);
__this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
@@ -3598,15 +3522,19 @@ static int mem_cgroup_move_account(struct page *page,
nr_pages);
}
- mem_cgroup_charge_statistics(from, page, anon, -nr_pages);
+ mem_cgroup_charge_statistics(from, page, -nr_pages);
+
+ /*
+ * It is safe to change pc->mem_cgroup here because the page
+ * is referenced, charged, and isolated - we can't race with
+ * uncharging, charging, migration, or LRU putback.
+ */
/* caller should have done css_get */
pc->mem_cgroup = to;
- mem_cgroup_charge_statistics(to, page, anon, nr_pages);
+ mem_cgroup_charge_statistics(to, page, nr_pages);
move_unlock_mem_cgroup(from, &flags);
ret = 0;
-unlock:
- unlock_page_cgroup(pc);
/*
* check events
*/
@@ -3682,357 +3610,6 @@ out:
return ret;
}
-int mem_cgroup_charge_anon(struct page *page,
- struct mm_struct *mm, gfp_t gfp_mask)
-{
- unsigned int nr_pages = 1;
- struct mem_cgroup *memcg;
- bool oom = true;
-
- if (mem_cgroup_disabled())
- return 0;
-
- VM_BUG_ON_PAGE(page_mapped(page), page);
- VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
- VM_BUG_ON(!mm);
-
- if (PageTransHuge(page)) {
- nr_pages <<= compound_order(page);
- VM_BUG_ON_PAGE(!PageTransHuge(page), page);
- /*
- * Never OOM-kill a process for a huge page. The
- * fault handler will fall back to regular pages.
- */
- oom = false;
- }
-
- memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom);
- if (!memcg)
- return -ENOMEM;
- __mem_cgroup_commit_charge(memcg, page, nr_pages,
- MEM_CGROUP_CHARGE_TYPE_ANON, false);
- return 0;
-}
-
-/*
- * While swap-in, try_charge -> commit or cancel, the page is locked.
- * And when try_charge() successfully returns, one refcnt to memcg without
- * struct page_cgroup is acquired. This refcnt will be consumed by
- * "commit()" or removed by "cancel()"
- */
-static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
- struct page *page,
- gfp_t mask,
- struct mem_cgroup **memcgp)
-{
- struct mem_cgroup *memcg = NULL;
- struct page_cgroup *pc;
- int ret;
-
- pc = lookup_page_cgroup(page);
- /*
- * Every swap fault against a single page tries to charge the
- * page, bail as early as possible. shmem_unuse() encounters
- * already charged pages, too. The USED bit is protected by
- * the page lock, which serializes swap cache removal, which
- * in turn serializes uncharging.
- */
- if (PageCgroupUsed(pc))
- goto out;
- if (do_swap_account)
- memcg = try_get_mem_cgroup_from_page(page);
- if (!memcg)
- memcg = get_mem_cgroup_from_mm(mm);
- ret = mem_cgroup_try_charge(memcg, mask, 1, true);
- css_put(&memcg->css);
- if (ret == -EINTR)
- memcg = root_mem_cgroup;
- else if (ret)
- return ret;
-out:
- *memcgp = memcg;
- return 0;
-}
-
-int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
- gfp_t gfp_mask, struct mem_cgroup **memcgp)
-{
- if (mem_cgroup_disabled()) {
- *memcgp = NULL;
- return 0;
- }
- /*
- * A racing thread's fault, or swapoff, may have already
- * updated the pte, and even removed page from swap cache: in
- * those cases unuse_pte()'s pte_same() test will fail; but
- * there's also a KSM case which does need to charge the page.
- */
- if (!PageSwapCache(page)) {
- struct mem_cgroup *memcg;
-
- memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
- if (!memcg)
- return -ENOMEM;
- *memcgp = memcg;
- return 0;
- }
- return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp);
-}
-
-void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
-{
- if (mem_cgroup_disabled())
- return;
- if (!memcg)
- return;
- __mem_cgroup_cancel_charge(memcg, 1);
-}
-
-static void
-__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
- enum charge_type ctype)
-{
- if (mem_cgroup_disabled())
- return;
- if (!memcg)
- return;
-
- __mem_cgroup_commit_charge(memcg, page, 1, ctype, true);
- /*
- * Now swap is on-memory. This means this page may be
- * counted both as mem and swap....double count.
- * Fix it by uncharging from memsw. Basically, this SwapCache is stable
- * under lock_page(). But in do_swap_page()::memory.c, reuse_swap_page()
- * may call delete_from_swap_cache() before reach here.
- */
- if (do_swap_account && PageSwapCache(page)) {
- swp_entry_t ent = {.val = page_private(page)};
- mem_cgroup_uncharge_swap(ent);
- }
-}
-
-void mem_cgroup_commit_charge_swapin(struct page *page,
- struct mem_cgroup *memcg)
-{
- __mem_cgroup_commit_charge_swapin(page, memcg,
- MEM_CGROUP_CHARGE_TYPE_ANON);
-}
-
-int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask)
-{
- enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
- struct mem_cgroup *memcg;
- int ret;
-
- if (mem_cgroup_disabled())
- return 0;
- if (PageCompound(page))
- return 0;
-
- if (PageSwapCache(page)) { /* shmem */
- ret = __mem_cgroup_try_charge_swapin(mm, page,
- gfp_mask, &memcg);
- if (ret)
- return ret;
- __mem_cgroup_commit_charge_swapin(page, memcg, type);
- return 0;
- }
-
- memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
- if (!memcg)
- return -ENOMEM;
- __mem_cgroup_commit_charge(memcg, page, 1, type, false);
- return 0;
-}
-
-static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
- unsigned int nr_pages,
- const enum charge_type ctype)
-{
- struct memcg_batch_info *batch = NULL;
- bool uncharge_memsw = true;
-
- /* If swapout, usage of swap doesn't decrease */
- if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
- uncharge_memsw = false;
-
- batch = &current->memcg_batch;
- /*
- * In usual, we do css_get() when we remember memcg pointer.
- * But in this case, we keep res->usage until end of a series of
- * uncharges. Then, it's ok to ignore memcg's refcnt.
- */
- if (!batch->memcg)
- batch->memcg = memcg;
- /*
- * do_batch > 0 when unmapping pages or inode invalidate/truncate.
- * In those cases, all pages freed continuously can be expected to be in
- * the same cgroup and we have chance to coalesce uncharges.
- * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE)
- * because we want to do uncharge as soon as possible.
- */
-
- if (!batch->do_batch || test_thread_flag(TIF_MEMDIE))
- goto direct_uncharge;
-
- if (nr_pages > 1)
- goto direct_uncharge;
-
- /*
- * In typical case, batch->memcg == mem. This means we can
- * merge a series of uncharges to an uncharge of res_counter.
- * If not, we uncharge res_counter ony by one.
- */
- if (batch->memcg != memcg)
- goto direct_uncharge;
- /* remember freed charge and uncharge it later */
- batch->nr_pages++;
- if (uncharge_memsw)
- batch->memsw_nr_pages++;
- return;
-direct_uncharge:
- res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
- if (uncharge_memsw)
- res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
- if (unlikely(batch->memcg != memcg))
- memcg_oom_recover(memcg);
-}
-
-/*
- * uncharge if !page_mapped(page)
- */
-static struct mem_cgroup *
-__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
- bool end_migration)
-{
- struct mem_cgroup *memcg = NULL;
- unsigned int nr_pages = 1;
- struct page_cgroup *pc;
- bool anon;
-
- if (mem_cgroup_disabled())
- return NULL;
-
- if (PageTransHuge(page)) {
- nr_pages <<= compound_order(page);
- VM_BUG_ON_PAGE(!PageTransHuge(page), page);
- }
- /*
- * Check if our page_cgroup is valid
- */
- pc = lookup_page_cgroup(page);
- if (unlikely(!PageCgroupUsed(pc)))
- return NULL;
-
- lock_page_cgroup(pc);
-
- memcg = pc->mem_cgroup;
-
- if (!PageCgroupUsed(pc))
- goto unlock_out;
-
- anon = PageAnon(page);
-
- switch (ctype) {
- case MEM_CGROUP_CHARGE_TYPE_ANON:
- /*
- * Generally PageAnon tells if it's the anon statistics to be
- * updated; but sometimes e.g. mem_cgroup_uncharge_page() is
- * used before page reached the stage of being marked PageAnon.
- */
- anon = true;
- /* fallthrough */
- case MEM_CGROUP_CHARGE_TYPE_DROP:
- /* See mem_cgroup_prepare_migration() */
- if (page_mapped(page))
- goto unlock_out;
- /*
- * Pages under migration may not be uncharged. But
- * end_migration() /must/ be the one uncharging the
- * unused post-migration page and so it has to call
- * here with the migration bit still set. See the
- * res_counter handling below.
- */
- if (!end_migration && PageCgroupMigration(pc))
- goto unlock_out;
- break;
- case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
- if (!PageAnon(page)) { /* Shared memory */
- if (page->mapping && !page_is_file_cache(page))
- goto unlock_out;
- } else if (page_mapped(page)) /* Anon */
- goto unlock_out;
- break;
- default:
- break;
- }
-
- mem_cgroup_charge_statistics(memcg, page, anon, -nr_pages);
-
- ClearPageCgroupUsed(pc);
- /*
- * pc->mem_cgroup is not cleared here. It will be accessed when it's
- * freed from LRU. This is safe because uncharged page is expected not
- * to be reused (freed soon). Exception is SwapCache, it's handled by
- * special functions.
- */
-
- unlock_page_cgroup(pc);
- /*
- * even after unlock, we have memcg->res.usage here and this memcg
- * will never be freed, so it's safe to call css_get().
- */
- memcg_check_events(memcg, page);
- if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) {
- mem_cgroup_swap_statistics(memcg, true);
- css_get(&memcg->css);
- }
- /*
- * Migration does not charge the res_counter for the
- * replacement page, so leave it alone when phasing out the
- * page that is unused after the migration.
- */
- if (!end_migration && !mem_cgroup_is_root(memcg))
- mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
-
- return memcg;
-
-unlock_out:
- unlock_page_cgroup(pc);
- return NULL;
-}
-
-void mem_cgroup_uncharge_page(struct page *page)
-{
- /* early check. */
- if (page_mapped(page))
- return;
- VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
- /*
- * If the page is in swap cache, uncharge should be deferred
- * to the swap path, which also properly accounts swap usage
- * and handles memcg lifetime.
- *
- * Note that this check is not stable and reclaim may add the
- * page to swap cache at any time after this. However, if the
- * page is not in swap cache by the time page->mapcount hits
- * 0, there won't be any page table references to the swap
- * slot, and reclaim will free it and not actually write the
- * page to disk.
- */
- if (PageSwapCache(page))
- return;
- __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
-}
-
-void mem_cgroup_uncharge_cache_page(struct page *page)
-{
- VM_BUG_ON_PAGE(page_mapped(page), page);
- VM_BUG_ON_PAGE(page->mapping, page);
- __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false);
-}
-
/*
* Batch_start/batch_end is called in unmap_page_range/invlidate/trucate.
* In that cases, pages are freed continuously and we can expect pages
@@ -4080,58 +3657,12 @@ void mem_cgroup_uncharge_end(void)
batch->memcg = NULL;
}
-#ifdef CONFIG_SWAP
-/*
- * called after __delete_from_swap_cache() and drop "page" account.
- * memcg information is recorded to swap_cgroup of "ent"
- */
-void
-mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
-{
- struct mem_cgroup *memcg;
- int ctype = MEM_CGROUP_CHARGE_TYPE_SWAPOUT;
-
- if (!swapout) /* this was a swap cache but the swap is unused ! */
- ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
-
- memcg = __mem_cgroup_uncharge_common(page, ctype, false);
-
- /*
- * record memcg information, if swapout && memcg != NULL,
- * css_get() was called in uncharge().
- */
- if (do_swap_account && swapout && memcg)
- swap_cgroup_record(ent, mem_cgroup_id(memcg));
-}
-#endif
-
#ifdef CONFIG_MEMCG_SWAP
-/*
- * called from swap_entry_free(). remove record in swap_cgroup and
- * uncharge "memsw" account.
- */
-void mem_cgroup_uncharge_swap(swp_entry_t ent)
+static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
+ bool charge)
{
- struct mem_cgroup *memcg;
- unsigned short id;
-
- if (!do_swap_account)
- return;
-
- id = swap_cgroup_record(ent, 0);
- rcu_read_lock();
- memcg = mem_cgroup_lookup(id);
- if (memcg) {
- /*
- * We uncharge this because swap is freed. This memcg can
- * be obsolete one. We avoid calling css_tryget_online().
- */
- if (!mem_cgroup_is_root(memcg))
- res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
- mem_cgroup_swap_statistics(memcg, false);
- css_put(&memcg->css);
- }
- rcu_read_unlock();
+ int val = (charge) ? 1 : -1;
+ this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
}
/**
@@ -4183,175 +3714,6 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
}
#endif
-/*
- * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
- * page belongs to.
- */
-void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
- struct mem_cgroup **memcgp)
-{
- struct mem_cgroup *memcg = NULL;
- unsigned int nr_pages = 1;
- struct page_cgroup *pc;
- enum charge_type ctype;
-
- *memcgp = NULL;
-
- if (mem_cgroup_disabled())
- return;
-
- if (PageTransHuge(page))
- nr_pages <<= compound_order(page);
-
- pc = lookup_page_cgroup(page);
- lock_page_cgroup(pc);
- if (PageCgroupUsed(pc)) {
- memcg = pc->mem_cgroup;
- css_get(&memcg->css);
- /*
- * At migrating an anonymous page, its mapcount goes down
- * to 0 and uncharge() will be called. But, even if it's fully
- * unmapped, migration may fail and this page has to be
- * charged again. We set MIGRATION flag here and delay uncharge
- * until end_migration() is called
- *
- * Corner Case Thinking
- * A)
- * When the old page was mapped as Anon and it's unmap-and-freed
- * while migration was ongoing.
- * If unmap finds the old page, uncharge() of it will be delayed
- * until end_migration(). If unmap finds a new page, it's
- * uncharged when it make mapcount to be 1->0. If unmap code
- * finds swap_migration_entry, the new page will not be mapped
- * and end_migration() will find it(mapcount==0).
- *
- * B)
- * When the old page was mapped but migraion fails, the kernel
- * remaps it. A charge for it is kept by MIGRATION flag even
- * if mapcount goes down to 0. We can do remap successfully
- * without charging it again.
- *
- * C)
- * The "old" page is under lock_page() until the end of
- * migration, so, the old page itself will not be swapped-out.
- * If the new page is swapped out before end_migraton, our
- * hook to usual swap-out path will catch the event.
- */
- if (PageAnon(page))
- SetPageCgroupMigration(pc);
- }
- unlock_page_cgroup(pc);
- /*
- * If the page is not charged at this point,
- * we return here.
- */
- if (!memcg)
- return;
-
- *memcgp = memcg;
- /*
- * We charge new page before it's used/mapped. So, even if unlock_page()
- * is called before end_migration, we can catch all events on this new
- * page. In the case new page is migrated but not remapped, new page's
- * mapcount will be finally 0 and we call uncharge in end_migration().
- */
- if (PageAnon(page))
- ctype = MEM_CGROUP_CHARGE_TYPE_ANON;
- else
- ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
- /*
- * The page is committed to the memcg, but it's not actually
- * charged to the res_counter since we plan on replacing the
- * old one and only one page is going to be left afterwards.
- */
- __mem_cgroup_commit_charge(memcg, newpage, nr_pages, ctype, false);
-}
-
-/* remove redundant charge if migration failed*/
-void mem_cgroup_end_migration(struct mem_cgroup *memcg,
- struct page *oldpage, struct page *newpage, bool migration_ok)
-{
- struct page *used, *unused;
- struct page_cgroup *pc;
- bool anon;
-
- if (!memcg)
- return;
-
- if (!migration_ok) {
- used = oldpage;
- unused = newpage;
- } else {
- used = newpage;
- unused = oldpage;
- }
- anon = PageAnon(used);
- __mem_cgroup_uncharge_common(unused,
- anon ? MEM_CGROUP_CHARGE_TYPE_ANON
- : MEM_CGROUP_CHARGE_TYPE_CACHE,
- true);
- css_put(&memcg->css);
- /*
- * We disallowed uncharge of pages under migration because mapcount
- * of the page goes down to zero, temporarly.
- * Clear the flag and check the page should be charged.
- */
- pc = lookup_page_cgroup(oldpage);
- lock_page_cgroup(pc);
- ClearPageCgroupMigration(pc);
- unlock_page_cgroup(pc);
-
- /*
- * If a page is a file cache, radix-tree replacement is very atomic
- * and we can skip this check. When it was an Anon page, its mapcount
- * goes down to 0. But because we added MIGRATION flage, it's not
- * uncharged yet. There are several case but page->mapcount check
- * and USED bit check in mem_cgroup_uncharge_page() will do enough
- * check. (see prepare_charge() also)
- */
- if (anon)
- mem_cgroup_uncharge_page(used);
-}
-
-/*
- * At replace page cache, newpage is not under any memcg but it's on
- * LRU. So, this function doesn't touch res_counter but handles LRU
- * in correct way. Both pages are locked so we cannot race with uncharge.
- */
-void mem_cgroup_replace_page_cache(struct page *oldpage,
- struct page *newpage)
-{
- struct mem_cgroup *memcg = NULL;
- struct page_cgroup *pc;
- enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
-
- if (mem_cgroup_disabled())
- return;
-
- pc = lookup_page_cgroup(oldpage);
- /* fix accounting on old pages */
- lock_page_cgroup(pc);
- if (PageCgroupUsed(pc)) {
- memcg = pc->mem_cgroup;
- mem_cgroup_charge_statistics(memcg, oldpage, false, -1);
- ClearPageCgroupUsed(pc);
- }
- unlock_page_cgroup(pc);
-
- /*
- * When called from shmem_replace_page(), in some cases the
- * oldpage has already been charged, and in some cases not.
- */
- if (!memcg)
- return;
- /*
- * Even if newpage->mapping was NULL before starting replacement,
- * the newpage may be on LRU(or pagevec for LRU) already. We lock
- * LRU while we overwrite pc->mem_cgroup.
- */
- __mem_cgroup_commit_charge(memcg, newpage, 1, type, true);
-}
-
#ifdef CONFIG_DEBUG_VM
static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
{
@@ -4817,78 +4179,24 @@ out:
return retval;
}
-
-static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
- enum mem_cgroup_stat_index idx)
-{
- struct mem_cgroup *iter;
- long val = 0;
-
- /* Per-cpu values can be negative, use a signed accumulator */
- for_each_mem_cgroup_tree(iter, memcg)
- val += mem_cgroup_read_stat(iter, idx);
-
- if (val < 0) /* race ? */
- val = 0;
- return val;
-}
-
-static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
-{
- u64 val;
-
- if (!mem_cgroup_is_root(memcg)) {
- if (!swap)
- return res_counter_read_u64(&memcg->res, RES_USAGE);
- else
- return res_counter_read_u64(&memcg->memsw, RES_USAGE);
- }
-
- /*
- * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
- * as well as in MEM_CGROUP_STAT_RSS_HUGE.
- */
- val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
- val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
-
- if (swap)
- val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
-
- return val << PAGE_SHIFT;
-}
-
static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
- struct cftype *cft)
+ struct cftype *cft)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
- u64 val;
- int name;
- enum res_type type;
-
- type = MEMFILE_TYPE(cft->private);
- name = MEMFILE_ATTR(cft->private);
+ enum res_type type = MEMFILE_TYPE(cft->private);
+ int name = MEMFILE_ATTR(cft->private);
switch (type) {
case _MEM:
- if (name == RES_USAGE)
- val = mem_cgroup_usage(memcg, false);
- else
- val = res_counter_read_u64(&memcg->res, name);
- break;
+ return res_counter_read_u64(&memcg->res, name);
case _MEMSWAP:
- if (name == RES_USAGE)
- val = mem_cgroup_usage(memcg, true);
- else
- val = res_counter_read_u64(&memcg->memsw, name);
- break;
+ return res_counter_read_u64(&memcg->memsw, name);
case _KMEM:
- val = res_counter_read_u64(&memcg->kmem, name);
+ return res_counter_read_u64(&memcg->kmem, name);
break;
default:
BUG();
}
-
- return val;
}
#ifdef CONFIG_MEMCG_KMEM
@@ -5350,7 +4658,10 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
if (!t)
goto unlock;
- usage = mem_cgroup_usage(memcg, swap);
+ if (!swap)
+ usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+ else
+ usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
/*
* current_threshold points to threshold just below or equal to usage.
@@ -5442,15 +4753,15 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
mutex_lock(&memcg->thresholds_lock);
- if (type == _MEM)
+ if (type == _MEM) {
thresholds = &memcg->thresholds;
- else if (type == _MEMSWAP)
+ usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+ } else if (type == _MEMSWAP) {
thresholds = &memcg->memsw_thresholds;
- else
+ usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+ } else
BUG();
- usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
-
/* Check if a threshold crossed before adding a new one */
if (thresholds->primary)
__mem_cgroup_threshold(memcg, type == _MEMSWAP);
@@ -5530,18 +4841,19 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
int i, j, size;
mutex_lock(&memcg->thresholds_lock);
- if (type == _MEM)
+
+ if (type == _MEM) {
thresholds = &memcg->thresholds;
- else if (type == _MEMSWAP)
+ usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+ } else if (type == _MEMSWAP) {
thresholds = &memcg->memsw_thresholds;
- else
+ usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+ } else
BUG();
if (!thresholds->primary)
goto unlock;
- usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
-
/* Check if a threshold crossed before removing */
__mem_cgroup_threshold(memcg, type == _MEMSWAP);
@@ -6296,9 +5608,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
* core guarantees its existence.
*/
} else {
- res_counter_init(&memcg->res, NULL);
- res_counter_init(&memcg->memsw, NULL);
- res_counter_init(&memcg->kmem, NULL);
+ res_counter_init(&memcg->res, &root_mem_cgroup->res);
+ res_counter_init(&memcg->memsw, &root_mem_cgroup->memsw);
+ res_counter_init(&memcg->kmem, &root_mem_cgroup->kmem);
/*
* Deeper hierachy with use_hierarchy == false doesn't make
* much sense so let cgroup subsystem know about this
@@ -6409,55 +5721,38 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
#ifdef CONFIG_MMU
/* Handlers for move charge at task migration. */
-#define PRECHARGE_COUNT_AT_ONCE 256
static int mem_cgroup_do_precharge(unsigned long count)
{
- int ret = 0;
- int batch_count = PRECHARGE_COUNT_AT_ONCE;
- struct mem_cgroup *memcg = mc.to;
+ int ret;
- if (mem_cgroup_is_root(memcg)) {
+ /* Try a single bulk charge without reclaim first */
+ ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count);
+ if (!ret) {
mc.precharge += count;
- /* we don't need css_get for root */
return ret;
}
- /* try to charge at once */
- if (count > 1) {
- struct res_counter *dummy;
- /*
- * "memcg" cannot be under rmdir() because we've already checked
- * by cgroup_lock_live_cgroup() that it is not removed and we
- * are still under the same cgroup_mutex. So we can postpone
- * css_get().
- */
- if (res_counter_charge(&memcg->res, PAGE_SIZE * count, &dummy))
- goto one_by_one;
- if (do_swap_account && res_counter_charge(&memcg->memsw,
- PAGE_SIZE * count, &dummy)) {
- res_counter_uncharge(&memcg->res, PAGE_SIZE * count);
- goto one_by_one;
- }
- mc.precharge += count;
+ if (ret == -EINTR) {
+ cancel_charge(root_mem_cgroup, count);
return ret;
}
-one_by_one:
- /* fall back to one by one charge */
+
+ /* Try charges one by one with reclaim */
while (count--) {
- if (signal_pending(current)) {
- ret = -EINTR;
- break;
- }
- if (!batch_count--) {
- batch_count = PRECHARGE_COUNT_AT_ONCE;
- cond_resched();
- }
- ret = mem_cgroup_try_charge(memcg, GFP_KERNEL, 1, false);
+ ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1);
+ /*
+ * In case of failure, any residual charges against
+ * mc.to will be dropped by mem_cgroup_clear_mc()
+ * later on. However, cancel any charges that are
+ * bypassed to root right away or they'll be lost.
+ */
+ if (ret == -EINTR)
+ cancel_charge(root_mem_cgroup, 1);
if (ret)
- /* mem_cgroup_clear_mc() will do uncharge later */
return ret;
mc.precharge++;
+ cond_resched();
}
- return ret;
+ return 0;
}
/**
@@ -6593,9 +5888,9 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
if (page) {
pc = lookup_page_cgroup(page);
/*
- * Do only loose check w/o page_cgroup lock.
- * mem_cgroup_move_account() checks the pc is valid or not under
- * the lock.
+ * Do only loose check w/o serialization.
+ * mem_cgroup_move_account() checks the pc is valid or
+ * not under LRU exclusion.
*/
if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
ret = MC_TARGET_PAGE;
@@ -6720,7 +6015,7 @@ static void __mem_cgroup_clear_mc(void)
/* we must uncharge all the leftover precharges from mc.to */
if (mc.precharge) {
- __mem_cgroup_cancel_charge(mc.to, mc.precharge);
+ cancel_charge(mc.to, mc.precharge);
mc.precharge = 0;
}
/*
@@ -6728,27 +6023,24 @@ static void __mem_cgroup_clear_mc(void)
* we must uncharge here.
*/
if (mc.moved_charge) {
- __mem_cgroup_cancel_charge(mc.from, mc.moved_charge);
+ cancel_charge(mc.from, mc.moved_charge);
mc.moved_charge = 0;
}
/* we must fixup refcnts and charges */
if (mc.moved_swap) {
/* uncharge swap account from the old cgroup */
- if (!mem_cgroup_is_root(mc.from))
- res_counter_uncharge(&mc.from->memsw,
- PAGE_SIZE * mc.moved_swap);
+ res_counter_uncharge(&mc.from->memsw,
+ PAGE_SIZE * mc.moved_swap);
for (i = 0; i < mc.moved_swap; i++)
css_put(&mc.from->css);
- if (!mem_cgroup_is_root(mc.to)) {
- /*
- * we charged both to->res and to->memsw, so we should
- * uncharge to->res.
- */
- res_counter_uncharge(&mc.to->res,
- PAGE_SIZE * mc.moved_swap);
- }
+ /*
+ * we charged both to->res and to->memsw, so we should
+ * uncharge to->res.
+ */
+ res_counter_uncharge(&mc.to->res,
+ PAGE_SIZE * mc.moved_swap);
/* we've already done css_get(mc.to) */
mc.moved_swap = 0;
}
@@ -7057,6 +6349,321 @@ static void __init enable_swap_cgroup(void)
}
#endif
+#ifdef CONFIG_MEMCG_SWAP
+/**
+ * mem_cgroup_swapout - transfer a memsw charge to swap
+ * @page: page whose memsw charge to transfer
+ * @entry: swap entry to move the charge to
+ *
+ * Transfer the memsw charge of @page to @entry.
+ */
+void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+{
+ struct page_cgroup *pc;
+ unsigned short oldid;
+
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+ VM_BUG_ON_PAGE(page_count(page), page);
+
+ if (!do_swap_account)
+ return;
+
+ pc = lookup_page_cgroup(page);
+
+ /* Readahead page, never charged */
+ if (!PageCgroupUsed(pc))
+ return;
+
+ VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
+
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
+ VM_BUG_ON_PAGE(oldid, page);
+
+ pc->flags &= ~PCG_MEMSW;
+ css_get(&pc->mem_cgroup->css);
+ mem_cgroup_swap_statistics(pc->mem_cgroup, true);
+}
+
+/**
+ * mem_cgroup_uncharge_swap - uncharge a swap entry
+ * @entry: swap entry to uncharge
+ *
+ * Drop the memsw charge associated with @entry.
+ */
+void mem_cgroup_uncharge_swap(swp_entry_t entry)
+{
+ struct mem_cgroup *memcg;
+ unsigned short id;
+
+ if (!do_swap_account)
+ return;
+
+ id = swap_cgroup_record(entry, 0);
+ rcu_read_lock();
+ memcg = mem_cgroup_lookup(id);
+ if (memcg) {
+ res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+ mem_cgroup_swap_statistics(memcg, false);
+ css_put(&memcg->css);
+ }
+ rcu_read_unlock();
+}
+#endif
+
+/**
+ * mem_cgroup_try_charge - try charging a page
+ * @page: page to charge
+ * @mm: mm context of the victim
+ * @gfp_mask: reclaim mode
+ * @memcgp: charged memcg return
+ *
+ * Try to charge @page to the memcg that @mm belongs to, reclaiming
+ * pages according to @gfp_mask if necessary.
+ *
+ * Returns 0 on success, with *@memcgp pointing to the charged memcg.
+ * Otherwise, an error code is returned.
+ *
+ * After page->mapping has been set up, the caller must finalize the
+ * charge with mem_cgroup_commit_charge(). Or abort the transaction
+ * with mem_cgroup_cancel_charge() in case page instantiation fails.
+ */
+int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+ gfp_t gfp_mask, struct mem_cgroup **memcgp)
+{
+ struct mem_cgroup *memcg = NULL;
+ unsigned int nr_pages = 1;
+ int ret = 0;
+
+ if (mem_cgroup_disabled())
+ goto out;
+
+ if (PageSwapCache(page)) {
+ struct page_cgroup *pc = lookup_page_cgroup(page);
+ /*
+ * Every swap fault against a single page tries to charge the
+ * page, bail as early as possible. shmem_unuse() encounters
+ * already charged pages, too. The USED bit is protected by
+ * the page lock, which serializes swap cache removal, which
+ * in turn serializes uncharging.
+ */
+ if (PageCgroupUsed(pc))
+ goto out;
+ }
+
+ if (PageTransHuge(page)) {
+ nr_pages <<= compound_order(page);
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+ }
+
+ if (do_swap_account && PageSwapCache(page))
+ memcg = try_get_mem_cgroup_from_page(page);
+ if (!memcg)
+ memcg = get_mem_cgroup_from_mm(mm);
+
+ ret = try_charge(memcg, gfp_mask, nr_pages);
+
+ css_put(&memcg->css);
+
+ if (ret == -EINTR) {
+ memcg = root_mem_cgroup;
+ ret = 0;
+ }
+out:
+ *memcgp = memcg;
+ return ret;
+}
+
+/**
+ * mem_cgroup_commit_charge - commit a page charge
+ * @page: page to charge
+ * @memcg: memcg to charge the page to
+ * @lrucare: page might be on LRU already
+ *
+ * Finalize a charge transaction started by mem_cgroup_try_charge(),
+ * after page->mapping has been set up. This must happen atomically
+ * as part of the page instantiation, i.e. under the page table lock
+ * for anonymous pages, under the page lock for page and swap cache.
+ *
+ * In addition, the page must not be on the LRU during the commit, to
+ * prevent racing with task migration. If it might be, use @lrucare.
+ *
+ * Use mem_cgroup_cancel_charge() to cancel the transaction instead.
+ */
+void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
+ bool lrucare)
+{
+ unsigned int nr_pages = 1;
+
+ VM_BUG_ON_PAGE(!page->mapping, page);
+ VM_BUG_ON_PAGE(PageLRU(page) && !lrucare, page);
+
+ if (mem_cgroup_disabled())
+ return;
+ /*
+ * Swap faults will attempt to charge the same page multiple
+ * times. But reuse_swap_page() might have removed the page
+ * from swapcache already, so we can't check PageSwapCache().
+ */
+ if (!memcg)
+ return;
+
+ if (PageTransHuge(page)) {
+ nr_pages <<= compound_order(page);
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+ }
+
+ commit_charge(page, memcg, nr_pages, lrucare);
+
+ if (do_swap_account && PageSwapCache(page)) {
+ swp_entry_t entry = { .val = page_private(page) };
+ /*
+ * The swap entry might not get freed for a long time,
+ * let's not wait for it. The page already received a
+ * memory+swap charge, drop the swap entry duplicate.
+ */
+ mem_cgroup_uncharge_swap(entry);
+ }
+}
+
+/**
+ * mem_cgroup_cancel_charge - cancel a page charge
+ * @page: page to charge
+ * @memcg: memcg to charge the page to
+ *
+ * Cancel a charge transaction started by mem_cgroup_try_charge().
+ */
+void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
+{
+ unsigned int nr_pages = 1;
+
+ if (mem_cgroup_disabled())
+ return;
+ /*
+ * Swap faults will attempt to charge the same page multiple
+ * times. But reuse_swap_page() might have removed the page
+ * from swapcache already, so we can't check PageSwapCache().
+ */
+ if (!memcg)
+ return;
+
+ if (PageTransHuge(page)) {
+ nr_pages <<= compound_order(page);
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+ }
+
+ cancel_charge(memcg, nr_pages);
+}
+
+/**
+ * mem_cgroup_uncharge - uncharge a page
+ * @page: page to uncharge
+ *
+ * Uncharge a page previously charged with mem_cgroup_try_charge() and
+ * mem_cgroup_commit_charge().
+ */
+void mem_cgroup_uncharge(struct page *page)
+{
+ struct memcg_batch_info *batch;
+ unsigned int nr_pages = 1;
+ struct mem_cgroup *memcg;
+ struct page_cgroup *pc;
+ unsigned long flags;
+
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+ VM_BUG_ON_PAGE(page_count(page), page);
+
+ if (mem_cgroup_disabled())
+ return;
+
+ pc = lookup_page_cgroup(page);
+
+ /* Every final put_page() ends up here */
+ if (!PageCgroupUsed(pc))
+ return;
+
+ if (PageTransHuge(page)) {
+ nr_pages <<= compound_order(page);
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+ }
+ /*
+ * Nobody should be changing or seriously looking at
+ * pc->mem_cgroup and pc->flags at this point, we have fully
+ * exclusive access to the page.
+ */
+ memcg = pc->mem_cgroup;
+ flags = pc->flags;
+ pc->flags = 0;
+
+ mem_cgroup_charge_statistics(memcg, page, -nr_pages);
+ memcg_check_events(memcg, page);
+
+ batch = &current->memcg_batch;
+ if (!batch->memcg)
+ batch->memcg = memcg;
+ else if (batch->memcg != memcg)
+ goto uncharge;
+ if (nr_pages > 1)
+ goto uncharge;
+ if (!batch->do_batch)
+ goto uncharge;
+ if (test_thread_flag(TIF_MEMDIE))
+ goto uncharge;
+ if (flags & PCG_MEM)
+ batch->nr_pages++;
+ if (flags & PCG_MEMSW)
+ batch->memsw_nr_pages++;
+ return;
+uncharge:
+ if (flags & PCG_MEM)
+ res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
+ if (flags & PCG_MEMSW)
+ res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
+ if (batch->memcg != memcg)
+ memcg_oom_recover(memcg);
+}
+
+/**
+ * mem_cgroup_migrate - migrate a charge to another page
+ * @oldpage: currently charged page
+ * @newpage: page to transfer the charge to
+ * @lrucare: page might be on LRU already
+ *
+ * Migrate the charge from @oldpage to @newpage.
+ *
+ * Both pages must be locked, @newpage->mapping must be set up.
+ */
+void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
+ bool lrucare)
+{
+ unsigned int nr_pages = 1;
+ struct page_cgroup *pc;
+
+ VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
+ VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
+ VM_BUG_ON_PAGE(PageLRU(oldpage), oldpage);
+ VM_BUG_ON_PAGE(PageLRU(newpage), newpage);
+ VM_BUG_ON_PAGE(PageAnon(oldpage) != PageAnon(newpage), newpage);
+
+ if (mem_cgroup_disabled())
+ return;
+
+ pc = lookup_page_cgroup(oldpage);
+ if (!PageCgroupUsed(pc))
+ return;
+
+ VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage);
+ VM_BUG_ON_PAGE(do_swap_account && !(pc->flags & PCG_MEMSW), oldpage);
+ pc->flags &= ~(PCG_MEM | PCG_MEMSW);
+
+ if (PageTransHuge(oldpage)) {
+ nr_pages <<= compound_order(oldpage);
+ VM_BUG_ON_PAGE(!PageTransHuge(oldpage), oldpage);
+ VM_BUG_ON_PAGE(!PageTransHuge(newpage), newpage);
+ }
+
+ commit_charge(newpage, pc->mem_cgroup, nr_pages, lrucare);
+}
+
/*
* subsys_initcall() for memory controller.
*
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index cd8989c1027e..a7a89eb2f935 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -202,7 +202,7 @@ static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
#ifdef __ARCH_SI_TRAPNO
si.si_trapno = trapno;
#endif
- si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT;
+ si.si_addr_lsb = page_size_order(page) + PAGE_SHIFT;
if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) {
si.si_code = BUS_MCEERR_AR;
@@ -435,7 +435,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
if (av == NULL) /* Not actually mapped anymore */
return;
- pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ pgoff = page_pgoff(page);
read_lock(&tasklist_lock);
for_each_process (tsk) {
struct anon_vma_chain *vmac;
@@ -469,7 +469,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
mutex_lock(&mapping->i_mmap_mutex);
read_lock(&tasklist_lock);
for_each_process(tsk) {
- pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ pgoff_t pgoff = page_pgoff(page);
struct task_struct *t = task_early_kill(tsk, force_early);
if (!t)
@@ -895,7 +895,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
struct page *hpage = *hpagep;
struct page *ppage;
- if (PageReserved(p) || PageSlab(p))
+ if (PageReserved(p) || PageSlab(p) || !PageLRU(p))
return SWAP_SUCCESS;
/*
@@ -1159,9 +1159,6 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
action_result(pfn, "free buddy, 2nd try", DELAYED);
return 0;
}
- action_result(pfn, "non LRU", IGNORED);
- put_page(p);
- return -EBUSY;
}
}
@@ -1194,6 +1191,9 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
return 0;
}
+ if (!PageHuge(p) && !PageTransTail(p) && !PageLRU(p))
+ goto identify_page_state;
+
/*
* For error on the tail page, we should set PG_hwpoison
* on the head page to show that the hugepage is hwpoisoned
@@ -1243,6 +1243,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
goto out;
}
+identify_page_state:
res = -EBUSY;
/*
* The first check uses the current page flags which may not have any
diff --git a/mm/memory.c b/mm/memory.c
index d67fd9fcf1f2..09e2cd0d7fee 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1292,7 +1292,6 @@ static void unmap_page_range(struct mmu_gather *tlb,
details = NULL;
BUG_ON(addr >= end);
- mem_cgroup_uncharge_start();
tlb_start_vma(tlb, vma);
pgd = pgd_offset(vma->vm_mm, addr);
do {
@@ -1302,7 +1301,6 @@ static void unmap_page_range(struct mmu_gather *tlb,
next = zap_pud_range(tlb, vma, pgd, addr, next, details);
} while (pgd++, addr = next, addr != end);
tlb_end_vma(tlb, vma);
- mem_cgroup_uncharge_end();
}
@@ -2049,6 +2047,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page *dirty_page = NULL;
unsigned long mmun_start = 0; /* For mmu_notifiers */
unsigned long mmun_end = 0; /* For mmu_notifiers */
+ struct mem_cgroup *memcg;
old_page = vm_normal_page(vma, address, orig_pte);
if (!old_page) {
@@ -2204,7 +2203,7 @@ gotten:
}
__SetPageUptodate(new_page);
- if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
+ if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg))
goto oom_free_new;
mmun_start = address & PAGE_MASK;
@@ -2234,6 +2233,8 @@ gotten:
*/
ptep_clear_flush(vma, address, page_table);
page_add_new_anon_rmap(new_page, vma, address);
+ mem_cgroup_commit_charge(new_page, memcg, false);
+ lru_cache_add_active_or_unevictable(new_page, vma);
/*
* We call the notify macro here because, when using secondary
* mmu page tables (such as kvm shadow page tables), we want the
@@ -2271,7 +2272,7 @@ gotten:
new_page = old_page;
ret |= VM_FAULT_WRITE;
} else
- mem_cgroup_uncharge_page(new_page);
+ mem_cgroup_cancel_charge(new_page, memcg);
if (new_page)
page_cache_release(new_page);
@@ -2407,10 +2408,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
{
spinlock_t *ptl;
struct page *page, *swapcache;
+ struct mem_cgroup *memcg;
swp_entry_t entry;
pte_t pte;
int locked;
- struct mem_cgroup *ptr;
int exclusive = 0;
int ret = 0;
@@ -2486,7 +2487,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto out_page;
}
- if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
+ if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) {
ret = VM_FAULT_OOM;
goto out_page;
}
@@ -2511,10 +2512,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
* while the page is counted on swap but not yet in mapcount i.e.
* before page_add_anon_rmap() and swap_free(); try_to_free_swap()
* must be called after the swap_free(), or it will never succeed.
- * Because delete_from_swap_page() may be called by reuse_swap_page(),
- * mem_cgroup_commit_charge_swapin() may not be able to find swp_entry
- * in page->private. In this case, a record in swap_cgroup is silently
- * discarded at swap_free().
*/
inc_mm_counter_fast(mm, MM_ANONPAGES);
@@ -2530,12 +2527,14 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (pte_swp_soft_dirty(orig_pte))
pte = pte_mksoft_dirty(pte);
set_pte_at(mm, address, page_table, pte);
- if (page == swapcache)
+ if (page == swapcache) {
do_page_add_anon_rmap(page, vma, address, exclusive);
- else /* ksm created a completely new copy */
+ mem_cgroup_commit_charge(page, memcg, true);
+ } else { /* ksm created a completely new copy */
page_add_new_anon_rmap(page, vma, address);
- /* It's better to call commit-charge after rmap is established */
- mem_cgroup_commit_charge_swapin(page, ptr);
+ mem_cgroup_commit_charge(page, memcg, false);
+ lru_cache_add_active_or_unevictable(page, vma);
+ }
swap_free(entry);
if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
@@ -2568,7 +2567,7 @@ unlock:
out:
return ret;
out_nomap:
- mem_cgroup_cancel_charge_swapin(ptr);
+ mem_cgroup_cancel_charge(page, memcg);
pte_unmap_unlock(page_table, ptl);
out_page:
unlock_page(page);
@@ -2624,6 +2623,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
unsigned int flags)
{
+ struct mem_cgroup *memcg;
struct page *page;
spinlock_t *ptl;
pte_t entry;
@@ -2657,7 +2657,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
*/
__SetPageUptodate(page);
- if (mem_cgroup_charge_anon(page, mm, GFP_KERNEL))
+ if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
goto oom_free_page;
entry = mk_pte(page, vma->vm_page_prot);
@@ -2670,6 +2670,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
inc_mm_counter_fast(mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address);
+ mem_cgroup_commit_charge(page, memcg, false);
+ lru_cache_add_active_or_unevictable(page, vma);
setpte:
set_pte_at(mm, address, page_table, entry);
@@ -2679,7 +2681,7 @@ unlock:
pte_unmap_unlock(page_table, ptl);
return 0;
release:
- mem_cgroup_uncharge_page(page);
+ mem_cgroup_cancel_charge(page, memcg);
page_cache_release(page);
goto unlock;
oom_free_page:
@@ -2913,6 +2915,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
{
struct page *fault_page, *new_page;
+ struct mem_cgroup *memcg;
spinlock_t *ptl;
pte_t *pte;
int ret;
@@ -2924,7 +2927,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (!new_page)
return VM_FAULT_OOM;
- if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) {
+ if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) {
page_cache_release(new_page);
return VM_FAULT_OOM;
}
@@ -2944,12 +2947,14 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
goto uncharge_out;
}
do_set_pte(vma, address, new_page, pte, true, true);
+ mem_cgroup_commit_charge(new_page, memcg, false);
+ lru_cache_add_active_or_unevictable(new_page, vma);
pte_unmap_unlock(pte, ptl);
unlock_page(fault_page);
page_cache_release(fault_page);
return ret;
uncharge_out:
- mem_cgroup_uncharge_page(new_page);
+ mem_cgroup_cancel_charge(new_page, memcg);
page_cache_release(new_page);
return ret;
}
@@ -2965,6 +2970,8 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
int dirtied = 0;
int ret, tmp;
+ WARN_ON_ONCE(!rwsem_is_locked(&mm->mmap_sem));
+
ret = __do_fault(vma, address, pgoff, flags, &fault_page);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;
@@ -2995,6 +3002,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (set_page_dirty(fault_page))
dirtied = 1;
+ /*
+ * Take a local copy of the address_space - page.mapping may be zeroed
+ * by truncate after unlock_page(). The address_space itself remains
+ * pinned by vma->vm_file's reference. We rely on unlock_page()'s
+ * release semantics to prevent the compiler from undoing this copying.
+ */
mapping = fault_page->mapping;
unlock_page(fault_page);
if ((dirtied || vma->vm_ops->page_mkwrite) && mapping) {
@@ -3177,7 +3190,7 @@ static int handle_pte_fault(struct mm_struct *mm,
pte_t entry;
spinlock_t *ptl;
- entry = *pte;
+ entry = ACCESS_ONCE(*pte);
if (!pte_present(entry)) {
if (pte_none(entry)) {
if (vma->vm_ops) {
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 469bbf505f85..a3797d3fd8a4 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -284,8 +284,8 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
}
#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
-static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
- unsigned long end_pfn)
+static void __meminit grow_zone_span(struct zone *zone, unsigned long start_pfn,
+ unsigned long end_pfn)
{
unsigned long old_zone_end_pfn;
@@ -427,8 +427,8 @@ out_fail:
return -1;
}
-static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
- unsigned long end_pfn)
+static void __meminit grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
+ unsigned long end_pfn)
{
unsigned long old_pgdat_end_pfn = pgdat_end_pfn(pgdat);
@@ -977,15 +977,18 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
zone = page_zone(pfn_to_page(pfn));
ret = -EINVAL;
- if ((zone_idx(zone) > ZONE_NORMAL || online_type == ONLINE_MOVABLE) &&
+ if ((zone_idx(zone) > ZONE_NORMAL ||
+ online_type == MMOP_ONLINE_MOVABLE) &&
!can_online_high_movable(zone))
goto out;
- if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) {
+ if (online_type == MMOP_ONLINE_KERNEL &&
+ zone_idx(zone) == ZONE_MOVABLE) {
if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages))
goto out;
}
- if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) {
+ if (online_type == MMOP_ONLINE_MOVABLE &&
+ zone_idx(zone) == ZONE_MOVABLE - 1) {
if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages))
goto out;
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 9e0beaa91845..ab43fbfff8ba 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -778,11 +778,14 @@ static int move_to_new_page(struct page *newpage, struct page *page,
rc = fallback_migrate_page(mapping, newpage, page, mode);
if (rc != MIGRATEPAGE_SUCCESS) {
- newpage->mapping = NULL;
+ if (!PageAnon(newpage))
+ newpage->mapping = NULL;
} else {
if (remap_swapcache)
remove_migration_ptes(page, newpage);
- page->mapping = NULL;
+ if (!PageAnon(page))
+ page->mapping = NULL;
+ mem_cgroup_migrate(page, newpage, false);
}
unlock_page(newpage);
@@ -795,7 +798,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
{
int rc = -EAGAIN;
int remap_swapcache = 1;
- struct mem_cgroup *mem;
struct anon_vma *anon_vma = NULL;
if (!trylock_page(page)) {
@@ -821,9 +823,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
lock_page(page);
}
- /* charge against new page */
- mem_cgroup_prepare_migration(page, newpage, &mem);
-
if (PageWriteback(page)) {
/*
* Only in the case of a full synchronous migration is it
@@ -833,10 +832,10 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
*/
if (mode != MIGRATE_SYNC) {
rc = -EBUSY;
- goto uncharge;
+ goto out_unlock;
}
if (!force)
- goto uncharge;
+ goto out_unlock;
wait_on_page_writeback(page);
}
/*
@@ -872,7 +871,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
*/
remap_swapcache = 0;
} else {
- goto uncharge;
+ goto out_unlock;
}
}
@@ -885,7 +884,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
* the page migration right away (proteced by page lock).
*/
rc = balloon_page_migrate(newpage, page, mode);
- goto uncharge;
+ goto out_unlock;
}
/*
@@ -904,7 +903,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
VM_BUG_ON_PAGE(PageAnon(page), page);
if (page_has_private(page)) {
try_to_free_buffers(page);
- goto uncharge;
+ goto out_unlock;
}
goto skip_unmap;
}
@@ -923,10 +922,7 @@ skip_unmap:
if (anon_vma)
put_anon_vma(anon_vma);
-uncharge:
- mem_cgroup_end_migration(mem, page, newpage,
- (rc == MIGRATEPAGE_SUCCESS ||
- rc == MIGRATEPAGE_BALLOON_SUCCESS));
+out_unlock:
unlock_page(page);
out:
return rc;
@@ -1785,7 +1781,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
pg_data_t *pgdat = NODE_DATA(node);
int isolated = 0;
struct page *new_page = NULL;
- struct mem_cgroup *memcg = NULL;
int page_lru = page_is_file_cache(page);
unsigned long mmun_start = address & HPAGE_PMD_MASK;
unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
@@ -1851,15 +1846,6 @@ fail_putback:
goto out_unlock;
}
- /*
- * Traditional migration needs to prepare the memcg charge
- * transaction early to prevent the old page from being
- * uncharged when installing migration entries. Here we can
- * save the potential rollback and start the charge transfer
- * only when migration is already known to end successfully.
- */
- mem_cgroup_prepare_migration(page, new_page, &memcg);
-
orig_entry = *pmd;
entry = mk_pmd(new_page, vma->vm_page_prot);
entry = pmd_mkhuge(entry);
@@ -1887,14 +1873,10 @@ fail_putback:
goto fail_putback;
}
+ mem_cgroup_migrate(page, new_page, false);
+
page_remove_rmap(page);
- /*
- * Finish the charge transaction under the page table lock to
- * prevent split_huge_page() from dividing up the charge
- * before it's fully transferred to the new page.
- */
- mem_cgroup_end_migration(memcg, page, new_page, true);
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
diff --git a/mm/mmap.c b/mm/mmap.c
index 129b847d30cc..d3decd9cec8c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -134,6 +134,12 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
unsigned long free, allowed, reserve;
+#ifdef CONFIG_DEBUG_VM
+ WARN_ONCE(percpu_counter_read(&vm_committed_as) <
+ -(s64)vm_committed_as_batch * num_online_cpus(),
+ "memory commitment underflow");
+#endif
+
vm_acct_memory(pages);
/*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 20d17f8266fe..2727a425549d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -816,9 +816,21 @@ void __init init_cma_reserved_pageblock(struct page *page)
set_page_count(p, 0);
} while (++p, --i);
- set_page_refcounted(page);
set_pageblock_migratetype(page, MIGRATE_CMA);
- __free_pages(page, pageblock_order);
+
+ if (pageblock_order >= MAX_ORDER) {
+ i = pageblock_nr_pages;
+ p = page;
+ do {
+ set_page_refcounted(p);
+ __free_pages(p, MAX_ORDER - 1);
+ p += MAX_ORDER_NR_PAGES;
+ } while (i -= MAX_ORDER_NR_PAGES);
+ } else {
+ set_page_refcounted(page);
+ __free_pages(page, pageblock_order);
+ }
+
adjust_managed_page_count(page, pageblock_nr_pages);
}
#endif
@@ -1245,15 +1257,11 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
{
unsigned long flags;
- int to_drain;
- unsigned long batch;
+ int to_drain, batch;
local_irq_save(flags);
batch = ACCESS_ONCE(pcp->batch);
- if (pcp->count >= batch)
- to_drain = batch;
- else
- to_drain = pcp->count;
+ to_drain = min(pcp->count, batch);
if (to_drain > 0) {
free_pcppages_bulk(zone, to_drain, pcp);
pcp->count -= to_drain;
@@ -2950,7 +2958,7 @@ EXPORT_SYMBOL(alloc_pages_exact);
* Note this is not alloc_pages_exact_node() which allocates on a specific node,
* but is not exact.
*/
-void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
+void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
{
unsigned order = get_order(size);
struct page *p = alloc_pages_node(nid, gfp_mask, order);
@@ -2958,7 +2966,6 @@ void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
return NULL;
return make_alloc_exact((unsigned long)page_address(p), order, size);
}
-EXPORT_SYMBOL(alloc_pages_exact_nid);
/**
* free_pages_exact - release memory allocated via alloc_pages_exact()
@@ -6050,10 +6057,11 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
}
/**
- * get_pageblock_flags_group - Return the requested group of flags for the pageblock_nr_pages block of pages
+ * get_pfnblock_flags_mask - Return the requested group of flags for the pageblock_nr_pages block of pages
* @page: The page within the block of interest
- * @start_bitidx: The first bit of interest to retrieve
+ * @pfn: Page Number of the page
* @end_bitidx: The last bit of interest
+ * @mask: The mask for flags
* returns pageblock_bits flags
*/
unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
@@ -6079,9 +6087,10 @@ unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
/**
* set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages
* @page: The page within the block of interest
- * @start_bitidx: The first bit of interest
- * @end_bitidx: The last bit of interest
* @flags: The flags to set
+ * @pfn: Page Number of the page
+ * @end_bitidx: The last bit of interest
+ * @mask: The mask for flags
*/
void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
unsigned long pfn,
diff --git a/mm/readahead.c b/mm/readahead.c
index 0ca36a7770b1..17b9172ec37f 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -326,7 +326,6 @@ static unsigned long get_next_ra_size(struct file_ra_state *ra,
* - thrashing threshold in memory tight systems
*/
static pgoff_t count_history_pages(struct address_space *mapping,
- struct file_ra_state *ra,
pgoff_t offset, unsigned long max)
{
pgoff_t head;
@@ -349,7 +348,7 @@ static int try_context_readahead(struct address_space *mapping,
{
pgoff_t size;
- size = count_history_pages(mapping, ra, offset, max);
+ size = count_history_pages(mapping, offset, max);
/*
* not enough history pages:
diff --git a/mm/rmap.c b/mm/rmap.c
index b7e94ebbd09e..7928ddd91b6e 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -517,11 +517,7 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
static inline unsigned long
__vma_address(struct page *page, struct vm_area_struct *vma)
{
- pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-
- if (unlikely(is_vm_hugetlb_page(vma)))
- pgoff = page->index << huge_page_order(page_hstate(page));
-
+ pgoff_t pgoff = page_pgoff(page);
return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
}
@@ -1036,25 +1032,6 @@ void page_add_new_anon_rmap(struct page *page,
__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
hpage_nr_pages(page));
__page_set_anon_rmap(page, vma, address, 1);
-
- VM_BUG_ON_PAGE(PageLRU(page), page);
- if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
- SetPageActive(page);
- lru_cache_add(page);
- return;
- }
-
- if (!TestSetPageMlocked(page)) {
- /*
- * We use the irq-unsafe __mod_zone_page_stat because this
- * counter is not modified from interrupt context, and the pte
- * lock is held(spinlock), which implies preemption disabled.
- */
- __mod_zone_page_state(page_zone(page), NR_MLOCK,
- hpage_nr_pages(page));
- count_vm_event(UNEVICTABLE_PGMLOCKED);
- }
- add_page_to_unevictable_list(page);
}
/**
@@ -1112,7 +1089,6 @@ void page_remove_rmap(struct page *page)
if (unlikely(PageHuge(page)))
goto out;
if (anon) {
- mem_cgroup_uncharge_page(page);
if (PageTransHuge(page))
__dec_zone_page_state(page,
NR_ANON_TRANSPARENT_HUGEPAGES);
@@ -1639,7 +1615,7 @@ static struct anon_vma *rmap_walk_anon_lock(struct page *page,
static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc)
{
struct anon_vma *anon_vma;
- pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ pgoff_t pgoff = page_pgoff(page);
struct anon_vma_chain *avc;
int ret = SWAP_AGAIN;
@@ -1680,7 +1656,7 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc)
static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
{
struct address_space *mapping = page->mapping;
- pgoff_t pgoff = page->index << compound_order(page);
+ pgoff_t pgoff = page_pgoff(page);
struct vm_area_struct *vma;
int ret = SWAP_AGAIN;
diff --git a/mm/shmem.c b/mm/shmem.c
index 8f419cff9e34..5e5d8601c712 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -149,6 +149,19 @@ static inline void shmem_unacct_size(unsigned long flags, loff_t size)
vm_unacct_memory(VM_ACCT(size));
}
+static inline int shmem_reacct_size(unsigned long flags,
+ loff_t oldsize, loff_t newsize)
+{
+ if (!(flags & VM_NORESERVE)) {
+ if (VM_ACCT(newsize) > VM_ACCT(oldsize))
+ return security_vm_enough_memory_mm(current->mm,
+ VM_ACCT(newsize) - VM_ACCT(oldsize));
+ else if (VM_ACCT(newsize) < VM_ACCT(oldsize))
+ vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize));
+ }
+ return 0;
+}
+
/*
* ... whereas tmpfs objects are accounted incrementally as
* pages are allocated, in order to allow huge sparse files.
@@ -406,7 +419,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
pvec.pages, indices);
if (!pvec.nr)
break;
- mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
@@ -434,7 +446,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
}
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
- mem_cgroup_uncharge_end();
cond_resched();
index++;
}
@@ -485,7 +496,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
pagevec_release(&pvec);
break;
}
- mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
@@ -512,7 +522,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
}
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
- mem_cgroup_uncharge_end();
index++;
}
@@ -543,6 +552,10 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
loff_t newsize = attr->ia_size;
if (newsize != oldsize) {
+ error = shmem_reacct_size(SHMEM_I(inode)->flags,
+ oldsize, newsize);
+ if (error)
+ return error;
i_size_write(inode, newsize);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
}
@@ -669,6 +682,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
{
struct list_head *this, *next;
struct shmem_inode_info *info;
+ struct mem_cgroup *memcg;
int found = 0;
int error = 0;
@@ -684,7 +698,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
* the shmem_swaplist_mutex which might hold up shmem_writepage().
* Charged back to the user (not to caller) when swap account is used.
*/
- error = mem_cgroup_charge_file(page, current->mm, GFP_KERNEL);
+ error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg);
if (error)
goto out;
/* No radix_tree_preload: swap entry keeps a place for page in tree */
@@ -702,8 +716,11 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
}
mutex_unlock(&shmem_swaplist_mutex);
- if (found < 0)
+ if (found < 0) {
error = found;
+ mem_cgroup_cancel_charge(page, memcg);
+ } else
+ mem_cgroup_commit_charge(page, memcg, true);
out:
unlock_page(page);
page_cache_release(page);
@@ -807,7 +824,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
}
mutex_unlock(&shmem_swaplist_mutex);
- swapcache_free(swap, NULL);
+ swapcache_free(swap);
redirty:
set_page_dirty(page);
if (wbc->for_reclaim)
@@ -980,7 +997,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
*/
oldpage = newpage;
} else {
- mem_cgroup_replace_page_cache(oldpage, newpage);
+ mem_cgroup_migrate(oldpage, newpage, false);
lru_cache_add_anon(newpage);
*pagep = newpage;
}
@@ -1007,6 +1024,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info;
struct shmem_sb_info *sbinfo;
+ struct mem_cgroup *memcg;
struct page *page;
swp_entry_t swap;
int error;
@@ -1082,8 +1100,7 @@ repeat:
goto failed;
}
- error = mem_cgroup_charge_file(page, current->mm,
- gfp & GFP_RECLAIM_MASK);
+ error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
if (!error) {
error = shmem_add_to_page_cache(page, mapping, index,
gfp, swp_to_radix_entry(swap));
@@ -1099,12 +1116,16 @@ repeat:
* Reset swap.val? No, leave it so "failed" goes back to
* "repeat": reading a hole and writing should succeed.
*/
- if (error)
+ if (error) {
+ mem_cgroup_cancel_charge(page, memcg);
delete_from_swap_cache(page);
+ }
}
if (error)
goto failed;
+ mem_cgroup_commit_charge(page, memcg, true);
+
spin_lock(&info->lock);
info->swapped--;
shmem_recalc_inode(inode);
@@ -1136,8 +1157,7 @@ repeat:
__SetPageSwapBacked(page);
__set_page_locked(page);
- error = mem_cgroup_charge_file(page, current->mm,
- gfp & GFP_RECLAIM_MASK);
+ error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
if (error)
goto decused;
error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
@@ -1147,9 +1167,10 @@ repeat:
radix_tree_preload_end();
}
if (error) {
- mem_cgroup_uncharge_cache_page(page);
+ mem_cgroup_cancel_charge(page, memcg);
goto decused;
}
+ mem_cgroup_commit_charge(page, memcg, false);
lru_cache_add_anon(page);
spin_lock(&info->lock);
@@ -2895,16 +2916,16 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
this.len = strlen(name);
this.hash = 0; /* will go */
sb = shm_mnt->mnt_sb;
+ path.mnt = mntget(shm_mnt);
path.dentry = d_alloc_pseudo(sb, &this);
if (!path.dentry)
goto put_memory;
d_set_d_op(path.dentry, &anon_ops);
- path.mnt = mntget(shm_mnt);
res = ERR_PTR(-ENOSPC);
inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
if (!inode)
- goto put_dentry;
+ goto put_memory;
inode->i_flags |= i_flags;
d_instantiate(path.dentry, inode);
@@ -2912,19 +2933,19 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
clear_nlink(inode); /* It is unlinked */
res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
if (IS_ERR(res))
- goto put_dentry;
+ goto put_path;
res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
&shmem_file_operations);
if (IS_ERR(res))
- goto put_dentry;
+ goto put_path;
return res;
-put_dentry:
- path_put(&path);
put_memory:
shmem_unacct_size(flags, size);
+put_path:
+ path_put(&path);
return res;
}
diff --git a/mm/slab.c b/mm/slab.c
index 3070b929a1bf..179272f262f2 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -267,7 +267,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
#define MAKE_LIST(cachep, listp, slab, nodeid) \
do { \
INIT_LIST_HEAD(listp); \
- list_splice(&(cachep->node[nodeid]->slab), listp); \
+ list_splice(&get_node(cachep, nodeid)->slab, listp); \
} while (0)
#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \
@@ -488,16 +488,11 @@ static struct lock_class_key debugobj_alc_key;
static void slab_set_lock_classes(struct kmem_cache *cachep,
struct lock_class_key *l3_key, struct lock_class_key *alc_key,
- int q)
+ struct kmem_cache_node *n)
{
struct array_cache **alc;
- struct kmem_cache_node *n;
int r;
- n = cachep->node[q];
- if (!n)
- return;
-
lockdep_set_class(&n->list_lock, l3_key);
alc = n->alien;
/*
@@ -515,17 +510,19 @@ static void slab_set_lock_classes(struct kmem_cache *cachep,
}
}
-static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
+static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep,
+ struct kmem_cache_node *n)
{
- slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node);
+ slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, n);
}
static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
{
int node;
+ struct kmem_cache_node *n;
- for_each_online_node(node)
- slab_set_debugobj_lock_classes_node(cachep, node);
+ for_each_kmem_cache_node(cachep, node, n)
+ slab_set_debugobj_lock_classes_node(cachep, n);
}
static void init_node_lock_keys(int q)
@@ -542,34 +539,33 @@ static void init_node_lock_keys(int q)
if (!cache)
continue;
- n = cache->node[q];
+ n = get_node(cache, q);
if (!n || OFF_SLAB(cache))
continue;
slab_set_lock_classes(cache, &on_slab_l3_key,
- &on_slab_alc_key, q);
+ &on_slab_alc_key, n);
}
}
-static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q)
+static void on_slab_lock_classes_node(struct kmem_cache *cachep,
+ struct kmem_cache_node *n)
{
- if (!cachep->node[q])
- return;
-
slab_set_lock_classes(cachep, &on_slab_l3_key,
- &on_slab_alc_key, q);
+ &on_slab_alc_key, n);
}
static inline void on_slab_lock_classes(struct kmem_cache *cachep)
{
int node;
+ struct kmem_cache_node *n;
VM_BUG_ON(OFF_SLAB(cachep));
- for_each_node(node)
- on_slab_lock_classes_node(cachep, node);
+ for_each_kmem_cache_node(cachep, node, n)
+ on_slab_lock_classes_node(cachep, n);
}
-static inline void init_lock_keys(void)
+static inline void __init init_lock_keys(void)
{
int node;
@@ -577,7 +573,7 @@ static inline void init_lock_keys(void)
init_node_lock_keys(node);
}
#else
-static void init_node_lock_keys(int q)
+static void __init init_node_lock_keys(int q)
{
}
@@ -589,11 +585,13 @@ static inline void on_slab_lock_classes(struct kmem_cache *cachep)
{
}
-static inline void on_slab_lock_classes_node(struct kmem_cache *cachep, int node)
+static inline void on_slab_lock_classes_node(struct kmem_cache *cachep,
+ struct kmem_cache_node *n)
{
}
-static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
+static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep,
+ struct kmem_cache_node *n)
{
}
@@ -826,7 +824,7 @@ static inline bool is_slab_pfmemalloc(struct page *page)
static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
struct array_cache *ac)
{
- struct kmem_cache_node *n = cachep->node[numa_mem_id()];
+ struct kmem_cache_node *n = get_node(cachep, numa_mem_id());
struct page *page;
unsigned long flags;
@@ -881,7 +879,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
* If there are empty slabs on the slabs_free list and we are
* being forced to refill the cache, mark this one !pfmemalloc.
*/
- n = cachep->node[numa_mem_id()];
+ n = get_node(cachep, numa_mem_id());
if (!list_empty(&n->slabs_free) && force_refill) {
struct page *page = virt_to_head_page(objp);
ClearPageSlabPfmemalloc(page);
@@ -1031,7 +1029,7 @@ static void free_alien_cache(struct array_cache **ac_ptr)
static void __drain_alien_cache(struct kmem_cache *cachep,
struct array_cache *ac, int node)
{
- struct kmem_cache_node *n = cachep->node[node];
+ struct kmem_cache_node *n = get_node(cachep, node);
if (ac->avail) {
spin_lock(&n->list_lock);
@@ -1099,7 +1097,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
if (likely(nodeid == node))
return 0;
- n = cachep->node[node];
+ n = get_node(cachep, node);
STATS_INC_NODEFREES(cachep);
if (n->alien && n->alien[nodeid]) {
alien = n->alien[nodeid];
@@ -1111,9 +1109,10 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
ac_put_obj(cachep, alien, objp);
spin_unlock(&alien->lock);
} else {
- spin_lock(&(cachep->node[nodeid])->list_lock);
+ n = get_node(cachep, nodeid);
+ spin_lock(&n->list_lock);
free_block(cachep, &objp, 1, nodeid);
- spin_unlock(&(cachep->node[nodeid])->list_lock);
+ spin_unlock(&n->list_lock);
}
return 1;
}
@@ -1140,7 +1139,8 @@ static int init_cache_node_node(int node)
* begin anything. Make sure some other cpu on this
* node has not already allocated this
*/
- if (!cachep->node[node]) {
+ n = get_node(cachep, node);
+ if (!n) {
n = kmalloc_node(memsize, GFP_KERNEL, node);
if (!n)
return -ENOMEM;
@@ -1156,11 +1156,13 @@ static int init_cache_node_node(int node)
cachep->node[node] = n;
}
- spin_lock_irq(&cachep->node[node]->list_lock);
- cachep->node[node]->free_limit =
- (1 + nr_cpus_node(node)) *
- cachep->batchcount + cachep->num;
- spin_unlock_irq(&cachep->node[node]->list_lock);
+ if (!memcg_cache_dead(cachep)) {
+ spin_lock_irq(&n->list_lock);
+ n->free_limit =
+ (1 + nr_cpus_node(node)) *
+ cachep->batchcount + cachep->num;
+ spin_unlock_irq(&n->list_lock);
+ }
}
return 0;
}
@@ -1186,7 +1188,7 @@ static void cpuup_canceled(long cpu)
/* cpu is dead; no one can alloc from it. */
nc = cachep->array[cpu];
cachep->array[cpu] = NULL;
- n = cachep->node[node];
+ n = get_node(cachep, node);
if (!n)
goto free_array_cache;
@@ -1194,7 +1196,8 @@ static void cpuup_canceled(long cpu)
spin_lock_irq(&n->list_lock);
/* Free limit for this kmem_cache_node */
- n->free_limit -= cachep->batchcount;
+ if (!memcg_cache_dead(cachep))
+ n->free_limit -= cachep->batchcount;
if (nc)
free_block(cachep, nc->entry, nc->avail, node);
@@ -1229,7 +1232,7 @@ free_array_cache:
* shrink each nodelist to its limit.
*/
list_for_each_entry(cachep, &slab_caches, list) {
- n = cachep->node[node];
+ n = get_node(cachep, node);
if (!n)
continue;
drain_freelist(cachep, n, slabs_tofree(cachep, n));
@@ -1262,6 +1265,9 @@ static int cpuup_prepare(long cpu)
struct array_cache *shared = NULL;
struct array_cache **alien = NULL;
+ if (memcg_cache_dead(cachep))
+ continue;
+
nc = alloc_arraycache(node, cachep->limit,
cachep->batchcount, GFP_KERNEL);
if (!nc)
@@ -1284,7 +1290,7 @@ static int cpuup_prepare(long cpu)
}
}
cachep->array[cpu] = nc;
- n = cachep->node[node];
+ n = get_node(cachep, node);
BUG_ON(!n);
spin_lock_irq(&n->list_lock);
@@ -1306,10 +1312,10 @@ static int cpuup_prepare(long cpu)
kfree(shared);
free_alien_cache(alien);
if (cachep->flags & SLAB_DEBUG_OBJECTS)
- slab_set_debugobj_lock_classes_node(cachep, node);
+ slab_set_debugobj_lock_classes_node(cachep, n);
else if (!OFF_SLAB(cachep) &&
!(cachep->flags & SLAB_DESTROY_BY_RCU))
- on_slab_lock_classes_node(cachep, node);
+ on_slab_lock_classes_node(cachep, n);
}
init_node_lock_keys(node);
@@ -1395,7 +1401,7 @@ static int __meminit drain_cache_node_node(int node)
list_for_each_entry(cachep, &slab_caches, list) {
struct kmem_cache_node *n;
- n = cachep->node[node];
+ n = get_node(cachep, node);
if (!n)
continue;
@@ -1690,14 +1696,10 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n",
cachep->name, cachep->size, cachep->gfporder);
- for_each_online_node(node) {
+ for_each_kmem_cache_node(cachep, node, n) {
unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
unsigned long active_slabs = 0, num_slabs = 0;
- n = cachep->node[node];
- if (!n)
- continue;
-
spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->slabs_full, lru) {
active_objs += cachep->num;
@@ -2434,7 +2436,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep)
{
#ifdef CONFIG_SMP
check_irq_off();
- assert_spin_locked(&cachep->node[numa_mem_id()]->list_lock);
+ assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
#endif
}
@@ -2442,7 +2444,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
{
#ifdef CONFIG_SMP
check_irq_off();
- assert_spin_locked(&cachep->node[node]->list_lock);
+ assert_spin_locked(&get_node(cachep, node)->list_lock);
#endif
}
@@ -2462,13 +2464,22 @@ static void do_drain(void *arg)
struct kmem_cache *cachep = arg;
struct array_cache *ac;
int node = numa_mem_id();
+ struct kmem_cache_node *n;
check_irq_off();
ac = cpu_cache_get(cachep);
- spin_lock(&cachep->node[node]->list_lock);
+ if (!ac)
+ return;
+
+ n = get_node(cachep, node);
+ spin_lock(&n->list_lock);
free_block(cachep, ac->entry, ac->avail, node);
- spin_unlock(&cachep->node[node]->list_lock);
+ spin_unlock(&n->list_lock);
ac->avail = 0;
+ if (memcg_cache_dead(cachep)) {
+ cachep->array[smp_processor_id()] = NULL;
+ kfree(ac);
+ }
}
static void drain_cpu_caches(struct kmem_cache *cachep)
@@ -2478,17 +2489,12 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
on_each_cpu(do_drain, cachep, 1);
check_irq_on();
- for_each_online_node(node) {
- n = cachep->node[node];
- if (n && n->alien)
+ for_each_kmem_cache_node(cachep, node, n)
+ if (n->alien)
drain_alien_cache(cachep, n->alien);
- }
- for_each_online_node(node) {
- n = cachep->node[node];
- if (n)
- drain_array(cachep, n, n->shared, 1, node);
- }
+ for_each_kmem_cache_node(cachep, node, n)
+ drain_array(cachep, n, n->shared, 1, node);
}
/*
@@ -2534,16 +2540,19 @@ out:
int __kmem_cache_shrink(struct kmem_cache *cachep)
{
- int ret = 0, i = 0;
+ int ret = 0;
+ int node;
struct kmem_cache_node *n;
drain_cpu_caches(cachep);
check_irq_on();
- for_each_online_node(i) {
- n = cachep->node[i];
- if (!n)
- continue;
+ for_each_kmem_cache_node(cachep, node, n) {
+ if (memcg_cache_dead(cachep)) {
+ spin_lock_irq(&n->list_lock);
+ n->free_limit = 0;
+ spin_unlock_irq(&n->list_lock);
+ }
drain_freelist(cachep, n, slabs_tofree(cachep, n));
@@ -2566,13 +2575,11 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
kfree(cachep->array[i]);
/* NUMA: free the node structures */
- for_each_online_node(i) {
- n = cachep->node[i];
- if (n) {
- kfree(n->shared);
- free_alien_cache(n->alien);
- kfree(n);
- }
+ for_each_kmem_cache_node(cachep, i, n) {
+ kfree(n->shared);
+ free_alien_cache(n->alien);
+ kfree(n);
+ cachep->node[i] = NULL;
}
return 0;
}
@@ -2751,7 +2758,7 @@ static int cache_grow(struct kmem_cache *cachep,
/* Take the node list lock to change the colour_next on this node */
check_irq_off();
- n = cachep->node[nodeid];
+ n = get_node(cachep, nodeid);
spin_lock(&n->list_lock);
/* Get colour for the slab, and cal the next value. */
@@ -2920,7 +2927,7 @@ retry:
*/
batchcount = BATCHREFILL_LIMIT;
}
- n = cachep->node[node];
+ n = get_node(cachep, node);
BUG_ON(ac->avail > 0 || !n);
spin_lock(&n->list_lock);
@@ -3169,8 +3176,8 @@ retry:
nid = zone_to_nid(zone);
if (cpuset_zone_allowed_hardwall(zone, flags) &&
- cache->node[nid] &&
- cache->node[nid]->free_objects) {
+ get_node(cache, nid) &&
+ get_node(cache, nid)->free_objects) {
obj = ____cache_alloc_node(cache,
flags | GFP_THISNODE, nid);
if (obj)
@@ -3233,7 +3240,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
int x;
VM_BUG_ON(nodeid > num_online_nodes());
- n = cachep->node[nodeid];
+ n = get_node(cachep, nodeid);
BUG_ON(!n);
retry:
@@ -3304,7 +3311,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
if (nodeid == NUMA_NO_NODE)
nodeid = slab_node;
- if (unlikely(!cachep->node[nodeid])) {
+ if (unlikely(!get_node(cachep, nodeid))) {
/* Node not bootstrapped yet */
ptr = fallback_alloc(cachep, flags);
goto out;
@@ -3420,7 +3427,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
objp = objpp[i];
page = virt_to_head_page(objp);
- n = cachep->node[node];
+ n = get_node(cachep, node);
list_del(&page->lru);
check_spinlock_acquired_node(cachep, node);
slab_put_obj(cachep, page, objp, node);
@@ -3462,7 +3469,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
BUG_ON(!batchcount || batchcount > ac->avail);
#endif
check_irq_off();
- n = cachep->node[node];
+ n = get_node(cachep, node);
spin_lock(&n->list_lock);
if (n->shared) {
struct array_cache *shared_array = n->shared;
@@ -3511,12 +3518,29 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp,
{
struct array_cache *ac = cpu_cache_get(cachep);
+ /*
+ * Since we free objects with irqs and therefore preemption disabled,
+ * we can use synchronize_sched() to wait for all currently executing
+ * kfree's to finish. This is necessary to avoid use-after-free on
+ * per memcg cache destruction.
+ */
check_irq_off();
kmemleak_free_recursive(objp, cachep->flags);
objp = cache_free_debugcheck(cachep, objp, caller);
kmemcheck_slab_free(cachep, objp, cachep->object_size);
+#ifdef CONFIG_MEMCG_KMEM
+ if (unlikely(!ac)) {
+ int nodeid = page_to_nid(virt_to_page(objp));
+
+ spin_lock(&cachep->node[nodeid]->list_lock);
+ free_block(cachep, &objp, 1, nodeid);
+ spin_unlock(&cachep->node[nodeid]->list_lock);
+ return;
+ }
+#endif
+
/*
* Skip calling cache_free_alien() when the platform is not numa.
* This will avoid cache misses that happen while accessing slabp (which
@@ -3775,7 +3799,7 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
}
}
- n = cachep->node[node];
+ n = get_node(cachep, node);
if (n) {
struct array_cache *shared = n->shared;
@@ -3820,9 +3844,8 @@ fail:
/* Cache is not active yet. Roll back what we did */
node--;
while (node >= 0) {
- if (cachep->node[node]) {
- n = cachep->node[node];
-
+ n = get_node(cachep, node);
+ if (n) {
kfree(n->shared);
free_alien_cache(n->alien);
kfree(n);
@@ -3858,6 +3881,9 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
struct ccupdate_struct *new;
int i;
+ if (memcg_cache_dead(cachep))
+ return 0;
+
new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *),
gfp);
if (!new)
@@ -3884,11 +3910,17 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
for_each_online_cpu(i) {
struct array_cache *ccold = new->new[i];
+ int node;
+ struct kmem_cache_node *n;
+
if (!ccold)
continue;
- spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
- free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
- spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
+
+ node = cpu_to_mem(i);
+ n = get_node(cachep, node);
+ spin_lock_irq(&n->list_lock);
+ free_block(cachep, ccold->entry, ccold->avail, node);
+ spin_unlock_irq(&n->list_lock);
kfree(ccold);
}
kfree(new);
@@ -4043,12 +4075,15 @@ static void cache_reap(struct work_struct *w)
list_for_each_entry(searchp, &slab_caches, list) {
check_irq_on();
+ if (memcg_cache_dead(searchp))
+ continue;
+
/*
* We only take the node lock if absolutely necessary and we
* have established with reasonable certainty that
* we can do some work if the lock was obtained.
*/
- n = searchp->node[node];
+ n = get_node(searchp, node);
reap_alien(searchp, n);
@@ -4100,10 +4135,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
active_objs = 0;
num_slabs = 0;
- for_each_online_node(node) {
- n = cachep->node[node];
- if (!n)
- continue;
+ for_each_kmem_cache_node(cachep, node, n) {
check_irq_on();
spin_lock_irq(&n->list_lock);
@@ -4328,10 +4360,7 @@ static int leaks_show(struct seq_file *m, void *p)
x[1] = 0;
- for_each_online_node(node) {
- n = cachep->node[node];
- if (!n)
- continue;
+ for_each_kmem_cache_node(cachep, node, n) {
check_irq_on();
spin_lock_irq(&n->list_lock);
diff --git a/mm/slab.h b/mm/slab.h
index 961a3fb1f5a2..4789ca4b8721 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -121,6 +121,26 @@ static inline bool is_root_cache(struct kmem_cache *s)
return !s->memcg_params || s->memcg_params->is_root_cache;
}
+static inline bool memcg_cache_dead(struct kmem_cache *s)
+{
+ if (is_root_cache(s))
+ return false;
+
+ /*
+ * Since this function can be called without holding any locks, it
+ * needs a barrier here to guarantee the read won't be reordered.
+ */
+ smp_rmb();
+ return s->memcg_params->dead;
+}
+
+static inline void memcg_cache_mark_dead(struct kmem_cache *s)
+{
+ BUG_ON(is_root_cache(s));
+ s->memcg_params->dead = true;
+ smp_wmb(); /* matches rmb in memcg_cache_dead() */
+}
+
static inline bool slab_equal_or_root(struct kmem_cache *s,
struct kmem_cache *p)
{
@@ -203,6 +223,11 @@ static inline bool is_root_cache(struct kmem_cache *s)
return true;
}
+static inline bool memcg_cache_dead(struct kmem_cache *s)
+{
+ return false;
+}
+
static inline bool slab_equal_or_root(struct kmem_cache *s,
struct kmem_cache *p)
{
@@ -260,9 +285,8 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
WARN_ON_ONCE(1);
return s;
}
-#endif
-
+#ifndef CONFIG_SLOB
/*
* The slab lists for all objects.
*/
@@ -294,5 +318,22 @@ struct kmem_cache_node {
};
+static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
+{
+ return s->node[node];
+}
+
+/*
+ * Iterator over all nodes. The body will be executed for each node that has
+ * a kmem_cache_node structure allocated (which is true for all online nodes)
+ */
+#define for_each_kmem_cache_node(__s, __node, __n) \
+ for (__node = 0; __n = get_node(__s, __node), __node < nr_node_ids; __node++) \
+ if (__n)
+
+#endif
+
void *slab_next(struct seq_file *m, void *p, loff_t *pos);
void slab_stop(struct seq_file *m, void *p);
+
+#endif /* MM_SLAB_H */
diff --git a/mm/slub.c b/mm/slub.c
index 1d793d17107c..6641a8fc63d1 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -233,11 +233,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
* Core slab cache functions
*******************************************************************/
-static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
-{
- return s->node[node];
-}
-
/* Verify that a pointer has an address that is valid within a slab page */
static inline int check_valid_pointer(struct kmem_cache *s,
struct page *page, const void *object)
@@ -945,60 +940,6 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
}
/*
- * Hooks for other subsystems that check memory allocations. In a typical
- * production configuration these hooks all should produce no code at all.
- */
-static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
-{
- kmemleak_alloc(ptr, size, 1, flags);
-}
-
-static inline void kfree_hook(const void *x)
-{
- kmemleak_free(x);
-}
-
-static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
-{
- flags &= gfp_allowed_mask;
- lockdep_trace_alloc(flags);
- might_sleep_if(flags & __GFP_WAIT);
-
- return should_failslab(s->object_size, flags, s->flags);
-}
-
-static inline void slab_post_alloc_hook(struct kmem_cache *s,
- gfp_t flags, void *object)
-{
- flags &= gfp_allowed_mask;
- kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
- kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
-}
-
-static inline void slab_free_hook(struct kmem_cache *s, void *x)
-{
- kmemleak_free_recursive(x, s->flags);
-
- /*
- * Trouble is that we may no longer disable interrupts in the fast path
- * So in order to make the debug calls that expect irqs to be
- * disabled we need to disable interrupts temporarily.
- */
-#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
- {
- unsigned long flags;
-
- local_irq_save(flags);
- kmemcheck_slab_free(s, x, s->object_size);
- debug_check_no_locks_freed(x, s->object_size);
- local_irq_restore(flags);
- }
-#endif
- if (!(s->flags & SLAB_DEBUG_OBJECTS))
- debug_check_no_obj_freed(x, s->object_size);
-}
-
-/*
* Tracking of fully allocated slabs for debugging purposes.
*/
static void add_full(struct kmem_cache *s,
@@ -1282,6 +1223,12 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
static inline void dec_slabs_node(struct kmem_cache *s, int node,
int objects) {}
+#endif /* CONFIG_SLUB_DEBUG */
+
+/*
+ * Hooks for other subsystems that check memory allocations. In a typical
+ * production configuration these hooks all should produce no code at all.
+ */
static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
{
kmemleak_alloc(ptr, size, 1, flags);
@@ -1293,21 +1240,44 @@ static inline void kfree_hook(const void *x)
}
static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
- { return 0; }
+{
+ flags &= gfp_allowed_mask;
+ lockdep_trace_alloc(flags);
+ might_sleep_if(flags & __GFP_WAIT);
+
+ return should_failslab(s->object_size, flags, s->flags);
+}
-static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
- void *object)
+static inline void slab_post_alloc_hook(struct kmem_cache *s,
+ gfp_t flags, void *object)
{
- kmemleak_alloc_recursive(object, s->object_size, 1, s->flags,
- flags & gfp_allowed_mask);
+ flags &= gfp_allowed_mask;
+ kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
+ kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
}
static inline void slab_free_hook(struct kmem_cache *s, void *x)
{
kmemleak_free_recursive(x, s->flags);
-}
-#endif /* CONFIG_SLUB_DEBUG */
+ /*
+ * Trouble is that we may no longer disable interrupts in the fast path
+ * So in order to make the debug calls that expect irqs to be
+ * disabled we need to disable interrupts temporarily.
+ */
+#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
+ {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ kmemcheck_slab_free(s, x, s->object_size);
+ debug_check_no_locks_freed(x, s->object_size);
+ local_irq_restore(flags);
+ }
+#endif
+ if (!(s->flags & SLAB_DEBUG_OBJECTS))
+ debug_check_no_obj_freed(x, s->object_size);
+}
/*
* Slab allocation and freeing
@@ -1433,7 +1403,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
memset(start, POISON_INUSE, PAGE_SIZE << order);
last = start;
- for_each_object(p, s, start, page->objects) {
+ for_each_object(p, s, start + s->size, page->objects - 1) {
setup_object(s, page, last);
set_freepointer(s, last, p);
last = p;
@@ -1881,7 +1851,7 @@ redo:
new.frozen = 0;
- if (!new.inuse && n->nr_partial > s->min_partial)
+ if (!new.inuse && n->nr_partial >= s->min_partial)
m = M_FREE;
else if (new.freelist) {
m = M_PARTIAL;
@@ -1992,7 +1962,7 @@ static void unfreeze_partials(struct kmem_cache *s,
new.freelist, new.counters,
"unfreezing slab"));
- if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) {
+ if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
page->next = discard_page;
discard_page = page;
} else {
@@ -2064,6 +2034,14 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
!= oldpage);
+
+ if (memcg_cache_dead(s)) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
+ local_irq_restore(flags);
+ }
#endif
}
@@ -2162,6 +2140,7 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
int node;
+ struct kmem_cache_node *n;
if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
return;
@@ -2176,15 +2155,11 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
s->name);
- for_each_online_node(node) {
- struct kmem_cache_node *n = get_node(s, node);
+ for_each_kmem_cache_node(s, node, n) {
unsigned long nr_slabs;
unsigned long nr_objs;
unsigned long nr_free;
- if (!n)
- continue;
-
nr_free = count_partial(n, count_free);
nr_slabs = node_nr_slabs(n);
nr_objs = node_nr_objs(n);
@@ -2620,7 +2595,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
return;
}
- if (unlikely(!new.inuse && n->nr_partial > s->min_partial))
+ if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
goto slab_empty;
/*
@@ -2673,18 +2648,17 @@ static __always_inline void slab_free(struct kmem_cache *s,
slab_free_hook(s, x);
-redo:
/*
- * Determine the currently cpus per cpu slab.
- * The cpu may change afterward. However that does not matter since
- * data is retrieved via this pointer. If we are on the same cpu
- * during the cmpxchg then the free will succedd.
+ * We could make this function fully preemptable, but then we wouldn't
+ * have a method to wait for all currently executing kfree's to finish,
+ * which is necessary to avoid use-after-free on per memcg cache
+ * destruction.
*/
preempt_disable();
+redo:
c = this_cpu_ptr(s->cpu_slab);
tid = c->tid;
- preempt_enable();
if (likely(page == c->page)) {
set_freepointer(s, object, c->freelist);
@@ -2701,6 +2675,7 @@ redo:
} else
__slab_free(s, page, x, addr);
+ preempt_enable();
}
void kmem_cache_free(struct kmem_cache *s, void *x)
@@ -2928,13 +2903,10 @@ static void early_kmem_cache_node_alloc(int node)
static void free_kmem_cache_nodes(struct kmem_cache *s)
{
int node;
+ struct kmem_cache_node *n;
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = s->node[node];
-
- if (n)
- kmem_cache_free(kmem_cache_node, n);
-
+ for_each_kmem_cache_node(s, node, n) {
+ kmem_cache_free(kmem_cache_node, n);
s->node[node] = NULL;
}
}
@@ -3224,12 +3196,11 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
static inline int kmem_cache_close(struct kmem_cache *s)
{
int node;
+ struct kmem_cache_node *n;
flush_all(s);
/* Attempt to free all objects */
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = get_node(s, node);
-
+ for_each_kmem_cache_node(s, node, n) {
free_partial(s, n);
if (n->nr_partial || slabs_node(s, node))
return 1;
@@ -3406,20 +3377,26 @@ int __kmem_cache_shrink(struct kmem_cache *s)
struct page *page;
struct page *t;
int objects = oo_objects(s->max);
+ struct list_head empty_slabs;
struct list_head *slabs_by_inuse =
kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
unsigned long flags;
- if (!slabs_by_inuse)
- return -ENOMEM;
+ if (memcg_cache_dead(s))
+ s->min_partial = 0;
- flush_all(s);
- for_each_node_state(node, N_NORMAL_MEMORY) {
- n = get_node(s, node);
-
- if (!n->nr_partial)
- continue;
+ if (!slabs_by_inuse) {
+ /*
+ * Do not fail shrinking empty slabs if allocation of the
+ * temporary array failed. Just skip the slab placement
+ * optimization then.
+ */
+ slabs_by_inuse = &empty_slabs;
+ objects = 1;
+ }
+ flush_all(s);
+ for_each_kmem_cache_node(s, node, n) {
for (i = 0; i < objects; i++)
INIT_LIST_HEAD(slabs_by_inuse + i);
@@ -3432,7 +3409,9 @@ int __kmem_cache_shrink(struct kmem_cache *s)
* list_lock. page->inuse here is the upper limit.
*/
list_for_each_entry_safe(page, t, &n->partial, lru) {
- list_move(&page->lru, slabs_by_inuse + page->inuse);
+ if (page->inuse < objects)
+ list_move(&page->lru,
+ slabs_by_inuse + page->inuse);
if (!page->inuse)
n->nr_partial--;
}
@@ -3451,7 +3430,8 @@ int __kmem_cache_shrink(struct kmem_cache *s)
discard_slab(s, page);
}
- kfree(slabs_by_inuse);
+ if (slabs_by_inuse != &empty_slabs)
+ kfree(slabs_by_inuse);
return 0;
}
@@ -3588,6 +3568,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
{
int node;
struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+ struct kmem_cache_node *n;
memcpy(s, static_cache, kmem_cache->object_size);
@@ -3597,19 +3578,16 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
* IPIs around.
*/
__flush_cpu_slab(s, smp_processor_id());
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = get_node(s, node);
+ for_each_kmem_cache_node(s, node, n) {
struct page *p;
- if (n) {
- list_for_each_entry(p, &n->partial, lru)
- p->slab_cache = s;
+ list_for_each_entry(p, &n->partial, lru)
+ p->slab_cache = s;
#ifdef CONFIG_SLUB_DEBUG
- list_for_each_entry(p, &n->full, lru)
- p->slab_cache = s;
+ list_for_each_entry(p, &n->full, lru)
+ p->slab_cache = s;
#endif
- }
}
list_add(&s->list, &slab_caches);
return s;
@@ -3962,16 +3940,14 @@ static long validate_slab_cache(struct kmem_cache *s)
unsigned long count = 0;
unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
sizeof(unsigned long), GFP_KERNEL);
+ struct kmem_cache_node *n;
if (!map)
return -ENOMEM;
flush_all(s);
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = get_node(s, node);
-
+ for_each_kmem_cache_node(s, node, n)
count += validate_slab_node(s, n, map);
- }
kfree(map);
return count;
}
@@ -4125,6 +4101,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
int node;
unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
sizeof(unsigned long), GFP_KERNEL);
+ struct kmem_cache_node *n;
if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
GFP_TEMPORARY)) {
@@ -4134,8 +4111,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
/* Push back cpu slabs */
flush_all(s);
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = get_node(s, node);
+ for_each_kmem_cache_node(s, node, n) {
unsigned long flags;
struct page *page;
@@ -4207,7 +4183,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
#endif
#ifdef SLUB_RESILIENCY_TEST
-static void resiliency_test(void)
+static void __init resiliency_test(void)
{
u8 *p;
@@ -4334,8 +4310,9 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
get_online_mems();
#ifdef CONFIG_SLUB_DEBUG
if (flags & SO_ALL) {
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = get_node(s, node);
+ struct kmem_cache_node *n;
+
+ for_each_kmem_cache_node(s, node, n) {
if (flags & SO_TOTAL)
x = atomic_long_read(&n->total_objects);
@@ -4351,9 +4328,9 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
} else
#endif
if (flags & SO_PARTIAL) {
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = get_node(s, node);
+ struct kmem_cache_node *n;
+ for_each_kmem_cache_node(s, node, n) {
if (flags & SO_TOTAL)
x = count_partial(n, count_total);
else if (flags & SO_OBJECTS)
@@ -4366,7 +4343,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
}
x = sprintf(buf, "%lu", total);
#ifdef CONFIG_NUMA
- for_each_node_state(node, N_NORMAL_MEMORY)
+ for (node = 0; node < nr_node_ids; node++)
if (nodes[node])
x += sprintf(buf + x, " N%d=%lu",
node, nodes[node]);
@@ -4380,16 +4357,12 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
static int any_slab_objects(struct kmem_cache *s)
{
int node;
+ struct kmem_cache_node *n;
- for_each_online_node(node) {
- struct kmem_cache_node *n = get_node(s, node);
-
- if (!n)
- continue;
-
+ for_each_kmem_cache_node(s, node, n)
if (atomic_long_read(&n->total_objects))
return 1;
- }
+
return 0;
}
#endif
@@ -5344,13 +5317,9 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
unsigned long nr_objs = 0;
unsigned long nr_free = 0;
int node;
+ struct kmem_cache_node *n;
- for_each_online_node(node) {
- struct kmem_cache_node *n = get_node(s, node);
-
- if (!n)
- continue;
-
+ for_each_kmem_cache_node(s, node, n) {
nr_slabs += node_nr_slabs(n);
nr_objs += node_nr_objs(n);
nr_free += count_partial(n, count_free);
diff --git a/mm/swap.c b/mm/swap.c
index 9e8e3472248b..3074210f245d 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -62,6 +62,7 @@ static void __page_cache_release(struct page *page)
del_page_from_lru_list(page, lruvec, page_off_lru(page));
spin_unlock_irqrestore(&zone->lru_lock, flags);
}
+ mem_cgroup_uncharge(page);
}
static void __put_single_page(struct page *page)
@@ -695,6 +696,40 @@ void add_page_to_unevictable_list(struct page *page)
spin_unlock_irq(&zone->lru_lock);
}
+/**
+ * lru_cache_add_active_or_unevictable
+ * @page: the page to be added to LRU
+ * @vma: vma in which page is mapped for determining reclaimability
+ *
+ * Place @page on the active or unevictable LRU list, depending on its
+ * evictability. Note that if the page is not evictable, it goes
+ * directly back onto it's zone's unevictable list, it does NOT use a
+ * per cpu pagevec.
+ */
+void lru_cache_add_active_or_unevictable(struct page *page,
+ struct vm_area_struct *vma)
+{
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+
+ if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
+ SetPageActive(page);
+ lru_cache_add(page);
+ return;
+ }
+
+ if (!TestSetPageMlocked(page)) {
+ /*
+ * We use the irq-unsafe __mod_zone_page_stat because this
+ * counter is not modified from interrupt context, and the pte
+ * lock is held(spinlock), which implies preemption disabled.
+ */
+ __mod_zone_page_state(page_zone(page), NR_MLOCK,
+ hpage_nr_pages(page));
+ count_vm_event(UNEVICTABLE_PGMLOCKED);
+ }
+ add_page_to_unevictable_list(page);
+}
+
/*
* If the page can not be invalidated, it is moved to the
* inactive list to speed up its reclaim. It is moved to the
@@ -881,6 +916,8 @@ void release_pages(struct page **pages, int nr, bool cold)
struct lruvec *lruvec;
unsigned long uninitialized_var(flags);
+ mem_cgroup_uncharge_start();
+
for (i = 0; i < nr; i++) {
struct page *page = pages[i];
@@ -912,6 +949,7 @@ void release_pages(struct page **pages, int nr, bool cold)
__ClearPageLRU(page);
del_page_from_lru_list(page, lruvec, page_off_lru(page));
}
+ mem_cgroup_uncharge(page);
/* Clear Active bit in case of parallel mark_page_accessed */
__ClearPageActive(page);
@@ -921,6 +959,8 @@ void release_pages(struct page **pages, int nr, bool cold)
if (zone)
spin_unlock_irqrestore(&zone->lru_lock, flags);
+ mem_cgroup_uncharge_end();
+
free_hot_cold_page_list(&pages_to_free, cold);
}
EXPORT_SYMBOL(release_pages);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 2972eee184a4..e160151da6b8 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -176,7 +176,7 @@ int add_to_swap(struct page *page, struct list_head *list)
if (unlikely(PageTransHuge(page)))
if (unlikely(split_huge_page_to_list(page, list))) {
- swapcache_free(entry, NULL);
+ swapcache_free(entry);
return 0;
}
@@ -202,7 +202,7 @@ int add_to_swap(struct page *page, struct list_head *list)
* add_to_swap_cache() doesn't return -EEXIST, so we can safely
* clear SWAP_HAS_CACHE flag.
*/
- swapcache_free(entry, NULL);
+ swapcache_free(entry);
return 0;
}
}
@@ -225,7 +225,7 @@ void delete_from_swap_cache(struct page *page)
__delete_from_swap_cache(page);
spin_unlock_irq(&address_space->tree_lock);
- swapcache_free(entry, page);
+ swapcache_free(entry);
page_cache_release(page);
}
@@ -386,7 +386,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
* add_to_swap_cache() doesn't return -EEXIST, so we can safely
* clear SWAP_HAS_CACHE flag.
*/
- swapcache_free(entry, NULL);
+ swapcache_free(entry);
} while (err != -ENOMEM);
if (new_page)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4c524f7bd0bf..8798b2e0ac59 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -843,16 +843,13 @@ void swap_free(swp_entry_t entry)
/*
* Called after dropping swapcache to decrease refcnt to swap entries.
*/
-void swapcache_free(swp_entry_t entry, struct page *page)
+void swapcache_free(swp_entry_t entry)
{
struct swap_info_struct *p;
- unsigned char count;
p = swap_info_get(entry);
if (p) {
- count = swap_entry_free(p, entry, SWAP_HAS_CACHE);
- if (page)
- mem_cgroup_uncharge_swapcache(page, entry, count != 0);
+ swap_entry_free(p, entry, SWAP_HAS_CACHE);
spin_unlock(&p->lock);
}
}
@@ -1106,15 +1103,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
if (unlikely(!page))
return -ENOMEM;
- if (mem_cgroup_try_charge_swapin(vma->vm_mm, page,
- GFP_KERNEL, &memcg)) {
+ if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg)) {
ret = -ENOMEM;
goto out_nolock;
}
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) {
- mem_cgroup_cancel_charge_swapin(memcg);
+ mem_cgroup_cancel_charge(page, memcg);
ret = 0;
goto out;
}
@@ -1124,11 +1120,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
get_page(page);
set_pte_at(vma->vm_mm, addr, pte,
pte_mkold(mk_pte(page, vma->vm_page_prot)));
- if (page == swapcache)
+ if (page == swapcache) {
page_add_anon_rmap(page, vma, addr);
- else /* ksm created a completely new copy */
+ mem_cgroup_commit_charge(page, memcg, true);
+ } else { /* ksm created a completely new copy */
page_add_new_anon_rmap(page, vma, addr);
- mem_cgroup_commit_charge_swapin(page, memcg);
+ mem_cgroup_commit_charge(page, memcg, false);
+ lru_cache_add_active_or_unevictable(page, vma);
+ }
swap_free(entry);
/*
* Move the page to the active list so it is not
diff --git a/mm/truncate.c b/mm/truncate.c
index 6a78c814bebf..b352481c276d 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -281,7 +281,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
min(end - index, (pgoff_t)PAGEVEC_SIZE),
indices)) {
- mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
@@ -307,7 +306,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
}
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
- mem_cgroup_uncharge_end();
cond_resched();
index++;
}
@@ -367,7 +365,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
pagevec_release(&pvec);
break;
}
- mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
@@ -389,7 +386,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
}
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
- mem_cgroup_uncharge_end();
index++;
}
cleancache_invalidate_inode(mapping);
@@ -488,7 +484,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
indices)) {
- mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
@@ -517,7 +512,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
}
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
- mem_cgroup_uncharge_end();
cond_resched();
index++;
}
@@ -548,7 +542,6 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
BUG_ON(page_has_private(page));
__delete_from_page_cache(page, NULL);
spin_unlock_irq(&mapping->tree_lock);
- mem_cgroup_uncharge_cache_page(page);
if (mapping->a_ops->freepage)
mapping->a_ops->freepage(page);
@@ -597,7 +590,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
indices)) {
- mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
@@ -650,7 +642,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
}
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
- mem_cgroup_uncharge_end();
cond_resched();
index++;
}
diff --git a/mm/util.c b/mm/util.c
index d5ea733c5082..8f326edce752 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -3,6 +3,7 @@
#include <linux/string.h>
#include <linux/compiler.h>
#include <linux/export.h>
+#include <linux/ctype.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/security.h>
@@ -65,6 +66,35 @@ char *kstrndup(const char *s, size_t max, gfp_t gfp)
EXPORT_SYMBOL(kstrndup);
/**
+ * kstrimdup - Trim and copy a %NUL terminated string.
+ * @s: the string to trim and duplicate
+ * @gfp: the GFP mask used in the kmalloc() call when allocating memory
+ *
+ * Returns an address, which the caller must kfree, containing
+ * a duplicate of the passed string with leading and/or trailing
+ * whitespace (as defined by isspace) removed.
+ */
+char *kstrimdup(const char *s, gfp_t gfp)
+{
+ char *buf;
+ char *begin = skip_spaces(s);
+ size_t len = strlen(begin);
+
+ while (len && isspace(begin[len - 1]))
+ len--;
+
+ buf = kmalloc_track_caller(len + 1, gfp);
+ if (!buf)
+ return NULL;
+
+ memcpy(buf, begin, len);
+ buf[len] = '\0';
+
+ return buf;
+}
+EXPORT_SYMBOL(kstrimdup);
+
+/**
* kmemdup - duplicate region of memory
*
* @src: memory region to duplicate
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f64632b67196..9ec4173f48a8 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1566,7 +1566,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
const int order = 0;
struct page **pages;
unsigned int nr_pages, array_size, i;
- gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
+ const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
+ const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
array_size = (nr_pages * sizeof(struct page *));
@@ -1589,12 +1590,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
for (i = 0; i < area->nr_pages; i++) {
struct page *page;
- gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;
if (node == NUMA_NO_NODE)
- page = alloc_page(tmp_mask);
+ page = alloc_page(alloc_mask);
else
- page = alloc_pages_node(node, tmp_mask, order);
+ page = alloc_pages_node(node, alloc_mask, order);
if (unlikely(!page)) {
/* Successfully allocated i pages, free them in __vunmap() */
@@ -1602,6 +1602,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
goto fail;
}
area->pages[i] = page;
+ if (gfp_mask & __GFP_WAIT)
+ cond_resched();
}
if (map_vm_area(area, prot, &pages))
@@ -2690,14 +2692,14 @@ void get_vmalloc_info(struct vmalloc_info *vmi)
prev_end = VMALLOC_START;
- spin_lock(&vmap_area_lock);
+ rcu_read_lock();
if (list_empty(&vmap_area_list)) {
vmi->largest_chunk = VMALLOC_TOTAL;
goto out;
}
- list_for_each_entry(va, &vmap_area_list, list) {
+ list_for_each_entry_rcu(va, &vmap_area_list, list) {
unsigned long addr = va->va_start;
/*
@@ -2724,7 +2726,7 @@ void get_vmalloc_info(struct vmalloc_info *vmi)
vmi->largest_chunk = VMALLOC_END - prev_end;
out:
- spin_unlock(&vmap_area_lock);
+ rcu_read_unlock();
}
#endif
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0f16ffe8eb67..6d24fd63b209 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -65,6 +65,9 @@ struct scan_control {
/* Number of pages freed so far during a call to shrink_zones() */
unsigned long nr_reclaimed;
+ /* One of the zones is ready for compaction */
+ int compaction_ready;
+
/* How many pages shrink_list() should reclaim */
unsigned long nr_to_reclaim;
@@ -86,9 +89,6 @@ struct scan_control {
/* Scan (total_size >> priority) pages at once */
int priority;
- /* anon vs. file LRUs scanning "ratio" */
- int swappiness;
-
/*
* The memory cgroup that hit its limit and as a result is the
* primary target of this reclaim invocation.
@@ -571,9 +571,10 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
if (PageSwapCache(page)) {
swp_entry_t swap = { .val = page_private(page) };
+ mem_cgroup_swapout(page, swap);
__delete_from_swap_cache(page);
spin_unlock_irq(&mapping->tree_lock);
- swapcache_free(swap, page);
+ swapcache_free(swap);
} else {
void (*freepage)(struct page *);
void *shadow = NULL;
@@ -594,7 +595,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
shadow = workingset_eviction(mapping, page);
__delete_from_page_cache(page, shadow);
spin_unlock_irq(&mapping->tree_lock);
- mem_cgroup_uncharge_cache_page(page);
if (freepage != NULL)
freepage(page);
@@ -1097,6 +1097,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
*/
__clear_page_locked(page);
free_it:
+ mem_cgroup_uncharge(page);
nr_reclaimed++;
/*
@@ -1126,12 +1127,13 @@ keep:
list_add(&page->lru, &ret_pages);
VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
}
+ mem_cgroup_uncharge_end();
free_hot_cold_page_list(&free_pages, true);
list_splice(&ret_pages, page_list);
count_vm_events(PGACTIVATE, pgactivate);
- mem_cgroup_uncharge_end();
+
*ret_nr_dirty += nr_dirty;
*ret_nr_congested += nr_congested;
*ret_nr_unqueued_dirty += nr_unqueued_dirty;
@@ -1429,6 +1431,8 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
__ClearPageActive(page);
del_page_from_lru_list(page, lruvec, lru);
+ mem_cgroup_uncharge(page);
+
if (unlikely(PageCompound(page))) {
spin_unlock_irq(&zone->lru_lock);
(*get_compound_page_dtor(page))(page);
@@ -1650,6 +1654,8 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
__ClearPageActive(page);
del_page_from_lru_list(page, lruvec, lru);
+ mem_cgroup_uncharge(page);
+
if (unlikely(PageCompound(page))) {
spin_unlock_irq(&zone->lru_lock);
(*get_compound_page_dtor(page))(page);
@@ -1865,8 +1871,8 @@ enum scan_balance {
* nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan
* nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
*/
-static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
- unsigned long *nr)
+static void get_scan_count(struct lruvec *lruvec, int swappiness,
+ struct scan_control *sc, unsigned long *nr)
{
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
u64 fraction[2];
@@ -1909,7 +1915,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* using the memory controller's swap limit feature would be
* too expensive.
*/
- if (!global_reclaim(sc) && !sc->swappiness) {
+ if (!global_reclaim(sc) && !swappiness) {
scan_balance = SCAN_FILE;
goto out;
}
@@ -1919,7 +1925,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* system is close to OOM, scan both anon and file equally
* (unless the swappiness setting disagrees with swapping).
*/
- if (!sc->priority && sc->swappiness) {
+ if (!sc->priority && swappiness) {
scan_balance = SCAN_EQUAL;
goto out;
}
@@ -1962,7 +1968,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* With swappiness at 100, anonymous and file have the same priority.
* This scanning priority is essentially the inverse of IO cost.
*/
- anon_prio = sc->swappiness;
+ anon_prio = swappiness;
file_prio = 200 - anon_prio;
/*
@@ -2052,7 +2058,8 @@ out:
/*
* This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
*/
-static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
+ struct scan_control *sc)
{
unsigned long nr[NR_LRU_LISTS];
unsigned long targets[NR_LRU_LISTS];
@@ -2063,7 +2070,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
struct blk_plug plug;
bool scan_adjusted;
- get_scan_count(lruvec, sc, nr);
+ get_scan_count(lruvec, swappiness, sc, nr);
/* Record the original scan target for proportional adjustments later */
memcpy(targets, nr, sizeof(nr));
@@ -2241,9 +2248,10 @@ static inline bool should_continue_reclaim(struct zone *zone,
}
}
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
+static unsigned long shrink_zone(struct zone *zone, struct scan_control *sc)
{
unsigned long nr_reclaimed, nr_scanned;
+ unsigned long zone_reclaimed = 0;
do {
struct mem_cgroup *root = sc->target_mem_cgroup;
@@ -2259,11 +2267,12 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
memcg = mem_cgroup_iter(root, NULL, &reclaim);
do {
struct lruvec *lruvec;
+ int swappiness;
lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+ swappiness = mem_cgroup_swappiness(memcg);
- sc->swappiness = mem_cgroup_swappiness(memcg);
- shrink_lruvec(lruvec, sc);
+ shrink_lruvec(lruvec, swappiness, sc);
/*
* Direct reclaim and kswapd have to scan all memory
@@ -2287,20 +2296,20 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
sc->nr_scanned - nr_scanned,
sc->nr_reclaimed - nr_reclaimed);
+ zone_reclaimed += sc->nr_reclaimed - nr_reclaimed;
+
} while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
sc->nr_scanned - nr_scanned, sc));
+
+ return zone_reclaimed;
}
/* Returns true if compaction should go ahead for a high-order request */
-static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
+static inline bool compaction_ready(struct zone *zone, int order)
{
unsigned long balance_gap, watermark;
bool watermark_ok;
- /* Do not consider compaction for orders reclaim is meant to satisfy */
- if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
- return false;
-
/*
* Compaction takes time to run and there are potentially other
* callers using the pages just freed. Continue reclaiming until
@@ -2309,18 +2318,18 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
*/
balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP(
zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO));
- watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
+ watermark = high_wmark_pages(zone) + balance_gap + (2UL << order);
watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
/*
* If compaction is deferred, reclaim up to a point where
* compaction will have a chance of success when re-enabled
*/
- if (compaction_deferred(zone, sc->order))
+ if (compaction_deferred(zone, order))
return watermark_ok;
/* If compaction is not ready to start, keep reclaiming */
- if (!compaction_suitable(zone, sc->order))
+ if (!compaction_suitable(zone, order))
return false;
return watermark_ok;
@@ -2342,10 +2351,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
* If a zone is deemed to be full of pinned pages then just give it a light
* scan then give up on it.
*
- * This function returns true if a zone is being reclaimed for a costly
- * high-order allocation and compaction is ready to begin. This indicates to
- * the caller that it should consider retrying the allocation instead of
- * further reclaim.
+ * Returns whether the zones overall are reclaimable or not.
*/
static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
{
@@ -2354,13 +2360,13 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
unsigned long nr_soft_reclaimed;
unsigned long nr_soft_scanned;
unsigned long lru_pages = 0;
- bool aborted_reclaim = false;
struct reclaim_state *reclaim_state = current->reclaim_state;
gfp_t orig_mask;
struct shrink_control shrink = {
.gfp_mask = sc->gfp_mask,
};
enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
+ bool all_unreclaimable = true;
/*
* If the number of buffer_heads in the machine exceeds the maximum
@@ -2375,6 +2381,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
for_each_zone_zonelist_nodemask(zone, z, zonelist,
gfp_zone(sc->gfp_mask), sc->nodemask) {
+ unsigned long zone_reclaimed = 0;
+
if (!populated_zone(zone))
continue;
/*
@@ -2391,22 +2399,24 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
if (sc->priority != DEF_PRIORITY &&
!zone_reclaimable(zone))
continue; /* Let kswapd poll it */
- if (IS_ENABLED(CONFIG_COMPACTION)) {
- /*
- * If we already have plenty of memory free for
- * compaction in this zone, don't free any more.
- * Even though compaction is invoked for any
- * non-zero order, only frequent costly order
- * reclamation is disruptive enough to become a
- * noticeable problem, like transparent huge
- * page allocations.
- */
- if ((zonelist_zone_idx(z) <= requested_highidx)
- && compaction_ready(zone, sc)) {
- aborted_reclaim = true;
- continue;
- }
+
+ /*
+ * If we already have plenty of memory free for
+ * compaction in this zone, don't free any more.
+ * Even though compaction is invoked for any
+ * non-zero order, only frequent costly order
+ * reclamation is disruptive enough to become a
+ * noticeable problem, like transparent huge
+ * page allocations.
+ */
+ if (IS_ENABLED(CONFIG_COMPACTION) &&
+ sc->order > PAGE_ALLOC_COSTLY_ORDER &&
+ zonelist_zone_idx(z) <= requested_highidx &&
+ compaction_ready(zone, sc->order)) {
+ sc->compaction_ready = true;
+ continue;
}
+
/*
* This steals pages from memory cgroups over softlimit
* and returns the number of reclaimed pages and
@@ -2419,10 +2429,15 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
&nr_soft_scanned);
sc->nr_reclaimed += nr_soft_reclaimed;
sc->nr_scanned += nr_soft_scanned;
+ zone_reclaimed += nr_soft_reclaimed;
/* need some check for avoid more shrink_zone() */
}
- shrink_zone(zone, sc);
+ zone_reclaimed += shrink_zone(zone, sc);
+
+ if (zone_reclaimed ||
+ (global_reclaim(sc) && zone_reclaimable(zone)))
+ all_unreclaimable = false;
}
/*
@@ -2445,27 +2460,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
*/
sc->gfp_mask = orig_mask;
- return aborted_reclaim;
-}
-
-/* All zones in zonelist are unreclaimable? */
-static bool all_unreclaimable(struct zonelist *zonelist,
- struct scan_control *sc)
-{
- struct zoneref *z;
- struct zone *zone;
-
- for_each_zone_zonelist_nodemask(zone, z, zonelist,
- gfp_zone(sc->gfp_mask), sc->nodemask) {
- if (!populated_zone(zone))
- continue;
- if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
- continue;
- if (zone_reclaimable(zone))
- return false;
- }
-
- return true;
+ return !all_unreclaimable;
}
/*
@@ -2489,7 +2484,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
{
unsigned long total_scanned = 0;
unsigned long writeback_threshold;
- bool aborted_reclaim;
+ bool zones_reclaimable;
delayacct_freepages_start();
@@ -2500,12 +2495,15 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
sc->priority);
sc->nr_scanned = 0;
- aborted_reclaim = shrink_zones(zonelist, sc);
+ zones_reclaimable = shrink_zones(zonelist, sc);
total_scanned += sc->nr_scanned;
if (sc->nr_reclaimed >= sc->nr_to_reclaim)
goto out;
+ if (sc->compaction_ready)
+ goto out;
+
/*
* If we're getting trouble reclaiming, start doing
* writepage even in laptop mode.
@@ -2526,7 +2524,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
WB_REASON_TRY_TO_FREE_PAGES);
sc->may_writepage = 1;
}
- } while (--sc->priority >= 0 && !aborted_reclaim);
+ } while (--sc->priority >= 0);
out:
delayacct_freepages_end();
@@ -2534,20 +2532,12 @@ out:
if (sc->nr_reclaimed)
return sc->nr_reclaimed;
- /*
- * As hibernation is going on, kswapd is freezed so that it can't mark
- * the zone into all_unreclaimable. Thus bypassing all_unreclaimable
- * check.
- */
- if (oom_killer_disabled)
- return 0;
-
/* Aborted reclaim to try compaction? don't OOM, then */
- if (aborted_reclaim)
+ if (sc->compaction_ready)
return 1;
- /* top priority shrink_zones still had more to do? don't OOM, then */
- if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc))
+ /* Any of the zones still reclaimable? Don't OOM. */
+ if (zones_reclaimable)
return 1;
return 0;
@@ -2729,10 +2719,10 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
.may_swap = !noswap,
.order = 0,
.priority = 0,
- .swappiness = mem_cgroup_swappiness(memcg),
.target_mem_cgroup = memcg,
};
struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+ int swappiness = mem_cgroup_swappiness(memcg);
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2748,7 +2738,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
* will pick up pages from other mem cgroup's as well. We hack
* the priority and make it zero.
*/
- shrink_lruvec(lruvec, &sc);
+ shrink_lruvec(lruvec, swappiness, &sc);
trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
diff --git a/mm/zbud.c b/mm/zbud.c
index 01df13a7e2e1..440bab7912ae 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -51,6 +51,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/zbud.h>
+#include <linux/zpool.h>
/*****************
* Structures
@@ -94,6 +95,7 @@ struct zbud_pool {
struct list_head lru;
u64 pages_nr;
struct zbud_ops *ops;
+ gfp_t gfp;
};
/*
@@ -113,6 +115,89 @@ struct zbud_header {
};
/*****************
+ * zpool
+ ****************/
+
+#ifdef CONFIG_ZPOOL
+
+static int zbud_zpool_evict(struct zbud_pool *pool, unsigned long handle)
+{
+ return zpool_evict(pool, handle);
+}
+
+static struct zbud_ops zbud_zpool_ops = {
+ .evict = zbud_zpool_evict
+};
+
+static void *zbud_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops)
+{
+ return zbud_create_pool(gfp, &zbud_zpool_ops);
+}
+
+void zbud_zpool_destroy(void *pool)
+{
+ zbud_destroy_pool(pool);
+}
+
+int zbud_zpool_malloc(void *pool, size_t size, unsigned long *handle)
+{
+ return zbud_alloc(pool, size, handle);
+}
+void zbud_zpool_free(void *pool, unsigned long handle)
+{
+ zbud_free(pool, handle);
+}
+
+int zbud_zpool_shrink(void *pool, unsigned int pages,
+ unsigned int *reclaimed)
+{
+ unsigned int total = 0;
+ int ret = -EINVAL;
+
+ while (total < pages) {
+ ret = zbud_reclaim_page(pool, 8);
+ if (ret < 0)
+ break;
+ total++;
+ }
+
+ if (reclaimed)
+ *reclaimed = total;
+
+ return ret;
+}
+
+void *zbud_zpool_map(void *pool, unsigned long handle,
+ enum zpool_mapmode mm)
+{
+ return zbud_map(pool, handle);
+}
+void zbud_zpool_unmap(void *pool, unsigned long handle)
+{
+ zbud_unmap(pool, handle);
+}
+
+u64 zbud_zpool_total_size(void *pool)
+{
+ return zbud_get_pool_size(pool) * PAGE_SIZE;
+}
+
+static struct zpool_driver zbud_zpool_driver = {
+ .type = "zbud",
+ .owner = THIS_MODULE,
+ .create = zbud_zpool_create,
+ .destroy = zbud_zpool_destroy,
+ .malloc = zbud_zpool_malloc,
+ .free = zbud_zpool_free,
+ .shrink = zbud_zpool_shrink,
+ .map = zbud_zpool_map,
+ .unmap = zbud_zpool_unmap,
+ .total_size = zbud_zpool_total_size,
+};
+
+#endif /* CONFIG_ZPOOL */
+
+/*****************
* Helpers
*****************/
/* Just to make the code easier to read */
@@ -122,7 +207,7 @@ enum buddy {
};
/* Converts an allocation size in bytes to size in zbud chunks */
-static int size_to_chunks(int size)
+static int size_to_chunks(size_t size)
{
return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
}
@@ -193,9 +278,12 @@ static int num_free_chunks(struct zbud_header *zhdr)
*****************/
/**
* zbud_create_pool() - create a new zbud pool
- * @gfp: gfp flags when allocating the zbud pool structure
+ * @gfp: gfp flags when growing the pool
* @ops: user-defined operations for the zbud pool
*
+ * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used
+ * as zbud pool pages.
+ *
* Return: pointer to the new zbud pool or NULL if the metadata allocation
* failed.
*/
@@ -204,7 +292,9 @@ struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops)
struct zbud_pool *pool;
int i;
- pool = kmalloc(sizeof(struct zbud_pool), gfp);
+ if (gfp & __GFP_HIGHMEM)
+ return NULL;
+ pool = kmalloc(sizeof(struct zbud_pool), GFP_KERNEL);
if (!pool)
return NULL;
spin_lock_init(&pool->lock);
@@ -214,6 +304,7 @@ struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops)
INIT_LIST_HEAD(&pool->lru);
pool->pages_nr = 0;
pool->ops = ops;
+ pool->gfp = gfp;
return pool;
}
@@ -232,7 +323,6 @@ void zbud_destroy_pool(struct zbud_pool *pool)
* zbud_alloc() - allocates a region of a given size
* @pool: zbud pool from which to allocate
* @size: size in bytes of the desired allocation
- * @gfp: gfp flags used if the pool needs to grow
* @handle: handle of the new allocation
*
* This function will attempt to find a free region in the pool large enough to
@@ -240,22 +330,18 @@ void zbud_destroy_pool(struct zbud_pool *pool)
* performed first. If no suitable free region is found, then a new page is
* allocated and added to the pool to satisfy the request.
*
- * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used
- * as zbud pool pages.
- *
- * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
- * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
- * a new page.
+ * Return: 0 if success and @handle is set, -ENOSPC if the @size is too large,
+ * -EINVAL if the @size is 0, or -ENOMEM if the pool was unable to
+ * allocate a new page.
*/
-int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp,
- unsigned long *handle)
+int zbud_alloc(struct zbud_pool *pool, size_t size, unsigned long *handle)
{
int chunks, i, freechunks;
struct zbud_header *zhdr = NULL;
enum buddy bud;
struct page *page;
- if (!size || (gfp & __GFP_HIGHMEM))
+ if (!size)
return -EINVAL;
if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
return -ENOSPC;
@@ -279,7 +365,7 @@ int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp,
/* Couldn't find unbuddied zbud page, create new one */
spin_unlock(&pool->lock);
- page = alloc_page(gfp);
+ page = alloc_page(pool->gfp);
if (!page)
return -ENOMEM;
spin_lock(&pool->lock);
@@ -511,11 +597,20 @@ static int __init init_zbud(void)
/* Make sure the zbud header will fit in one chunk */
BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED);
pr_info("loaded\n");
+
+#ifdef CONFIG_ZPOOL
+ zpool_register_driver(&zbud_zpool_driver);
+#endif
+
return 0;
}
static void __exit exit_zbud(void)
{
+#ifdef CONFIG_ZPOOL
+ zpool_unregister_driver(&zbud_zpool_driver);
+#endif
+
pr_info("unloaded\n");
}
diff --git a/mm/zpool.c b/mm/zpool.c
new file mode 100644
index 000000000000..f503474d2923
--- /dev/null
+++ b/mm/zpool.c
@@ -0,0 +1,207 @@
+/*
+ * zpool memory storage api
+ *
+ * Copyright (C) 2014 Dan Streetman
+ *
+ * This is a common frontend for memory storage pool implementations.
+ * Typically, this is used to store compressed memory.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/zpool.h>
+
+struct zpool {
+ char *type;
+
+ struct zpool_driver *driver;
+ void *pool;
+ struct zpool_ops *ops;
+
+ struct list_head list;
+};
+
+static LIST_HEAD(drivers_head);
+static DEFINE_SPINLOCK(drivers_lock);
+
+static LIST_HEAD(pools_head);
+static DEFINE_SPINLOCK(pools_lock);
+
+void zpool_register_driver(struct zpool_driver *driver)
+{
+ spin_lock(&drivers_lock);
+ list_add(&driver->list, &drivers_head);
+ spin_unlock(&drivers_lock);
+}
+EXPORT_SYMBOL(zpool_register_driver);
+
+void zpool_unregister_driver(struct zpool_driver *driver)
+{
+ spin_lock(&drivers_lock);
+ list_del(&driver->list);
+ spin_unlock(&drivers_lock);
+}
+EXPORT_SYMBOL(zpool_unregister_driver);
+
+int zpool_evict(void *pool, unsigned long handle)
+{
+ struct zpool *zpool;
+
+ spin_lock(&pools_lock);
+ list_for_each_entry(zpool, &pools_head, list) {
+ if (zpool->pool == pool) {
+ spin_unlock(&pools_lock);
+ if (!zpool->ops || !zpool->ops->evict)
+ return -EINVAL;
+ return zpool->ops->evict(zpool, handle);
+ }
+ }
+ spin_unlock(&pools_lock);
+
+ return -ENOENT;
+}
+EXPORT_SYMBOL(zpool_evict);
+
+static struct zpool_driver *zpool_get_driver(char *type)
+{
+ struct zpool_driver *driver;
+
+ spin_lock(&drivers_lock);
+ list_for_each_entry(driver, &drivers_head, list) {
+ if (!strcmp(driver->type, type)) {
+ bool got = try_module_get(driver->owner);
+
+ spin_unlock(&drivers_lock);
+ return got ? driver : NULL;
+ }
+ }
+
+ spin_unlock(&drivers_lock);
+ return NULL;
+}
+
+static void zpool_put_driver(struct zpool_driver *driver)
+{
+ module_put(driver->owner);
+}
+
+struct zpool *zpool_create_pool(char *type, gfp_t flags,
+ struct zpool_ops *ops)
+{
+ struct zpool_driver *driver;
+ struct zpool *zpool;
+
+ pr_info("creating pool type %s\n", type);
+
+ driver = zpool_get_driver(type);
+
+ if (!driver) {
+ request_module(type);
+ driver = zpool_get_driver(type);
+ }
+
+ if (!driver) {
+ pr_err("no driver for type %s\n", type);
+ return NULL;
+ }
+
+ zpool = kmalloc(sizeof(*zpool), GFP_KERNEL);
+ if (!zpool) {
+ pr_err("couldn't create zpool - out of memory\n");
+ zpool_put_driver(driver);
+ return NULL;
+ }
+
+ zpool->type = driver->type;
+ zpool->driver = driver;
+ zpool->pool = driver->create(flags, ops);
+ zpool->ops = ops;
+
+ if (!zpool->pool) {
+ pr_err("couldn't create %s pool\n", type);
+ zpool_put_driver(driver);
+ kfree(zpool);
+ return NULL;
+ }
+
+ pr_info("created %s pool\n", type);
+
+ spin_lock(&pools_lock);
+ list_add(&zpool->list, &pools_head);
+ spin_unlock(&pools_lock);
+
+ return zpool;
+}
+
+void zpool_destroy_pool(struct zpool *zpool)
+{
+ pr_info("destroying pool type %s\n", zpool->type);
+
+ spin_lock(&pools_lock);
+ list_del(&zpool->list);
+ spin_unlock(&pools_lock);
+ zpool->driver->destroy(zpool->pool);
+ zpool_put_driver(zpool->driver);
+ kfree(zpool);
+}
+
+char *zpool_get_type(struct zpool *zpool)
+{
+ return zpool->type;
+}
+
+int zpool_malloc(struct zpool *zpool, size_t size, unsigned long *handle)
+{
+ return zpool->driver->malloc(zpool->pool, size, handle);
+}
+
+void zpool_free(struct zpool *zpool, unsigned long handle)
+{
+ zpool->driver->free(zpool->pool, handle);
+}
+
+int zpool_shrink(struct zpool *zpool, unsigned int pages,
+ unsigned int *reclaimed)
+{
+ return zpool->driver->shrink(zpool->pool, pages, reclaimed);
+}
+
+void *zpool_map_handle(struct zpool *zpool, unsigned long handle,
+ enum zpool_mapmode mapmode)
+{
+ return zpool->driver->map(zpool->pool, handle, mapmode);
+}
+
+void zpool_unmap_handle(struct zpool *zpool, unsigned long handle)
+{
+ zpool->driver->unmap(zpool->pool, handle);
+}
+
+u64 zpool_get_total_size(struct zpool *zpool)
+{
+ return zpool->driver->total_size(zpool->pool);
+}
+
+static int __init init_zpool(void)
+{
+ pr_info("loaded\n");
+ return 0;
+}
+
+static void __exit exit_zpool(void)
+{
+ pr_info("unloaded\n");
+}
+
+module_init(init_zpool);
+module_exit(exit_zpool);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
+MODULE_DESCRIPTION("Common API for compressed memory storage");
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index fe78189624cf..ae3a28fd8fde 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -92,6 +92,7 @@
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/zsmalloc.h>
+#include <linux/zpool.h>
/*
* This must be power of 2 and greater than of equal to sizeof(link_free).
@@ -240,6 +241,80 @@ struct mapping_area {
enum zs_mapmode vm_mm; /* mapping mode */
};
+/* zpool driver */
+
+#ifdef CONFIG_ZPOOL
+
+static void *zs_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops)
+{
+ return zs_create_pool(gfp);
+}
+
+void zs_zpool_destroy(void *pool)
+{
+ zs_destroy_pool(pool);
+}
+
+int zs_zpool_malloc(void *pool, size_t size, unsigned long *handle)
+{
+ *handle = zs_malloc(pool, size);
+ return *handle ? 0 : -1;
+}
+void zs_zpool_free(void *pool, unsigned long handle)
+{
+ zs_free(pool, handle);
+}
+
+int zs_zpool_shrink(void *pool, unsigned int pages,
+ unsigned int *reclaimed)
+{
+ return -EINVAL;
+}
+
+void *zs_zpool_map(void *pool, unsigned long handle,
+ enum zpool_mapmode mm)
+{
+ enum zs_mapmode zs_mm;
+
+ switch (mm) {
+ case ZPOOL_MM_RO:
+ zs_mm = ZS_MM_RO;
+ break;
+ case ZPOOL_MM_WO:
+ zs_mm = ZS_MM_WO;
+ break;
+ case ZPOOL_MM_RW: /* fallthru */
+ default:
+ zs_mm = ZS_MM_RW;
+ break;
+ }
+
+ return zs_map_object(pool, handle, zs_mm);
+}
+void zs_zpool_unmap(void *pool, unsigned long handle)
+{
+ zs_unmap_object(pool, handle);
+}
+
+u64 zs_zpool_total_size(void *pool)
+{
+ return zs_get_total_size_bytes(pool);
+}
+
+static struct zpool_driver zs_zpool_driver = {
+ .type = "zsmalloc",
+ .owner = THIS_MODULE,
+ .create = zs_zpool_create,
+ .destroy = zs_zpool_destroy,
+ .malloc = zs_zpool_malloc,
+ .free = zs_zpool_free,
+ .shrink = zs_zpool_shrink,
+ .map = zs_zpool_map,
+ .unmap = zs_zpool_unmap,
+ .total_size = zs_zpool_total_size,
+};
+
+#endif /* CONFIG_ZPOOL */
/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
@@ -814,6 +889,10 @@ static void zs_exit(void)
{
int cpu;
+#ifdef CONFIG_ZPOOL
+ zpool_unregister_driver(&zs_zpool_driver);
+#endif
+
cpu_notifier_register_begin();
for_each_online_cpu(cpu)
@@ -840,6 +919,10 @@ static int zs_init(void)
cpu_notifier_register_done();
+#ifdef CONFIG_ZPOOL
+ zpool_register_driver(&zs_zpool_driver);
+#endif
+
return 0;
fail:
zs_exit();
diff --git a/mm/zswap.c b/mm/zswap.c
index 008388fe7b0f..87d77fc5ed34 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -34,7 +34,7 @@
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/mempool.h>
-#include <linux/zbud.h>
+#include <linux/zpool.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
@@ -45,8 +45,8 @@
/*********************************
* statistics
**********************************/
-/* Number of memory pages used by the compressed pool */
-static u64 zswap_pool_pages;
+/* Total bytes used by the compressed storage */
+static u64 zswap_pool_total_size;
/* The number of compressed pages currently stored in zswap */
static atomic_t zswap_stored_pages = ATOMIC_INIT(0);
@@ -89,8 +89,13 @@ static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent,
zswap_max_pool_percent, uint, 0644);
-/* zbud_pool is shared by all of zswap backend */
-static struct zbud_pool *zswap_pool;
+/* Compressed storage to use */
+#define ZSWAP_ZPOOL_DEFAULT "zbud"
+static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
+module_param_named(zpool, zswap_zpool_type, charp, 0444);
+
+/* zpool is shared by all of zswap backend */
+static struct zpool *zswap_pool;
/*********************************
* compression functions
@@ -168,7 +173,7 @@ static void zswap_comp_exit(void)
* be held while changing the refcount. Since the lock must
* be held, there is no reason to also make refcount atomic.
* offset - the swap offset for the entry. Index into the red-black tree.
- * handle - zbud allocation handle that stores the compressed page data
+ * handle - zpool allocation handle that stores the compressed page data
* length - the length in bytes of the compressed page data. Needed during
* decompression
*/
@@ -207,7 +212,7 @@ static int zswap_entry_cache_create(void)
return zswap_entry_cache == NULL;
}
-static void zswap_entry_cache_destory(void)
+static void __init zswap_entry_cache_destroy(void)
{
kmem_cache_destroy(zswap_entry_cache);
}
@@ -284,15 +289,15 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
}
/*
- * Carries out the common pattern of freeing and entry's zbud allocation,
+ * Carries out the common pattern of freeing and entry's zpool allocation,
* freeing the entry itself, and decrementing the number of stored pages.
*/
static void zswap_free_entry(struct zswap_entry *entry)
{
- zbud_free(zswap_pool, entry->handle);
+ zpool_free(zswap_pool, entry->handle);
zswap_entry_cache_free(entry);
atomic_dec(&zswap_stored_pages);
- zswap_pool_pages = zbud_get_pool_size(zswap_pool);
+ zswap_pool_total_size = zpool_get_total_size(zswap_pool);
}
/* caller must hold the tree lock */
@@ -409,7 +414,7 @@ cleanup:
static bool zswap_is_full(void)
{
return totalram_pages * zswap_max_pool_percent / 100 <
- zswap_pool_pages;
+ DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}
/*********************************
@@ -502,7 +507,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
* add_to_swap_cache() doesn't return -EEXIST, so we can safely
* clear SWAP_HAS_CACHE flag.
*/
- swapcache_free(entry, NULL);
+ swapcache_free(entry);
} while (err != -ENOMEM);
if (new_page)
@@ -525,7 +530,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
* the swap cache, the compressed version stored by zswap can be
* freed.
*/
-static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
+static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
{
struct zswap_header *zhdr;
swp_entry_t swpentry;
@@ -541,9 +546,9 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
};
/* extract swpentry from data */
- zhdr = zbud_map(pool, handle);
+ zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
swpentry = zhdr->swpentry; /* here */
- zbud_unmap(pool, handle);
+ zpool_unmap_handle(pool, handle);
tree = zswap_trees[swp_type(swpentry)];
offset = swp_offset(swpentry);
@@ -573,13 +578,13 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
case ZSWAP_SWAPCACHE_NEW: /* page is locked */
/* decompress */
dlen = PAGE_SIZE;
- src = (u8 *)zbud_map(zswap_pool, entry->handle) +
- sizeof(struct zswap_header);
+ src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
+ ZPOOL_MM_RO) + sizeof(struct zswap_header);
dst = kmap_atomic(page);
ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
entry->length, dst, &dlen);
kunmap_atomic(dst);
- zbud_unmap(zswap_pool, entry->handle);
+ zpool_unmap_handle(zswap_pool, entry->handle);
BUG_ON(ret);
BUG_ON(dlen != PAGE_SIZE);
@@ -652,7 +657,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
/* reclaim space if needed */
if (zswap_is_full()) {
zswap_pool_limit_hit++;
- if (zbud_reclaim_page(zswap_pool, 8)) {
+ if (zpool_shrink(zswap_pool, 1, NULL)) {
zswap_reject_reclaim_fail++;
ret = -ENOMEM;
goto reject;
@@ -679,8 +684,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
/* store */
len = dlen + sizeof(struct zswap_header);
- ret = zbud_alloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
- &handle);
+ ret = zpool_malloc(zswap_pool, len, &handle);
if (ret == -ENOSPC) {
zswap_reject_compress_poor++;
goto freepage;
@@ -689,11 +693,11 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
zswap_reject_alloc_fail++;
goto freepage;
}
- zhdr = zbud_map(zswap_pool, handle);
+ zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW);
zhdr->swpentry = swp_entry(type, offset);
buf = (u8 *)(zhdr + 1);
memcpy(buf, dst, dlen);
- zbud_unmap(zswap_pool, handle);
+ zpool_unmap_handle(zswap_pool, handle);
put_cpu_var(zswap_dstmem);
/* populate entry */
@@ -716,7 +720,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
/* update stats */
atomic_inc(&zswap_stored_pages);
- zswap_pool_pages = zbud_get_pool_size(zswap_pool);
+ zswap_pool_total_size = zpool_get_total_size(zswap_pool);
return 0;
@@ -752,13 +756,13 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
/* decompress */
dlen = PAGE_SIZE;
- src = (u8 *)zbud_map(zswap_pool, entry->handle) +
- sizeof(struct zswap_header);
+ src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
+ ZPOOL_MM_RO) + sizeof(struct zswap_header);
dst = kmap_atomic(page);
ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
dst, &dlen);
kunmap_atomic(dst);
- zbud_unmap(zswap_pool, entry->handle);
+ zpool_unmap_handle(zswap_pool, entry->handle);
BUG_ON(ret);
spin_lock(&tree->lock);
@@ -811,7 +815,7 @@ static void zswap_frontswap_invalidate_area(unsigned type)
zswap_trees[type] = NULL;
}
-static struct zbud_ops zswap_zbud_ops = {
+static struct zpool_ops zswap_zpool_ops = {
.evict = zswap_writeback_entry
};
@@ -869,8 +873,8 @@ static int __init zswap_debugfs_init(void)
zswap_debugfs_root, &zswap_written_back_pages);
debugfs_create_u64("duplicate_entry", S_IRUGO,
zswap_debugfs_root, &zswap_duplicate_entry);
- debugfs_create_u64("pool_pages", S_IRUGO,
- zswap_debugfs_root, &zswap_pool_pages);
+ debugfs_create_u64("pool_total_size", S_IRUGO,
+ zswap_debugfs_root, &zswap_pool_total_size);
debugfs_create_atomic_t("stored_pages", S_IRUGO,
zswap_debugfs_root, &zswap_stored_pages);
@@ -895,16 +899,26 @@ static void __exit zswap_debugfs_exit(void) { }
**********************************/
static int __init init_zswap(void)
{
+ gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
+
if (!zswap_enabled)
return 0;
pr_info("loading zswap\n");
- zswap_pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
+ zswap_pool = zpool_create_pool(zswap_zpool_type, gfp, &zswap_zpool_ops);
+ if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
+ pr_info("%s zpool not available\n", zswap_zpool_type);
+ zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
+ zswap_pool = zpool_create_pool(zswap_zpool_type, gfp,
+ &zswap_zpool_ops);
+ }
if (!zswap_pool) {
- pr_err("zbud pool creation failed\n");
+ pr_err("%s zpool not available\n", zswap_zpool_type);
+ pr_err("zpool creation failed\n");
goto error;
}
+ pr_info("using %s pool\n", zswap_zpool_type);
if (zswap_entry_cache_create()) {
pr_err("entry cache creation failed\n");
@@ -926,9 +940,9 @@ static int __init init_zswap(void)
pcpufail:
zswap_comp_exit();
compfail:
- zswap_entry_cache_destory();
+ zswap_entry_cache_destroy();
cachefail:
- zbud_destroy_pool(zswap_pool);
+ zpool_destroy_pool(zswap_pool);
error:
return -ENOMEM;
}
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index f14e54a05691..16dd40910c65 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -184,7 +184,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
/* Reached the end of the list, so insert after 'frag_entry_curr'. */
if (likely(frag_entry_curr)) {
- hlist_add_after(&frag_entry_curr->list, &frag_entry_new->list);
+ hlist_add_behind(&frag_entry_new->list, &frag_entry_curr->list);
chain->size += skb->len - hdr_size;
chain->timestamp = jiffies;
ret = true;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index abfa0b65a111..d9c4f570ab0d 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1174,7 +1174,7 @@ static void br_multicast_add_router(struct net_bridge *br,
}
if (slot)
- hlist_add_after_rcu(slot, &port->rlist);
+ hlist_add_behind_rcu(&port->rlist, slot);
else
hlist_add_head_rcu(&port->rlist, &br->router_list);
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5afeb5aa4c7c..e9cb2588e416 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -940,7 +940,7 @@ static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
last = li;
}
if (last)
- hlist_add_after_rcu(&last->hlist, &new->hlist);
+ hlist_add_behind_rcu(&new->hlist, &last->hlist);
else
hlist_add_before_rcu(&new->hlist, &li->hlist);
}
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 731e1e1722d9..fd0dc47f471d 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -277,7 +277,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
last = p;
}
if (last)
- hlist_add_after_rcu(&last->list, &newp->list);
+ hlist_add_behind_rcu(&newp->list, &last->list);
else
hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
out:
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index a8ef5108e0d8..92cb08d51827 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -389,7 +389,7 @@ redo:
if (h != h0)
continue;
hlist_del(&pol->bydst);
- hlist_add_after(entry0, &pol->bydst);
+ hlist_add_behind(&pol->bydst, entry0);
}
entry0 = &pol->bydst;
}
@@ -654,7 +654,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
break;
}
if (newpos)
- hlist_add_after(newpos, &policy->bydst);
+ hlist_add_behind(&policy->bydst, newpos);
else
hlist_add_head(&policy->bydst, chain);
xfrm_pol_hold(policy);
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 182be0f12407..6c7cbaf44358 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1637,11 +1637,13 @@ sub process {
my $signoff = 0;
my $is_patch = 0;
- my $in_header_lines = 1;
+ my $in_header_lines = $file ? 0 : 1;
my $in_commit_log = 0; #Scanning lines before patch
my $non_utf8_charset = 0;
+ my $last_blank_line = 0;
+
our @report = ();
our $cnt_lines = 0;
our $cnt_error = 0;
@@ -1993,7 +1995,8 @@ sub process {
# Check if it's the start of a commit log
# (not a header line and we haven't seen the patch filename)
if ($in_header_lines && $realfile =~ /^$/ &&
- $rawline !~ /^(commit\b|from\b|[\w-]+:).+$/i) {
+ !($rawline =~ /^\s+\S/ ||
+ $rawline =~ /^(commit\b|from\b|[\w-]+:).*$/i)) {
$in_header_lines = 0;
$in_commit_log = 1;
}
@@ -2049,7 +2052,7 @@ sub process {
# Only applies when adding the entry originally, after that we do not have
# sufficient context to determine whether it is indeed long enough.
if ($realfile =~ /Kconfig/ &&
- $line =~ /.\s*config\s+/) {
+ $line =~ /^\+\s*config\s+/) {
my $length = 0;
my $cnt = $realcnt;
my $ln = $linenr + 1;
@@ -2062,10 +2065,11 @@ sub process {
$is_end = $lines[$ln - 1] =~ /^\+/;
next if ($f =~ /^-/);
+ last if (!$file && $f =~ /^\@\@/);
- if ($lines[$ln - 1] =~ /.\s*(?:bool|tristate)\s*\"/) {
+ if ($lines[$ln - 1] =~ /^\+\s*(?:bool|tristate)\s*\"/) {
$is_start = 1;
- } elsif ($lines[$ln - 1] =~ /.\s*(?:---)?help(?:---)?$/) {
+ } elsif ($lines[$ln - 1] =~ /^\+\s*(?:---)?help(?:---)?$/) {
$length = -1;
}
@@ -2291,10 +2295,36 @@ sub process {
"networking block comments put the trailing */ on a separate line\n" . $herecurr);
}
+# check for missing blank lines after struct/union declarations
+# with exceptions for various attributes and macros
+ if ($prevline =~ /^[\+ ]};?\s*$/ &&
+ !($line =~ /^\+\s*$/ ||
+ $line =~ /^\+\s*EXPORT_SYMBOL/ ||
+ $line =~ /^\+\s*MODULE_/i ||
+ $line =~ /^\+\s*\#\s*(?:end|elif|else)/ ||
+ $line =~ /^\+[a-z_]*init/ ||
+ $line =~ /^\+\s*(?:static\s+)?[A-Z_]*ATTR/ ||
+ $line =~ /^\+\s*DECLARE/ ||
+ $line =~ /^\+\s*__setup/)) {
+ CHK("LINE_SPACING",
+ "Please use a blank line after function/struct/union/enum declarations\n" . $hereprev);
+ }
+
+# check for multiple consecutive blank lines
+ if ($prevline =~ /^[\+ ]\s*$/ &&
+ $line =~ /^\+\s*$/ &&
+ $last_blank_line != ($linenr - 1)) {
+ CHK("LINE_SPACING",
+ "Please don't use multiple blank lines\n" . $hereprev);
+ $last_blank_line = $linenr;
+ }
+
# check for missing blank lines after declarations
if ($sline =~ /^\+\s+\S/ && #Not at char 1
# actual declarations
($prevline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+ # function pointer declarations
+ $prevline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
# foo bar; where foo is some local typedef or #define
$prevline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
# known declaration macros
@@ -2307,6 +2337,8 @@ sub process {
$prevline =~ /(?:\{\s*|\\)$/) &&
# looks like a declaration
!($sline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+ # function pointer declarations
+ $sline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
# foo bar; where foo is some local typedef or #define
$sline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
# known declaration macros
@@ -2321,7 +2353,7 @@ sub process {
$sline =~ /^\+\s+\(?\s*(?:$Compare|$Assignment|$Operators)/) &&
# indentation of previous and current line are the same
(($prevline =~ /\+(\s+)\S/) && $sline =~ /^\+$1\S/)) {
- WARN("SPACING",
+ WARN("LINE_SPACING",
"Missing a blank line after declarations\n" . $hereprev);
}
@@ -2342,6 +2374,16 @@ sub process {
# check we are in a valid C source file if not then ignore this hunk
next if ($realfile !~ /\.(h|c)$/);
+# check indentation of any line with a bare else
+# if the previous line is a break or return and is indented 1 tab more...
+ if ($sline =~ /^\+([\t]+)(?:}[ \t]*)?else(?:[ \t]*{)?\s*$/) {
+ my $tabs = length($1) + 1;
+ if ($prevline =~ /^\+\t{$tabs,$tabs}(?:break|return)\b/) {
+ WARN("UNNECESSARY_ELSE",
+ "else is not generally useful after a break or return\n" . $hereprev);
+ }
+ }
+
# discourage the addition of CONFIG_EXPERIMENTAL in #if(def).
if ($line =~ /^\+\s*\#\s*if.*\bCONFIG_EXPERIMENTAL\b/) {
WARN("CONFIG_EXPERIMENTAL",
@@ -3448,6 +3490,14 @@ sub process {
}
}
+# check unnecessary parentheses around addressof/dereference single $Lvals
+# ie: &(foo->bar) should be &foo->bar and *(foo->bar) should be *foo->bar
+
+ while ($line =~ /(?:[^&]&\s*|\*)\(\s*($Ident\s*(?:$Member\s*)+)\s*\)/g) {
+ CHK("UNNECESSARY_PARENTHESES",
+ "Unnecessary parentheses around $1\n" . $herecurr);
+ }
+
#goto labels aren't indented, allow a single space however
if ($line=~/^.\s+[A-Za-z\d_]+:(?![0-9]+)/ and
!($line=~/^. [A-Za-z\d_]+:/) and !($line=~/^.\s+default:/)) {
@@ -3606,7 +3656,7 @@ sub process {
# if should not continue a brace
if ($line =~ /}\s*if\b/) {
ERROR("TRAILING_STATEMENTS",
- "trailing statements should be on next line\n" .
+ "trailing statements should be on next line (or did you mean 'else if'?)\n" .
$herecurr);
}
# case and default should not have general statements after them
@@ -3762,7 +3812,7 @@ sub process {
$dstat !~ /^(?:$Ident|-?$Constant),$/ && # 10, // foo(),
$dstat !~ /^(?:$Ident|-?$Constant);$/ && # foo();
$dstat !~ /^[!~-]?(?:$Lval|$Constant)$/ && # 10 // foo() // !foo // ~foo // -foo // foo->bar // foo.bar->baz
- $dstat !~ /^'X'$/ && # character constants
+ $dstat !~ /^'X'$/ && $dstat !~ /^'XX'$/ && # character constants
$dstat !~ /$exceptions/ &&
$dstat !~ /^\.$Ident\s*=/ && # .foo =
$dstat !~ /^(?:\#\s*$Ident|\#\s*$Constant)\s*$/ && # stringification #foo
@@ -4014,6 +4064,23 @@ sub process {
}
}
+# check for unnecessary "Out of Memory" messages
+ if ($line =~ /^\+.*\b$logFunctions\s*\(/ &&
+ $prevline =~ /^[ \+]\s*if\s*\(\s*(\!\s*|NULL\s*==\s*)?($Lval)(\s*==\s*NULL\s*)?\s*\)/ &&
+ (defined $1 || defined $3) &&
+ $linenr > 3) {
+ my $testval = $2;
+ my $testline = $lines[$linenr - 3];
+
+ my ($s, $c) = ctx_statement_block($linenr - 3, $realcnt, 0);
+# print("line: <$line>\nprevline: <$prevline>\ns: <$s>\nc: <$c>\n\n\n");
+
+ if ($c =~ /(?:^|\n)[ \+]\s*(?:$Type\s*)?\Q$testval\E\s*=\s*(?:\([^\)]*\)\s*)?\s*(?:devm_)?(?:[kv][czm]alloc(?:_node|_array)?\b|kstrdup|(?:dev_)?alloc_skb)/) {
+ WARN("OOM_MESSAGE",
+ "Possible unnecessary 'out of memory' message\n" . $hereprev);
+ }
+ }
+
# check for bad placement of section $InitAttribute (e.g.: __initdata)
if ($line =~ /(\b$InitAttribute\b)/) {
my $attr = $1;
diff --git a/tools/testing/selftests/cpu-hotplug/Makefile b/tools/testing/selftests/cpu-hotplug/Makefile
index ae5faf9aade2..790c23a9db44 100644
--- a/tools/testing/selftests/cpu-hotplug/Makefile
+++ b/tools/testing/selftests/cpu-hotplug/Makefile
@@ -1,6 +1,6 @@
all:
run_tests:
- @/bin/sh ./on-off-test.sh || echo "cpu-hotplug selftests: [FAIL]"
+ @/bin/bash ./on-off-test.sh || echo "cpu-hotplug selftests: [FAIL]"
clean:
diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile
index 350bfeda3aa8..058c76f5d102 100644
--- a/tools/testing/selftests/memory-hotplug/Makefile
+++ b/tools/testing/selftests/memory-hotplug/Makefile
@@ -1,6 +1,6 @@
all:
run_tests:
- @/bin/sh ./on-off-test.sh || echo "memory-hotplug selftests: [FAIL]"
+ @/bin/bash ./on-off-test.sh || echo "memory-hotplug selftests: [FAIL]"
clean: