author		Stephen Rothwell <sfr@canb.auug.org.au>	2014-08-07 16:51:15 +1000
committer	Stephen Rothwell <sfr@canb.auug.org.au>	2014-08-07 16:51:15 +1000
commit		e2ea6c3da34fa52f332cbc7914ca282cf4d2eeb9 (patch)
tree		3654a6f6518fd66f0007528705ff72c553492bab
parent		3cd70aa838d4412ca4a4a69564b590d7217fefb5 (diff)
parent		929c76e4e6292770322822a50e08b8feaa4afbbf (diff)
Merge branch 'akpm-current/current'
Conflicts:
	arch/arm64/Kconfig
	drivers/rapidio/devices/tsi721_dma.c
	kernel/kexec.c
	net/batman-adv/fragmentation.c
-rw-r--r--Documentation/ABI/testing/sysfs-fs-nilfs2269
-rw-r--r--Documentation/RCU/whatisRCU.txt2
-rw-r--r--Documentation/cgroups/memcg_test.txt160
-rw-r--r--Documentation/devicetree/bindings/i2c/trivial-devices.txt1
-rw-r--r--Documentation/kernel-parameters.txt8
-rw-r--r--Documentation/leds/leds-class.txt3
-rw-r--r--Documentation/oops-tracing.txt2
-rw-r--r--Documentation/printk-formats.txt6
-rw-r--r--Documentation/rapidio/tsi721.txt19
-rw-r--r--Documentation/sysctl/kernel.txt1
-rw-r--r--Documentation/trace/postprocess/trace-vmscan-postprocess.pl53
-rw-r--r--Makefile19
-rw-r--r--arch/alpha/include/asm/Kbuild1
-rw-r--r--arch/alpha/include/asm/scatterlist.h6
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/include/asm/Kbuild1
-rw-r--r--arch/arm/include/asm/scatterlist.h12
-rw-r--r--arch/arm/mm/dma-mapping.c1
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/cris/include/asm/Kbuild1
-rw-r--r--arch/cris/include/asm/scatterlist.h6
-rw-r--r--arch/frv/include/asm/Kbuild1
-rw-r--r--arch/frv/include/asm/scatterlist.h6
-rw-r--r--arch/ia64/Kconfig1
-rw-r--r--arch/ia64/include/asm/Kbuild1
-rw-r--r--arch/ia64/include/asm/scatterlist.h7
-rw-r--r--arch/ia64/kernel/time.c15
-rw-r--r--arch/ia64/mm/init.c3
-rw-r--r--arch/m32r/include/asm/Kbuild1
-rw-r--r--arch/m32r/include/asm/scatterlist.h6
-rw-r--r--arch/m68k/kernel/sys_m68k.c18
-rw-r--r--arch/microblaze/include/asm/Kbuild1
-rw-r--r--arch/microblaze/include/asm/scatterlist.h1
-rw-r--r--arch/mn10300/include/asm/Kbuild1
-rw-r--r--arch/mn10300/include/asm/scatterlist.h16
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/powerpc/include/asm/Kbuild1
-rw-r--r--arch/powerpc/include/asm/scatterlist.h17
-rw-r--r--arch/powerpc/kvm/Makefile1
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c19
-rw-r--r--arch/powerpc/kvm/book3s_hv_cma.c240
-rw-r--r--arch/powerpc/kvm/book3s_hv_cma.h27
-rw-r--r--arch/powerpc/mm/dma-noncoherent.c1
-rw-r--r--arch/powerpc/mm/mem.c3
-rw-r--r--arch/powerpc/platforms/44x/warp.c1
-rw-r--r--arch/powerpc/platforms/52xx/efika.c1
-rw-r--r--arch/powerpc/platforms/amigaone/setup.c1
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/include/asm/Kbuild1
-rw-r--r--arch/s390/include/asm/scatterlist.h3
-rw-r--r--arch/score/include/asm/Kbuild1
-rw-r--r--arch/score/include/asm/scatterlist.h6
-rw-r--r--arch/score/include/uapi/asm/ptrace.h11
-rw-r--r--arch/sh/drivers/dma/Kconfig5
-rw-r--r--arch/sh/include/cpu-sh4/cpu/dma-register.h1
-rw-r--r--arch/sh/include/cpu-sh4a/cpu/dma.h3
-rw-r--r--arch/sh/kernel/cpu/sh4a/clock-sh7724.c4
-rw-r--r--arch/sh/kernel/time.c4
-rw-r--r--arch/sh/mm/asids-debugfs.c4
-rw-r--r--arch/sh/mm/init.c5
-rw-r--r--arch/sparc/Kconfig1
-rw-r--r--arch/sparc/include/asm/Kbuild1
-rw-r--r--arch/sparc/include/asm/scatterlist.h8
-rw-r--r--arch/tile/include/asm/hardwall.h2
-rw-r--r--arch/tile/kernel/hardwall.c6
-rw-r--r--arch/tile/kernel/module.c2
-rw-r--r--arch/um/include/asm/Kbuild1
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/Kbuild3
-rw-r--r--arch/x86/include/asm/numa.h1
-rw-r--r--arch/x86/include/asm/pgtable_64.h3
-rw-r--r--arch/x86/include/asm/scatterlist.h8
-rw-r--r--arch/x86/mm/fault.c5
-rw-r--r--arch/x86/mm/init_32.c3
-rw-r--r--arch/x86/mm/init_64.c39
-rw-r--r--arch/x86/mm/numa.c34
-rw-r--r--arch/x86/platform/efi/efi-bgrt.c36
-rw-r--r--block/bio-integrity.c2
-rw-r--r--block/genhd.c2
-rw-r--r--crypto/zlib.c8
-rw-r--r--drivers/ata/Kconfig1
-rw-r--r--drivers/ata/libata-core.c72
-rw-r--r--drivers/base/Kconfig10
-rw-r--r--drivers/base/dma-contiguous.c220
-rw-r--r--drivers/base/memory.c30
-rw-r--r--drivers/base/node.c2
-rw-r--r--drivers/block/zram/zram_drv.c71
-rw-r--r--drivers/block/zram/zram_drv.h29
-rw-r--r--drivers/firmware/memmap.c6
-rw-r--r--drivers/gpu/drm/drm_hashtab.c2
-rw-r--r--drivers/hwmon/asus_atk0110.c2
-rw-r--r--drivers/input/Kconfig9
-rw-r--r--drivers/input/Makefile3
-rw-r--r--drivers/input/input.c6
-rw-r--r--drivers/input/leds.c249
-rw-r--r--drivers/leds/Kconfig3
-rw-r--r--drivers/lguest/core.c7
-rw-r--r--drivers/misc/ti-st/st_core.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c2
-rw-r--r--drivers/net/irda/donauboe.c15
-rw-r--r--drivers/parport/parport_ip32.c2
-rw-r--r--drivers/rapidio/devices/tsi721.h12
-rw-r--r--drivers/rapidio/devices/tsi721_dma.c718
-rw-r--r--drivers/rapidio/rio.c66
-rw-r--r--drivers/rtc/Kconfig29
-rw-r--r--drivers/rtc/Makefile5
-rw-r--r--drivers/rtc/class.c16
-rw-r--r--drivers/rtc/interface.c2
-rw-r--r--drivers/rtc/rtc-ds1343.c75
-rw-r--r--drivers/rtc/rtc-ds1742.c2
-rw-r--r--drivers/rtc/rtc-efi-platform.c31
-rw-r--r--drivers/rtc/rtc-efi.c32
-rw-r--r--drivers/rtc/rtc-isl12022.c12
-rw-r--r--drivers/rtc/rtc-pcf85063.c204
-rw-r--r--drivers/rtc/rtc-pcf8563.c231
-rw-r--r--drivers/staging/android/binder.c4
-rw-r--r--drivers/staging/lustre/lustre/libcfs/hash.c4
-rw-r--r--drivers/tty/Kconfig4
-rw-r--r--drivers/tty/sysrq.c2
-rw-r--r--drivers/tty/vt/keyboard.c110
-rw-r--r--drivers/video/backlight/backlight.c2
-rw-r--r--fs/adfs/adfs.h1
-rw-r--r--fs/adfs/dir.c2
-rw-r--r--fs/adfs/dir_fplus.c9
-rw-r--r--fs/autofs4/autofs_i.h63
-rw-r--r--fs/autofs4/expire.c1
-rw-r--r--fs/autofs4/root.c10
-rw-r--r--fs/bfs/bfs.h1
-rw-r--r--fs/bfs/dir.c4
-rw-r--r--fs/bfs/inode.c8
-rw-r--r--fs/binfmt_elf.c21
-rw-r--r--fs/cachefiles/bind.c6
-rw-r--r--fs/cachefiles/daemon.c6
-rw-r--r--fs/cifs/cifsacl.c2
-rw-r--r--fs/cifs/cifssmb.c20
-rw-r--r--fs/cifs/file.c4
-rw-r--r--fs/cifs/sess.c2
-rw-r--r--fs/cifs/smb2file.c4
-rw-r--r--fs/cifs/smb2misc.c38
-rw-r--r--fs/cifs/smb2ops.c2
-rw-r--r--fs/cifs/smb2pdu.c2
-rw-r--r--fs/cifs/smb2pdu.h28
-rw-r--r--fs/coda/cache.c2
-rw-r--r--fs/coda/coda_linux.c2
-rw-r--r--fs/coda/dir.c3
-rw-r--r--fs/coda/file.c2
-rw-r--r--fs/coda/inode.c4
-rw-r--r--fs/coda/pioctl.c2
-rw-r--r--fs/coda/psdev.c2
-rw-r--r--fs/coda/upcall.c2
-rw-r--r--fs/compat.c2
-rw-r--r--fs/cramfs/inode.c45
-rw-r--r--fs/cramfs/uncompress.c10
-rw-r--r--fs/dlm/debug_fs.c15
-rw-r--r--fs/efs/namei.c11
-rw-r--r--fs/exec.c4
-rw-r--r--fs/exofs/ore_raid.c2
-rw-r--r--fs/ext4/fsync.c5
-rw-r--r--fs/fscache/main.c4
-rw-r--r--fs/hfsplus/catalog.c89
-rw-r--r--fs/hfsplus/dir.c11
-rw-r--r--fs/hfsplus/hfsplus_fs.h4
-rw-r--r--fs/hfsplus/super.c4
-rw-r--r--fs/hpfs/dnode.c17
-rw-r--r--fs/isofs/Makefile2
-rw-r--r--fs/isofs/compress.c10
-rw-r--r--fs/isofs/export.c11
-rw-r--r--fs/isofs/inode.c88
-rw-r--r--fs/isofs/namei.c5
-rw-r--r--fs/isofs/rock.c44
-rw-r--r--fs/jffs2/compr_zlib.c7
-rw-r--r--fs/logfs/readwrite.c15
-rw-r--r--fs/mpage.c23
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nilfs2/Makefile2
-rw-r--r--fs/nilfs2/nilfs.h8
-rw-r--r--fs/nilfs2/super.c11
-rw-r--r--fs/nilfs2/sysfs.c1137
-rw-r--r--fs/nilfs2/sysfs.h176
-rw-r--r--fs/nilfs2/the_nilfs.c17
-rw-r--r--fs/nilfs2/the_nilfs.h20
-rw-r--r--fs/notify/inode_mark.c2
-rw-r--r--fs/notify/vfsmount_mark.c2
-rw-r--r--fs/ntfs/file.c3
-rw-r--r--fs/ocfs2/alloc.c43
-rw-r--r--fs/ocfs2/alloc.h2
-rw-r--r--fs/ocfs2/aops.c37
-rw-r--r--fs/ocfs2/cluster/quorum.c13
-rw-r--r--fs/ocfs2/cluster/tcp.c45
-rw-r--r--fs/ocfs2/cluster/tcp.h1
-rw-r--r--fs/ocfs2/dir.c2
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c5
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c26
-rw-r--r--fs/ocfs2/dlm/dlmthread.c10
-rw-r--r--fs/ocfs2/file.c2
-rw-r--r--fs/ocfs2/inode.c10
-rw-r--r--fs/ocfs2/ioctl.c129
-rw-r--r--fs/ocfs2/move_extents.c2
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/ocfs2/slot_map.c2
-rw-r--r--fs/omfs/inode.c2
-rw-r--r--fs/proc/base.c181
-rw-r--r--fs/proc/fd.c2
-rw-r--r--fs/proc/generic.c32
-rw-r--r--fs/proc/internal.h9
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--fs/proc/meminfo.c2
-rw-r--r--fs/proc/proc_sysctl.c2
-rw-r--r--fs/proc/proc_tty.c4
-rw-r--r--fs/proc/root.c4
-rw-r--r--fs/proc/vmcore.c82
-rw-r--r--fs/pstore/ram_core.c2
-rw-r--r--fs/qnx6/Makefile1
-rw-r--r--fs/qnx6/dir.c26
-rw-r--r--fs/qnx6/inode.c99
-rw-r--r--fs/qnx6/namei.c6
-rw-r--r--fs/qnx6/qnx6.h12
-rw-r--r--fs/qnx6/super_mmi.c22
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/reiserfs/dir.c2
-rw-r--r--fs/reiserfs/do_balan.c2
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/reiserfs/ibalance.c2
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/reiserfs/ioctl.c2
-rw-r--r--fs/reiserfs/item_ops.c4
-rw-r--r--fs/reiserfs/lbalance.c2
-rw-r--r--fs/reiserfs/prints.c4
-rw-r--r--fs/reiserfs/procfs.c2
-rw-r--r--fs/reiserfs/stree.c2
-rw-r--r--fs/reiserfs/super.c9
-rw-r--r--fs/reiserfs/xattr.c22
-rw-r--r--fs/reiserfs/xattr_acl.c2
-rw-r--r--fs/reiserfs/xattr_security.c2
-rw-r--r--fs/reiserfs/xattr_trusted.c2
-rw-r--r--fs/reiserfs/xattr_user.c2
-rw-r--r--fs/romfs/super.c23
-rw-r--r--fs/squashfs/file_direct.c2
-rw-r--r--fs/squashfs/super.c5
-rw-r--r--fs/ufs/Makefile1
-rw-r--r--fs/ufs/inode.c32
-rw-r--r--fs/ufs/super.c304
-rw-r--r--fs/ufs/ufs.h10
-rw-r--r--include/linux/bitmap.h62
-rw-r--r--include/linux/byteorder/generic.h2
-rw-r--r--include/linux/cma.h27
-rw-r--r--include/linux/crc64_ecma.h56
-rw-r--r--include/linux/decompress/bunzip2.h8
-rw-r--r--include/linux/decompress/generic.h10
-rw-r--r--include/linux/decompress/inflate.h8
-rw-r--r--include/linux/decompress/unlz4.h8
-rw-r--r--include/linux/decompress/unlzma.h8
-rw-r--r--include/linux/decompress/unlzo.h8
-rw-r--r--include/linux/decompress/unxz.h8
-rw-r--r--include/linux/dma-contiguous.h11
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/gfp.h2
-rw-r--r--include/linux/glob.h10
-rw-r--r--include/linux/huge_mm.h4
-rw-r--r--include/linux/hugetlb.h1
-rw-r--r--include/linux/input.h21
-rw-r--r--include/linux/kernel.h38
-rw-r--r--include/linux/klist.h2
-rw-r--r--include/linux/list.h14
-rw-r--r--include/linux/memblock.h4
-rw-r--r--include/linux/memcontrol.h98
-rw-r--r--include/linux/memory_hotplug.h10
-rw-r--r--include/linux/mm_types.h1
-rw-r--r--include/linux/mmdebug.h2
-rw-r--r--include/linux/mmzone.h219
-rw-r--r--include/linux/nodemask.h11
-rw-r--r--include/linux/oom.h4
-rw-r--r--include/linux/page-flags.h21
-rw-r--r--include/linux/page_cgroup.h70
-rw-r--r--include/linux/pagemap.h3
-rw-r--r--include/linux/printk.h2
-rw-r--r--include/linux/rculist.h8
-rw-r--r--include/linux/rio_drv.h5
-rw-r--r--include/linux/scatterlist.h2
-rw-r--r--include/linux/sched.h13
-rw-r--r--include/linux/shm.h18
-rw-r--r--include/linux/string.h1
-rw-r--r--include/linux/swap.h16
-rw-r--r--include/linux/sysctl.h2
-rw-r--r--include/linux/user_namespace.h6
-rw-r--r--include/linux/vmalloc.h2
-rw-r--r--include/linux/zbud.h2
-rw-r--r--include/linux/zlib.h122
-rw-r--r--include/linux/zpool.h106
-rw-r--r--include/scsi/scsi.h2
-rw-r--r--include/trace/events/migrate.h1
-rw-r--r--include/trace/events/pagemap.h16
-rw-r--r--init/Kconfig46
-rw-r--r--init/do_mounts.c12
-rw-r--r--init/do_mounts_rd.c10
-rw-r--r--init/initramfs.c60
-rw-r--r--ipc/shm.c75
-rw-r--r--kernel/acct.c30
-rw-r--r--kernel/auditfilter.c4
-rw-r--r--kernel/bounds.c2
-rw-r--r--kernel/events/uprobes.c15
-rw-r--r--kernel/exit.c50
-rw-r--r--kernel/fork.c77
-rw-r--r--kernel/gcov/fs.c3
-rw-r--r--kernel/kallsyms.c2
-rw-r--r--kernel/panic.c1
-rw-r--r--kernel/printk/printk.c157
-rw-r--r--kernel/sysctl.c9
-rw-r--r--kernel/test_kprobes.c87
-rw-r--r--kernel/user_namespace.c6
-rw-r--r--kernel/watchdog.c11
-rw-r--r--lib/Kconfig47
-rw-r--r--lib/Kconfig.debug2
-rw-r--r--lib/Makefile3
-rw-r--r--lib/bitmap.c111
-rw-r--r--lib/cmdline.c15
-rw-r--r--lib/crc64_ecma.c341
-rw-r--r--lib/decompress.c2
-rw-r--r--lib/decompress_bunzip2.c26
-rw-r--r--lib/decompress_inflate.c12
-rw-r--r--lib/decompress_unlz4.c83
-rw-r--r--lib/decompress_unlzma.c28
-rw-r--r--lib/decompress_unlzo.c12
-rw-r--r--lib/decompress_unxz.c10
-rw-r--r--lib/glob.c287
-rw-r--r--lib/idr.c25
-rw-r--r--lib/kfifo.c6
-rw-r--r--lib/klist.c6
-rw-r--r--lib/list_sort.c71
-rw-r--r--lib/rbtree.c2
-rw-r--r--lib/scatterlist.c4
-rw-r--r--lib/string_helpers.c15
-rw-r--r--lib/test-kstrtox.c2
-rw-r--r--lib/vsprintf.c20
-rw-r--r--lib/zlib_deflate/deflate.c143
-rw-r--r--lib/zlib_inflate/inflate.c132
-rw-r--r--mm/Kconfig54
-rw-r--r--mm/Makefile2
-rw-r--r--mm/cma.c335
-rw-r--r--mm/compaction.c17
-rw-r--r--mm/filemap.c52
-rw-r--r--mm/gup.c18
-rw-r--r--mm/huge_memory.c89
-rw-r--r--mm/hugetlb.c129
-rw-r--r--mm/hwpoison-inject.c3
-rw-r--r--mm/internal.h2
-rw-r--r--mm/madvise.c3
-rw-r--r--mm/memcontrol.c1675
-rw-r--r--mm/memory-failure.c10
-rw-r--r--mm/memory.c91
-rw-r--r--mm/memory_hotplug.c45
-rw-r--r--mm/migrate.c38
-rw-r--r--mm/mlock.c9
-rw-r--r--mm/mmap.c5
-rw-r--r--mm/oom_kill.c38
-rw-r--r--mm/page-writeback.c5
-rw-r--r--mm/page_alloc.c161
-rw-r--r--mm/readahead.c3
-rw-r--r--mm/rmap.c20
-rw-r--r--mm/shmem.c84
-rw-r--r--mm/slab.c514
-rw-r--r--mm/slab.h22
-rw-r--r--mm/slab_common.c101
-rw-r--r--mm/slub.c221
-rw-r--r--mm/swap.c54
-rw-r--r--mm/swap_state.c8
-rw-r--r--mm/swapfile.c21
-rw-r--r--mm/truncate.c9
-rw-r--r--mm/util.c132
-rw-r--r--mm/vmalloc.c30
-rw-r--r--mm/vmscan.c264
-rw-r--r--mm/vmstat.c150
-rw-r--r--mm/zbud.c98
-rw-r--r--mm/zpool.c364
-rw-r--r--mm/zsmalloc.c86
-rw-r--r--mm/zswap.c81
-rw-r--r--net/batman-adv/fragmentation.c2
-rw-r--r--net/bridge/br_multicast.c2
-rw-r--r--net/ipv4/fib_trie.c2
-rw-r--r--net/ipv6/addrlabel.c2
-rw-r--r--net/xfrm/xfrm_policy.c4
-rwxr-xr-xscripts/checkpatch.pl570
-rwxr-xr-xscripts/checkstack.pl12
-rw-r--r--scripts/coccinelle/free/ifnullfree.cocci54
-rwxr-xr-xscripts/tags.sh2
387 files changed, 9627 insertions, 6020 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-nilfs2 b/Documentation/ABI/testing/sysfs-fs-nilfs2
new file mode 100644
index 000000000000..304ba84a973a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-fs-nilfs2
@@ -0,0 +1,269 @@
+
+What: /sys/fs/nilfs2/features/revision
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show the current revision of the NILFS file system driver.
+ This value indicates the file system revision that the
+ driver is able to support.
+
+What: /sys/fs/nilfs2/features/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/features group.
+
+What: /sys/fs/nilfs2/<device>/revision
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show NILFS file system revision on volume.
+ This value informs about metadata structures'
+ revision on mounted volume.
+
+What: /sys/fs/nilfs2/<device>/blocksize
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show volume's block size in bytes.
+
+What: /sys/fs/nilfs2/<device>/device_size
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show volume size in bytes.
+
+What: /sys/fs/nilfs2/<device>/free_blocks
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show count of free blocks on volume.
+
+What: /sys/fs/nilfs2/<device>/uuid
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show volume's UUID (Universally Unique Identifier).
+
+What: /sys/fs/nilfs2/<device>/volume_name
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show volume's label.
+
+What: /sys/fs/nilfs2/<device>/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device> group.
+
+What: /sys/fs/nilfs2/<device>/superblock/sb_write_time
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show last write time of super block in human-readable
+ format.
+
+What: /sys/fs/nilfs2/<device>/superblock/sb_write_time_secs
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show last write time of super block in seconds.
+
+What: /sys/fs/nilfs2/<device>/superblock/sb_write_count
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show current write count of super block.
+
+What: /sys/fs/nilfs2/<device>/superblock/sb_update_frequency
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show/Set interval of periodical update of superblock
+ (in seconds).
+
+What: /sys/fs/nilfs2/<device>/superblock/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/superblock
+ group.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_pseg_block
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show start block number of the latest segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_seg_sequence
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show sequence value of the latest segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_seg_checkpoint
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show checkpoint number of the latest segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/current_seg_sequence
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show segment sequence counter.
+
+What: /sys/fs/nilfs2/<device>/segctor/current_last_full_seg
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show index number of the latest full segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/next_full_seg
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show index number of the full segment index
+ to be used next.
+
+What: /sys/fs/nilfs2/<device>/segctor/next_pseg_offset
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show offset of next partial segment in the current
+ full segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/next_checkpoint
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show next checkpoint number.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_seg_write_time
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show write time of the last segment in
+ human-readable format.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_seg_write_time_secs
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show write time of the last segment in seconds.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_nongc_write_time
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show write time of the last segment not for cleaner
+ operation in human-readable format.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_nongc_write_time_secs
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show write time of the last segment not for cleaner
+ operation in seconds.
+
+What: /sys/fs/nilfs2/<device>/segctor/dirty_data_blocks_count
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of dirty data blocks.
+
+What: /sys/fs/nilfs2/<device>/segctor/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/segctor
+ group.
+
+What: /sys/fs/nilfs2/<device>/segments/segments_number
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of segments on a volume.
+
+What: /sys/fs/nilfs2/<device>/segments/blocks_per_segment
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of blocks in segment.
+
+What: /sys/fs/nilfs2/<device>/segments/clean_segments
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show count of clean segments.
+
+What: /sys/fs/nilfs2/<device>/segments/dirty_segments
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show count of dirty segments.
+
+What: /sys/fs/nilfs2/<device>/segments/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/segments
+ group.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/checkpoints_number
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of checkpoints on volume.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/snapshots_number
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of snapshots on volume.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/last_seg_checkpoint
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show checkpoint number of the latest segment.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/next_checkpoint
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show next checkpoint number.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/checkpoints
+ group.
+
+What: /sys/fs/nilfs2/<device>/mounted_snapshots/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe content of /sys/fs/nilfs2/<device>/mounted_snapshots
+ group.
+
+What: /sys/fs/nilfs2/<device>/mounted_snapshots/<id>/inodes_count
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of inodes for snapshot.
+
+What: /sys/fs/nilfs2/<device>/mounted_snapshots/<id>/blocks_count
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of blocks for snapshot.
+
+What: /sys/fs/nilfs2/<device>/mounted_snapshots/<id>/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/mounted_snapshots/<id>
+ group.
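For illustration, a minimal C sketch of how userspace might read one of the attributes documented above; the sysfs path comes from this document, while the program itself (buffer size, error handling) is arbitrary:

	#include <stdio.h>

	int main(void)
	{
		char rev[32];
		FILE *f = fopen("/sys/fs/nilfs2/features/revision", "r");

		if (!f) {
			perror("fopen");
			return 1;
		}
		if (fgets(rev, sizeof(rev), f))
			printf("NILFS2 driver supports revision %s", rev);
		fclose(f);
		return 0;
	}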
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 49b8551a3b68..e48c57f1943b 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -818,7 +818,7 @@ RCU pointer/list update:
list_add_tail_rcu
list_del_rcu
list_replace_rcu
- hlist_add_after_rcu
+ hlist_add_behind_rcu
hlist_add_before_rcu
hlist_add_head_rcu
hlist_del_rcu
diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt
index 80ac454704b8..8870b0212150 100644
--- a/Documentation/cgroups/memcg_test.txt
+++ b/Documentation/cgroups/memcg_test.txt
@@ -24,64 +24,27 @@ Please note that implementation details can be changed.
a page/swp_entry may be charged (usage += PAGE_SIZE) at
- mem_cgroup_charge_anon()
- Called at new page fault and Copy-On-Write.
-
- mem_cgroup_try_charge_swapin()
- Called at do_swap_page() (page fault on swap entry) and swapoff.
- Followed by charge-commit-cancel protocol. (With swap accounting)
- At commit, a charge recorded in swap_cgroup is removed.
-
- mem_cgroup_charge_file()
- Called at add_to_page_cache()
-
- mem_cgroup_cache_charge_swapin()
- Called at shmem's swapin.
-
- mem_cgroup_prepare_migration()
- Called before migration. "extra" charge is done and followed by
- charge-commit-cancel protocol.
- At commit, charge against oldpage or newpage will be committed.
+ mem_cgroup_try_charge()
2. Uncharge
a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by
- mem_cgroup_uncharge_page()
- Called when an anonymous page is fully unmapped. I.e., mapcount goes
- to 0. If the page is SwapCache, uncharge is delayed until
- mem_cgroup_uncharge_swapcache().
-
- mem_cgroup_uncharge_cache_page()
- Called when a page-cache is deleted from radix-tree. If the page is
- SwapCache, uncharge is delayed until mem_cgroup_uncharge_swapcache().
-
- mem_cgroup_uncharge_swapcache()
- Called when SwapCache is removed from radix-tree. The charge itself
- is moved to swap_cgroup. (If mem+swap controller is disabled, no
- charge to swap occurs.)
+ mem_cgroup_uncharge()
+ Called when a page's refcount goes down to 0.
mem_cgroup_uncharge_swap()
Called when swp_entry's refcnt goes down to 0. A charge against swap
disappears.
- mem_cgroup_end_migration(old, new)
- At success of migration old is uncharged (if necessary), a charge
- to new page is committed. At failure, charge to old page is committed.
-
3. charge-commit-cancel
- In some case, we can't know this "charge" is valid or not at charging
- (because of races).
- To handle such case, there are charge-commit-cancel functions.
- mem_cgroup_try_charge_XXX
- mem_cgroup_commit_charge_XXX
- mem_cgroup_cancel_charge_XXX
- these are used in swap-in and migration.
+ Memcg pages are charged in two steps:
+ mem_cgroup_try_charge()
+ mem_cgroup_commit_charge() or mem_cgroup_cancel_charge()
At try_charge(), there are no flags to say "this page is charged".
at this point, usage += PAGE_SIZE.
- At commit(), the function checks the page should be charged or not
- and set flags or avoid charging.(usage -= PAGE_SIZE)
+ At commit(), the page is associated with the memcg.
At cancel(), simply usage -= PAGE_SIZE.
@@ -91,18 +54,6 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
Anonymous page is newly allocated at
- page fault into MAP_ANONYMOUS mapping.
- Copy-On-Write.
- It is charged right after it's allocated before doing any page table
- related operations. Of course, it's uncharged when another page is used
- for the fault address.
-
- At freeing anonymous page (by exit() or munmap()), zap_pte() is called
- and pages for ptes are freed one by one.(see mm/memory.c). Uncharges
- are done at page_remove_rmap() when page_mapcount() goes down to 0.
-
- Another page freeing is by page-reclaim (vmscan.c) and anonymous
- pages are swapped out. In this case, the page is marked as
- PageSwapCache(). uncharge() routine doesn't uncharge the page marked
- as SwapCache(). It's delayed until __delete_from_swap_cache().
4.1 Swap-in.
At swap-in, the page is taken from swap-cache. There are 2 cases.
@@ -111,41 +62,6 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
(b) If the SwapCache has been mapped by processes, it has been
charged already.
- This swap-in is one of the most complicated work. In do_swap_page(),
- following events occur when pte is unchanged.
-
- (1) the page (SwapCache) is looked up.
- (2) lock_page()
- (3) try_charge_swapin()
- (4) reuse_swap_page() (may call delete_swap_cache())
- (5) commit_charge_swapin()
- (6) swap_free().
-
- Considering following situation for example.
-
- (A) The page has not been charged before (2) and reuse_swap_page()
- doesn't call delete_from_swap_cache().
- (B) The page has not been charged before (2) and reuse_swap_page()
- calls delete_from_swap_cache().
- (C) The page has been charged before (2) and reuse_swap_page() doesn't
- call delete_from_swap_cache().
- (D) The page has been charged before (2) and reuse_swap_page() calls
- delete_from_swap_cache().
-
- memory.usage/memsw.usage changes to this page/swp_entry will be
- Case (A) (B) (C) (D)
- Event
- Before (2) 0/ 1 0/ 1 1/ 1 1/ 1
- ===========================================
- (3) +1/+1 +1/+1 +1/+1 +1/+1
- (4) - 0/ 0 - -1/ 0
- (5) 0/-1 0/ 0 -1/-1 0/ 0
- (6) - 0/-1 - 0/-1
- ===========================================
- Result 1/ 1 1/ 1 1/ 1 1/ 1
-
- In any cases, charges to this page should be 1/ 1.
-
4.2 Swap-out.
At swap-out, typical state transition is below.
@@ -158,28 +74,20 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
swp_entry's refcnt -= 1.
- At (b), the page is marked as SwapCache and not uncharged.
- At (d), the page is removed from SwapCache and a charge in page_cgroup
- is moved to swap_cgroup.
-
Finally, at task exit,
(e) zap_pte() is called and swp_entry's refcnt -=1 -> 0.
- Here, a charge in swap_cgroup disappears.
5. Page Cache
Page Cache is charged at
- add_to_page_cache_locked().
- uncharged at
- - __remove_from_page_cache().
-
The logic is very clear. (About migration, see below)
Note: __remove_from_page_cache() is called by remove_from_page_cache()
and __remove_mapping().
6. Shmem(tmpfs) Page Cache
- Memcg's charge/uncharge have special handlers of shmem. The best way
- to understand shmem's page state transition is to read mm/shmem.c.
+ The best way to understand shmem's page state transition is to read
+ mm/shmem.c.
But brief explanation of the behavior of memcg around shmem will be
helpful to understand the logic.
@@ -192,56 +100,10 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
It's charged when...
- A new page is added to shmem's radix-tree.
- A swp page is read. (move a charge from swap_cgroup to page_cgroup)
- It's uncharged when
- - A page is removed from radix-tree and not SwapCache.
- - When SwapCache is removed, a charge is moved to swap_cgroup.
- - When swp_entry's refcnt goes down to 0, a charge in swap_cgroup
- disappears.
7. Page Migration
- One of the most complicated functions is page-migration-handler.
- Memcg has 2 routines. Assume that we are migrating a page's contents
- from OLDPAGE to NEWPAGE.
-
- Usual migration logic is..
- (a) remove the page from LRU.
- (b) allocate NEWPAGE (migration target)
- (c) lock by lock_page().
- (d) unmap all mappings.
- (e-1) If necessary, replace entry in radix-tree.
- (e-2) move contents of a page.
- (f) map all mappings again.
- (g) pushback the page to LRU.
- (-) OLDPAGE will be freed.
-
- Before (g), memcg should complete all necessary charge/uncharge to
- NEWPAGE/OLDPAGE.
-
- The point is....
- - If OLDPAGE is anonymous, all charges will be dropped at (d) because
- try_to_unmap() drops all mapcount and the page will not be
- SwapCache.
-
- - If OLDPAGE is SwapCache, charges will be kept at (g) because
- __delete_from_swap_cache() isn't called at (e-1)
-
- - If OLDPAGE is page-cache, charges will be kept at (g) because
- remove_from_swap_cache() isn't called at (e-1)
-
- memcg provides following hooks.
-
- - mem_cgroup_prepare_migration(OLDPAGE)
- Called after (b) to account a charge (usage += PAGE_SIZE) against
- memcg which OLDPAGE belongs to.
-
- - mem_cgroup_end_migration(OLDPAGE, NEWPAGE)
- Called after (f) before (g).
- If OLDPAGE is used, commit OLDPAGE again. If OLDPAGE is already
- charged, a charge by prepare_migration() is automatically canceled.
- If NEWPAGE is used, commit NEWPAGE and uncharge OLDPAGE.
-
- But zap_pte() (by exit or munmap) can be called while migration,
- we have to check if OLDPAGE/NEWPAGE is a valid page after commit().
+
+ mem_cgroup_migrate()
8. LRU
Each memcg has its own private LRU. Now, its handling is under global
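For illustration, the two-step charge protocol described above has roughly the following shape from a caller's point of view. This is only a sketch: the argument lists are approximations of the reworked memcg API rather than copies from mm/memcontrol.c, and insertion_failed stands in for whatever failure the caller is handling:

	struct mem_cgroup *memcg;
	int err;

	err = mem_cgroup_try_charge(page, mm, gfp_mask, &memcg);	/* usage += PAGE_SIZE */
	if (err)
		return err;
	/* ... insert the page into the page tables or page cache ... */
	if (insertion_failed)
		mem_cgroup_cancel_charge(page, memcg);		/* usage -= PAGE_SIZE */
	else
		mem_cgroup_commit_charge(page, memcg, false);	/* bind the page to the memcg */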
diff --git a/Documentation/devicetree/bindings/i2c/trivial-devices.txt b/Documentation/devicetree/bindings/i2c/trivial-devices.txt
index 37803eb5521e..6af570ec53b4 100644
--- a/Documentation/devicetree/bindings/i2c/trivial-devices.txt
+++ b/Documentation/devicetree/bindings/i2c/trivial-devices.txt
@@ -70,6 +70,7 @@ nuvoton,npct501 i2c trusted platform module (TPM)
nxp,pca9556 Octal SMBus and I2C registered interface
nxp,pca9557 8-bit I2C-bus and SMBus I/O port with reset
nxp,pcf8563 Real-time clock/calendar
+nxp,pcf85063 Tiny Real-Time Clock
ovti,ov5642 OV5642: Color CMOS QSXGA (5-megapixel) Image Sensor with OmniBSI and Embedded TrueFocus
pericom,pt7c4338 Real-time Clock Module
plx,pex8648 48-Lane, 12-Port PCI Express Gen 2 (5.0 GT/s) Switch
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 572ad0268b0e..5ae8608ca9f5 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1722,8 +1722,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
7 (KERN_DEBUG) debug-level messages
log_buf_len=n[KMG] Sets the size of the printk ring buffer,
- in bytes. n must be a power of two. The default
- size is set in the kernel config file.
+ in bytes. n must be a power of two and greater
+ than the minimal size. The minimal size is defined
+ by the LOG_BUF_SHIFT kernel config parameter. There is
+ also the CONFIG_LOG_CPU_MAX_BUF_SHIFT config parameter,
+ which allows the default size to be increased depending
+ on the number of CPUs. See init/Kconfig for more details.
logo.nologo [FB] Disables display of the built-in Linux logo.
This may be used to provide more screen space for
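Returning to the log_buf_len description above, a brief worked example with illustrative numbers: if LOG_BUF_SHIFT is 17, the minimal ring buffer size is 2^17 bytes = 128 KiB, so log_buf_len=64K falls below the minimum and would not be accepted, while log_buf_len=1M (a power of two above the minimum) requests a 1 MiB buffer. Independently of log_buf_len, CONFIG_LOG_CPU_MAX_BUF_SHIFT lets the kernel choose a default larger than 128 KiB on machines with many CPUs.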
diff --git a/Documentation/leds/leds-class.txt b/Documentation/leds/leds-class.txt
index 79699c200766..62261c04060a 100644
--- a/Documentation/leds/leds-class.txt
+++ b/Documentation/leds/leds-class.txt
@@ -2,9 +2,6 @@
LED handling under Linux
========================
-If you're reading this and thinking about keyboard leds, these are
-handled by the input subsystem and the led class is *not* needed.
-
In its simplest form, the LED class just allows control of LEDs from
userspace. LEDs appear in /sys/class/leds/. The maximum brightness of the
LED is defined in max_brightness file. The brightness file will set the brightness
diff --git a/Documentation/oops-tracing.txt b/Documentation/oops-tracing.txt
index e3155995ddd8..beefb9f82902 100644
--- a/Documentation/oops-tracing.txt
+++ b/Documentation/oops-tracing.txt
@@ -268,6 +268,8 @@ characters, each representing a particular tainted value.
14: 'E' if an unsigned module has been loaded in a kernel supporting
module signature.
+ 15: 'L' if a soft lockup has previously occurred on the system.
+
The primary reason for the 'Tainted: ' string is to tell kernel
debuggers if this is a clean kernel or if anything unusual has
occurred. Tainting is permanent: even if an offending module is
diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt
index b4498218c474..3b56a991f52e 100644
--- a/Documentation/printk-formats.txt
+++ b/Documentation/printk-formats.txt
@@ -184,6 +184,12 @@ dentry names:
equivalent of %s dentry->d_name.name we used to use, %pd<n> prints
n last components. %pD does the same thing for struct file.
+task_struct comm name:
+
+ %pT
+
+ For printing task_struct->comm.
+
struct va_format:
%pV
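For illustration, a hypothetical use of the %pT specifier documented above; the function and variable names are made up, and only the format string follows the added documentation:

	/* assumes <linux/printk.h> and <linux/sched.h> */
	void report_task(struct task_struct *task)
	{
		/* %pT prints task->comm, %d prints the pid */
		pr_info("request initiated by %pT (pid %d)\n", task, task_pid_nr(task));
	}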
diff --git a/Documentation/rapidio/tsi721.txt b/Documentation/rapidio/tsi721.txt
index 335f3c6087dc..626052f403bb 100644
--- a/Documentation/rapidio/tsi721.txt
+++ b/Documentation/rapidio/tsi721.txt
@@ -20,13 +20,26 @@ II. Known problems
None.
-III. To do
+III. DMA Engine Support
- Add DMA data transfers (non-messaging).
- Add inbound region (SRIO-to-PCIe) mapping.
+Tsi721 mport driver supports DMA data transfers between local system memory and
+remote RapidIO devices. This functionality is implemented according to SLAVE
+mode API defined by common Linux kernel DMA Engine framework.
+
+Depending on system requirements RapidIO DMA operations can be included/excluded
+by setting CONFIG_RAPIDIO_DMA_ENGINE option. Tsi721 miniport driver uses seven
+out of eight available BDMA channels to support DMA data transfers.
+One BDMA channel is reserved for generation of maintenance read/write requests.
+
+If the Tsi721 mport driver has been built with RAPIDIO_DMA_ENGINE support included,
+this driver will accept a DMA-specific module parameter:
+ "dma_desc_per_channel" - defines number of hardware buffer descriptors used by
+ each BDMA channel of Tsi721 (by default - 128).
IV. Version History
+ 1.1.0 - DMA operations re-worked to support data scatter/gather lists larger
+ than hardware buffer descriptors ring.
1.0.0 - Initial driver release.
V. License
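The DMA Engine section above says the Tsi721 DMA support is exposed through the kernel's generic slave-mode dmaengine API. As a rough sketch of what a client of that generic API looks like (generic dmaengine calls only; the RapidIO-specific descriptor setup a real mport client needs is omitted, and my_rio_filter is a hypothetical channel filter):

	dma_cap_mask_t mask;
	struct dma_chan *chan;
	struct dma_async_tx_descriptor *tx;

	dma_cap_zero(mask);
	dma_cap_set(DMA_SLAVE, mask);
	chan = dma_request_channel(mask, my_rio_filter, NULL);
	if (!chan)
		return -ENODEV;

	tx = dmaengine_prep_slave_sg(chan, sgl, nents, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
	if (tx) {
		dmaengine_submit(tx);
		dma_async_issue_pending(chan);	/* start the transfer */
	}
	/* ... wait for completion, then ... */
	dma_release_channel(chan);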
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index c14374e71775..f79eb9666379 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -826,6 +826,7 @@ can be ORed together:
4096 - An out-of-tree module has been loaded.
8192 - An unsigned module has been loaded in a kernel supporting module
signature.
+16384 - A soft lockup has previously occurred on the system.
==============================================================
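As a worked example using only the values visible above: a tainted value of 20480 decomposes as 16384 + 4096, meaning a soft lockup has occurred at some point and an out-of-tree module has been loaded; a value of exactly 16384 means only the soft-lockup bit is set.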
diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
index 78c9a7b2b58f..8f961ef2b457 100644
--- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
+++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
@@ -47,6 +47,10 @@ use constant HIGH_KSWAPD_REWAKEUP => 21;
use constant HIGH_NR_SCANNED => 22;
use constant HIGH_NR_TAKEN => 23;
use constant HIGH_NR_RECLAIMED => 24;
+use constant HIGH_NR_FILE_SCANNED => 25;
+use constant HIGH_NR_ANON_SCANNED => 26;
+use constant HIGH_NR_FILE_RECLAIMED => 27;
+use constant HIGH_NR_ANON_RECLAIMED => 28;
my %perprocesspid;
my %perprocess;
@@ -56,14 +60,18 @@ my $opt_read_procstat;
my $total_wakeup_kswapd;
my ($total_direct_reclaim, $total_direct_nr_scanned);
+my ($total_direct_nr_file_scanned, $total_direct_nr_anon_scanned);
my ($total_direct_latency, $total_kswapd_latency);
my ($total_direct_nr_reclaimed);
+my ($total_direct_nr_file_reclaimed, $total_direct_nr_anon_reclaimed);
my ($total_direct_writepage_file_sync, $total_direct_writepage_file_async);
my ($total_direct_writepage_anon_sync, $total_direct_writepage_anon_async);
my ($total_kswapd_nr_scanned, $total_kswapd_wake);
+my ($total_kswapd_nr_file_scanned, $total_kswapd_nr_anon_scanned);
my ($total_kswapd_writepage_file_sync, $total_kswapd_writepage_file_async);
my ($total_kswapd_writepage_anon_sync, $total_kswapd_writepage_anon_async);
my ($total_kswapd_nr_reclaimed);
+my ($total_kswapd_nr_file_reclaimed, $total_kswapd_nr_anon_reclaimed);
# Catch sigint and exit on request
my $sigint_report = 0;
@@ -374,6 +382,7 @@ EVENT_PROCESS:
}
my $isolate_mode = $1;
my $nr_scanned = $4;
+ my $file = $6;
# To closer match vmstat scanning statistics, only count isolate_both
# and isolate_inactive as scanning. isolate_active is rotation
@@ -382,6 +391,11 @@ EVENT_PROCESS:
# isolate_both == 3
if ($isolate_mode != 2) {
$perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned;
+ if ($file == 1) {
+ $perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED} += $nr_scanned;
+ } else {
+ $perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED} += $nr_scanned;
+ }
}
} elsif ($tracepoint eq "mm_vmscan_lru_shrink_inactive") {
$details = $6;
@@ -391,8 +405,19 @@ EVENT_PROCESS:
print " $regex_lru_shrink_inactive/o\n";
next;
}
+
my $nr_reclaimed = $4;
+ my $flags = $6;
+ my $file = 0;
+ if ($flags =~ /RECLAIM_WB_FILE/) {
+ $file = 1;
+ }
$perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED} += $nr_reclaimed;
+ if ($file) {
+ $perprocesspid{$process_pid}->{HIGH_NR_FILE_RECLAIMED} += $nr_reclaimed;
+ } else {
+ $perprocesspid{$process_pid}->{HIGH_NR_ANON_RECLAIMED} += $nr_reclaimed;
+ }
} elsif ($tracepoint eq "mm_vmscan_writepage") {
$details = $6;
if ($details !~ /$regex_writepage/o) {
@@ -493,7 +518,11 @@ sub dump_stats {
$total_direct_reclaim += $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN};
$total_wakeup_kswapd += $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD};
$total_direct_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED};
+ $total_direct_nr_file_scanned += $stats{$process_pid}->{HIGH_NR_FILE_SCANNED};
+ $total_direct_nr_anon_scanned += $stats{$process_pid}->{HIGH_NR_ANON_SCANNED};
$total_direct_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED};
+ $total_direct_nr_file_reclaimed += $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
+ $total_direct_nr_anon_reclaimed += $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
$total_direct_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
$total_direct_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
$total_direct_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
@@ -513,7 +542,11 @@ sub dump_stats {
$stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN},
$stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD},
$stats{$process_pid}->{HIGH_NR_SCANNED},
+ $stats{$process_pid}->{HIGH_NR_FILE_SCANNED},
+ $stats{$process_pid}->{HIGH_NR_ANON_SCANNED},
$stats{$process_pid}->{HIGH_NR_RECLAIMED},
+ $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED},
+ $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED},
$stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC},
$stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC},
$this_reclaim_delay / 1000);
@@ -552,7 +585,11 @@ sub dump_stats {
$total_kswapd_wake += $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE};
$total_kswapd_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED};
+ $total_kswapd_nr_file_scanned += $stats{$process_pid}->{HIGH_NR_FILE_SCANNED};
+ $total_kswapd_nr_anon_scanned += $stats{$process_pid}->{HIGH_NR_ANON_SCANNED};
$total_kswapd_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED};
+ $total_kswapd_nr_file_reclaimed += $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
+ $total_kswapd_nr_anon_reclaimed += $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
$total_kswapd_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
$total_kswapd_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
$total_kswapd_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
@@ -563,7 +600,11 @@ sub dump_stats {
$stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE},
$stats{$process_pid}->{HIGH_KSWAPD_REWAKEUP},
$stats{$process_pid}->{HIGH_NR_SCANNED},
+ $stats{$process_pid}->{HIGH_NR_FILE_SCANNED},
+ $stats{$process_pid}->{HIGH_NR_ANON_SCANNED},
$stats{$process_pid}->{HIGH_NR_RECLAIMED},
+ $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED},
+ $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED},
$stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC},
$stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC});
@@ -594,7 +635,11 @@ sub dump_stats {
print "\nSummary\n";
print "Direct reclaims: $total_direct_reclaim\n";
print "Direct reclaim pages scanned: $total_direct_nr_scanned\n";
+ print "Direct reclaim file pages scanned: $total_direct_nr_file_scanned\n";
+ print "Direct reclaim anon pages scanned: $total_direct_nr_anon_scanned\n";
print "Direct reclaim pages reclaimed: $total_direct_nr_reclaimed\n";
+ print "Direct reclaim file pages reclaimed: $total_direct_nr_file_reclaimed\n";
+ print "Direct reclaim anon pages reclaimed: $total_direct_nr_anon_reclaimed\n";
print "Direct reclaim write file sync I/O: $total_direct_writepage_file_sync\n";
print "Direct reclaim write anon sync I/O: $total_direct_writepage_anon_sync\n";
print "Direct reclaim write file async I/O: $total_direct_writepage_file_async\n";
@@ -604,7 +649,11 @@ sub dump_stats {
print "\n";
print "Kswapd wakeups: $total_kswapd_wake\n";
print "Kswapd pages scanned: $total_kswapd_nr_scanned\n";
+ print "Kswapd file pages scanned: $total_kswapd_nr_file_scanned\n";
+ print "Kswapd anon pages scanned: $total_kswapd_nr_anon_scanned\n";
print "Kswapd pages reclaimed: $total_kswapd_nr_reclaimed\n";
+ print "Kswapd file pages reclaimed: $total_kswapd_nr_file_reclaimed\n";
+ print "Kswapd anon pages reclaimed: $total_kswapd_nr_anon_reclaimed\n";
print "Kswapd reclaim write file sync I/O: $total_kswapd_writepage_file_sync\n";
print "Kswapd reclaim write anon sync I/O: $total_kswapd_writepage_anon_sync\n";
print "Kswapd reclaim write file async I/O: $total_kswapd_writepage_file_async\n";
@@ -629,7 +678,11 @@ sub aggregate_perprocesspid() {
$perprocess{$process}->{MM_VMSCAN_WAKEUP_KSWAPD} += $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD};
$perprocess{$process}->{HIGH_KSWAPD_REWAKEUP} += $perprocesspid{$process_pid}->{HIGH_KSWAPD_REWAKEUP};
$perprocess{$process}->{HIGH_NR_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_SCANNED};
+ $perprocess{$process}->{HIGH_NR_FILE_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED};
+ $perprocess{$process}->{HIGH_NR_ANON_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED};
$perprocess{$process}->{HIGH_NR_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED};
+ $perprocess{$process}->{HIGH_NR_FILE_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
+ $perprocess{$process}->{HIGH_NR_ANON_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
$perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
$perprocess{$process}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
$perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
diff --git a/Makefile b/Makefile
index c076c33e3944..8a2217bc1948 100644
--- a/Makefile
+++ b/Makefile
@@ -638,6 +638,9 @@ else
KBUILD_CFLAGS += -O2
endif
+# Tell gcc to never replace conditional load with a non-conditional one
+KBUILD_CFLAGS += $(call cc-option,--param=allow-store-data-races=0)
+
ifdef CONFIG_READABLE_ASM
# Disable optimizations that make assembler listings hard to read.
# reorder blocks reorders the control in the function
@@ -653,6 +656,22 @@ KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN})
endif
# Handle stack protector mode.
+#
+# Since kbuild can potentially perform two passes (first with the old
+# .config values and then with updated .config values), we cannot error out
+# if a desired compiler option is unsupported. If we were to error, kbuild
+# could never get to the second pass and actually notice that we changed
+# the option to something that was supported.
+#
+# Additionally, we don't want to fallback and/or silently change which compiler
+# flags will be used, since that leads to producing kernels with different
+# security feature characteristics depending on the compiler used. ("But I
+# selected CC_STACKPROTECTOR_STRONG! Why did it build with _REGULAR?!")
+#
+# The middle ground is to warn here so that the failed option is obvious, but
+# to let the build fail with bad compiler flags so that we can't produce a
+# kernel when there is a CONFIG and compiler mismatch.
+#
ifdef CONFIG_CC_STACKPROTECTOR_REGULAR
stackp-flag := -fstack-protector
ifeq ($(call cc-option, $(stackp-flag)),)
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index 96e54bed5088..e858aa0ad8af 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -6,4 +6,5 @@ generic-y += exec.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/alpha/include/asm/scatterlist.h b/arch/alpha/include/asm/scatterlist.h
deleted file mode 100644
index 017d7471c3c4..000000000000
--- a/arch/alpha/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ALPHA_SCATTERLIST_H
-#define _ALPHA_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* !(_ALPHA_SCATTERLIST_H) */
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 916cedbd7a67..32cbbd565902 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -83,6 +83,7 @@ config ARM
<http://www.arm.linux.org.uk/>.
config ARM_HAS_SG_CHAIN
+ select ARCH_HAS_SG_CHAIN
bool
config NEED_SG_DMA_LENGTH
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index f5a357601983..70cd84eb7fda 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -22,6 +22,7 @@ generic-y += poll.h
generic-y += preempt.h
generic-y += resource.h
generic-y += rwsem.h
+generic-y += scatterlist.h
generic-y += sections.h
generic-y += segment.h
generic-y += sembuf.h
diff --git a/arch/arm/include/asm/scatterlist.h b/arch/arm/include/asm/scatterlist.h
deleted file mode 100644
index cefdb8f898a1..000000000000
--- a/arch/arm/include/asm/scatterlist.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASMARM_SCATTERLIST_H
-#define _ASMARM_SCATTERLIST_H
-
-#ifdef CONFIG_ARM_HAS_SG_CHAIN
-#define ARCH_HAS_SG_CHAIN
-#endif
-
-#include <asm/memory.h>
-#include <asm/types.h>
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASMARM_SCATTERLIST_H */
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 1f88db06b133..7a996aaa061e 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -26,6 +26,7 @@
#include <linux/io.h>
#include <linux/vmalloc.h>
#include <linux/sizes.h>
+#include <linux/cma.h>
#include <asm/memory.h>
#include <asm/highmem.h>
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a5dc5ff145a3..62b4ae1c5bbf 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1,6 +1,7 @@
config ARM64
def_bool y
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+ select ARCH_HAS_SG_CHAIN
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_SUPPORTS_ATOMIC_RMW
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index afff5105909d..31742dfadff9 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -13,6 +13,7 @@ generic-y += linkage.h
generic-y += mcs_spinlock.h
generic-y += module.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
generic-y += vga.h
generic-y += xor.h
diff --git a/arch/cris/include/asm/scatterlist.h b/arch/cris/include/asm/scatterlist.h
deleted file mode 100644
index f11f8f40ec4a..000000000000
--- a/arch/cris/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_CRIS_SCATTERLIST_H
-#define __ASM_CRIS_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* !(__ASM_CRIS_SCATTERLIST_H) */
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 87b95eb8aee5..5b73921b6e9d 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -5,4 +5,5 @@ generic-y += exec.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/frv/include/asm/scatterlist.h b/arch/frv/include/asm/scatterlist.h
deleted file mode 100644
index 0e5eb3018468..000000000000
--- a/arch/frv/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_SCATTERLIST_H
-#define _ASM_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* !_ASM_SCATTERLIST_H */
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 44a6915ab13d..c84c88bbbbd7 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -28,6 +28,7 @@ config IA64
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_VIRT_CPU_ACCOUNTING
+ select ARCH_HAS_SG_CHAIN
select VIRT_TO_BUS
select ARCH_DISCARD_MEMBLOCK
select GENERIC_IRQ_PROBE
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 0da4aa2602ae..e8317d2d6c8d 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -5,5 +5,6 @@ generic-y += hash.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
generic-y += vtime.h
diff --git a/arch/ia64/include/asm/scatterlist.h b/arch/ia64/include/asm/scatterlist.h
deleted file mode 100644
index 08fd93bff1db..000000000000
--- a/arch/ia64/include/asm/scatterlist.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _ASM_IA64_SCATTERLIST_H
-#define _ASM_IA64_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* _ASM_IA64_SCATTERLIST_H */
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 3e71ef85e439..9a0104a38cd3 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -384,21 +384,6 @@ static struct irqaction timer_irqaction = {
.name = "timer"
};
-static struct platform_device rtc_efi_dev = {
- .name = "rtc-efi",
- .id = -1,
-};
-
-static int __init rtc_init(void)
-{
- if (platform_device_register(&rtc_efi_dev) < 0)
- printk(KERN_ERR "unable to register rtc device...\n");
-
- /* not necessarily an error */
- return 0;
-}
-module_init(rtc_init);
-
void read_persistent_clock(struct timespec *ts)
{
efi_gettimeofday(ts);
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 25c350264a41..892d43e32f3b 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -631,7 +631,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
pgdat = NODE_DATA(nid);
- zone = pgdat->node_zones + ZONE_NORMAL;
+ zone = pgdat->node_zones +
+ zone_for_memory(nid, start, size, ZONE_NORMAL);
ret = __add_pages(nid, zone, start_pfn, nr_pages);
if (ret)
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index 67779a74b62d..accc10a3dc78 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -6,4 +6,5 @@ generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += module.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/m32r/include/asm/scatterlist.h b/arch/m32r/include/asm/scatterlist.h
deleted file mode 100644
index 7370b8b6243e..000000000000
--- a/arch/m32r/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_M32R_SCATTERLIST_H
-#define _ASM_M32R_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASM_M32R_SCATTERLIST_H */
diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index 3a480b3df0d6..d2263a0bdc3d 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -376,7 +376,6 @@ cache_flush_060 (unsigned long addr, int scope, int cache, unsigned long len)
asmlinkage int
sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len)
{
- struct vm_area_struct *vma;
int ret = -EINVAL;
if (scope < FLUSH_SCOPE_LINE || scope > FLUSH_SCOPE_ALL ||
@@ -389,16 +388,23 @@ sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len)
if (!capable(CAP_SYS_ADMIN))
goto out;
} else {
+ struct vm_area_struct *vma;
+ bool invalid;
+
+ /* Check for overflow. */
+ if (addr + len < addr)
+ goto out;
+
/*
* Verify that the specified address region actually belongs
* to this process.
*/
- vma = find_vma (current->mm, addr);
ret = -EINVAL;
- /* Check for overflow. */
- if (addr + len < addr)
- goto out;
- if (vma == NULL || addr < vma->vm_start || addr + len > vma->vm_end)
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma(current->mm, addr);
+ invalid = !vma || addr < vma->vm_start || addr + len > vma->vm_end;
+ up_read(&current->mm->mmap_sem);
+ if (invalid)
goto out;
}
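The change above is an instance of a general rule: find_vma() must be called with the mm's mmap_sem held, at least for reading. A generic sketch of that pattern, detached from this particular syscall:

	struct vm_area_struct *vma;
	bool valid;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, addr);
	valid = vma && addr >= vma->vm_start && addr + len <= vma->vm_end;
	up_read(&mm->mmap_sem);

	if (!valid)
		return -EINVAL;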
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 35b3ecaf25d5..27a3acda6c19 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -7,5 +7,6 @@ generic-y += exec.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += syscalls.h
generic-y += trace_clock.h
diff --git a/arch/microblaze/include/asm/scatterlist.h b/arch/microblaze/include/asm/scatterlist.h
deleted file mode 100644
index 35d786fe93ae..000000000000
--- a/arch/microblaze/include/asm/scatterlist.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/scatterlist.h>
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index 654d5ba6e310..ecbd6676bd33 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -6,4 +6,5 @@ generic-y += exec.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/mn10300/include/asm/scatterlist.h b/arch/mn10300/include/asm/scatterlist.h
deleted file mode 100644
index 7baa4006008a..000000000000
--- a/arch/mn10300/include/asm/scatterlist.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* MN10300 Scatterlist definitions
- *
- * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-#ifndef _ASM_SCATTERLIST_H
-#define _ASM_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASM_SCATTERLIST_H */
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 80b94b0add1f..4bc7b62fb4b6 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -111,6 +111,7 @@ config PPC
select HAVE_DMA_API_DEBUG
select HAVE_OPROFILE
select HAVE_DEBUG_KMEMLEAK
+ select ARCH_HAS_SG_CHAIN
select GENERIC_ATOMIC64 if PPC32
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_PERF_EVENTS
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 3fb1bc432f4f..7f23f162ce9c 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -4,5 +4,6 @@ generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
generic-y += rwsem.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
generic-y += vtime.h
diff --git a/arch/powerpc/include/asm/scatterlist.h b/arch/powerpc/include/asm/scatterlist.h
deleted file mode 100644
index de1f620bd5c9..000000000000
--- a/arch/powerpc/include/asm/scatterlist.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef _ASM_POWERPC_SCATTERLIST_H
-#define _ASM_POWERPC_SCATTERLIST_H
-/*
- * Copyright (C) 2001 PPC64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/dma.h>
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* _ASM_POWERPC_SCATTERLIST_H */
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 2d590dea5482..0570eef83fba 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -81,7 +81,6 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
book3s_hv_rm_mmu.o \
book3s_hv_ras.o \
book3s_hv_builtin.o \
- book3s_hv_cma.o \
$(kvm-book3s_64-builtin-xics-objs-y)
endif
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index e3d17f571085..72c20bb16d26 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,8 +37,6 @@
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
-#include "book3s_hv_cma.h"
-
/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
#define MAX_LPID_970 63
@@ -64,10 +62,10 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
}
kvm->arch.hpt_cma_alloc = 0;
- VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
if (page) {
hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+ memset((void *)hpt, 0, (1 << order));
kvm->arch.hpt_cma_alloc = 1;
}
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 3b41447482e5..329d7fdd0a6a 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -16,12 +16,14 @@
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/sizes.h>
+#include <linux/cma.h>
#include <asm/cputable.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include "book3s_hv_cma.h"
+#define KVM_CMA_CHUNK_ORDER 18
+
/*
* Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
* should be power of 2.
@@ -43,6 +45,8 @@ static unsigned long kvm_cma_resv_ratio = 5;
unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */
EXPORT_SYMBOL_GPL(kvm_rma_pages);
+static struct cma *kvm_cma;
+
/* Work out RMLS (real mode limit selector) field value for a given RMA size.
Assumes POWER7 or PPC970. */
static inline int lpcr_rmls(unsigned long rma_size)
@@ -97,7 +101,7 @@ struct kvm_rma_info *kvm_alloc_rma()
ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
if (!ri)
return NULL;
- page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
+ page = cma_alloc(kvm_cma, kvm_rma_pages, get_order(kvm_rma_pages));
if (!page)
goto err_out;
atomic_set(&ri->use_count, 1);
@@ -112,7 +116,7 @@ EXPORT_SYMBOL_GPL(kvm_alloc_rma);
void kvm_release_rma(struct kvm_rma_info *ri)
{
if (atomic_dec_and_test(&ri->use_count)) {
- kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
+ cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages);
kfree(ri);
}
}
@@ -131,16 +135,18 @@ struct page *kvm_alloc_hpt(unsigned long nr_pages)
{
unsigned long align_pages = HPT_ALIGN_PAGES;
+ VM_BUG_ON(get_order(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+
/* Old CPUs require HPT aligned on a multiple of its size */
if (!cpu_has_feature(CPU_FTR_ARCH_206))
align_pages = nr_pages;
- return kvm_alloc_cma(nr_pages, align_pages);
+ return cma_alloc(kvm_cma, nr_pages, get_order(align_pages));
}
EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
void kvm_release_hpt(struct page *page, unsigned long nr_pages)
{
- kvm_release_cma(page, nr_pages);
+ cma_release(kvm_cma, page, nr_pages);
}
EXPORT_SYMBOL_GPL(kvm_release_hpt);
@@ -179,7 +185,8 @@ void __init kvm_cma_reserve(void)
align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
- kvm_cma_declare_contiguous(selected_size, align_size);
+ cma_declare_contiguous(0, selected_size, 0, align_size,
+ KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma);
}
}
diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c
deleted file mode 100644
index d9d3d8553d51..000000000000
--- a/arch/powerpc/kvm/book3s_hv_cma.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
- * for DMA mapping framework
- *
- * Copyright IBM Corporation, 2013
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License or (at your optional) any later version of the license.
- *
- */
-#define pr_fmt(fmt) "kvm_cma: " fmt
-
-#ifdef CONFIG_CMA_DEBUG
-#ifndef DEBUG
-# define DEBUG
-#endif
-#endif
-
-#include <linux/memblock.h>
-#include <linux/mutex.h>
-#include <linux/sizes.h>
-#include <linux/slab.h>
-
-#include "book3s_hv_cma.h"
-
-struct kvm_cma {
- unsigned long base_pfn;
- unsigned long count;
- unsigned long *bitmap;
-};
-
-static DEFINE_MUTEX(kvm_cma_mutex);
-static struct kvm_cma kvm_cma_area;
-
-/**
- * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling
- * for kvm hash pagetable
- * @size: Size of the reserved memory.
- * @alignment: Alignment for the contiguous memory area
- *
- * This function reserves memory for kvm cma area. It should be
- * called by arch code when early allocator (memblock or bootmem)
- * is still activate.
- */
-long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment)
-{
- long base_pfn;
- phys_addr_t addr;
- struct kvm_cma *cma = &kvm_cma_area;
-
- pr_debug("%s(size %lx)\n", __func__, (unsigned long)size);
-
- if (!size)
- return -EINVAL;
- /*
- * Sanitise input arguments.
- * We should be pageblock aligned for CMA.
- */
- alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order));
- size = ALIGN(size, alignment);
- /*
- * Reserve memory
- * Use __memblock_alloc_base() since
- * memblock_alloc_base() panic()s.
- */
- addr = __memblock_alloc_base(size, alignment, 0);
- if (!addr) {
- base_pfn = -ENOMEM;
- goto err;
- } else
- base_pfn = PFN_DOWN(addr);
-
- /*
- * Each reserved area must be initialised later, when more kernel
- * subsystems (like slab allocator) are available.
- */
- cma->base_pfn = base_pfn;
- cma->count = size >> PAGE_SHIFT;
- pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M);
- return 0;
-err:
- pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
- return base_pfn;
-}
-
-/**
- * kvm_alloc_cma() - allocate pages from contiguous area
- * @nr_pages: Requested number of pages.
- * @align_pages: Requested alignment in number of pages
- *
- * This function allocates memory buffer for hash pagetable.
- */
-struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
-{
- int ret;
- struct page *page = NULL;
- struct kvm_cma *cma = &kvm_cma_area;
- unsigned long chunk_count, nr_chunk;
- unsigned long mask, pfn, pageno, start = 0;
-
-
- if (!cma || !cma->count)
- return NULL;
-
- pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__,
- (void *)cma, nr_pages, align_pages);
-
- if (!nr_pages)
- return NULL;
- /*
- * align mask with chunk size. The bit tracks pages in chunk size
- */
- VM_BUG_ON(!is_power_of_2(align_pages));
- mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1;
- BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER);
-
- chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
- nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
-
- mutex_lock(&kvm_cma_mutex);
- for (;;) {
- pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count,
- start, nr_chunk, mask);
- if (pageno >= chunk_count)
- break;
-
- pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT));
- ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA);
- if (ret == 0) {
- bitmap_set(cma->bitmap, pageno, nr_chunk);
- page = pfn_to_page(pfn);
- memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT);
- break;
- } else if (ret != -EBUSY) {
- break;
- }
- pr_debug("%s(): memory range at %p is busy, retrying\n",
- __func__, pfn_to_page(pfn));
- /* try again with a bit different memory target */
- start = pageno + mask + 1;
- }
- mutex_unlock(&kvm_cma_mutex);
- pr_debug("%s(): returned %p\n", __func__, page);
- return page;
-}
-
-/**
- * kvm_release_cma() - release allocated pages for hash pagetable
- * @pages: Allocated pages.
- * @nr_pages: Number of allocated pages.
- *
- * This function releases memory allocated by kvm_alloc_cma().
- * It returns false when provided pages do not belong to contiguous area and
- * true otherwise.
- */
-bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
-{
- unsigned long pfn;
- unsigned long nr_chunk;
- struct kvm_cma *cma = &kvm_cma_area;
-
- if (!cma || !pages)
- return false;
-
- pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages);
-
- pfn = page_to_pfn(pages);
-
- if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
- return false;
-
- VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count);
- nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
-
- mutex_lock(&kvm_cma_mutex);
- bitmap_clear(cma->bitmap,
- (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT),
- nr_chunk);
- free_contig_range(pfn, nr_pages);
- mutex_unlock(&kvm_cma_mutex);
-
- return true;
-}
-
-static int __init kvm_cma_activate_area(unsigned long base_pfn,
- unsigned long count)
-{
- unsigned long pfn = base_pfn;
- unsigned i = count >> pageblock_order;
- struct zone *zone;
-
- WARN_ON_ONCE(!pfn_valid(pfn));
- zone = page_zone(pfn_to_page(pfn));
- do {
- unsigned j;
- base_pfn = pfn;
- for (j = pageblock_nr_pages; j; --j, pfn++) {
- WARN_ON_ONCE(!pfn_valid(pfn));
- /*
- * alloc_contig_range requires the pfn range
- * specified to be in the same zone. Make this
- * simple by forcing the entire CMA resv range
- * to be in the same zone.
- */
- if (page_zone(pfn_to_page(pfn)) != zone)
- return -EINVAL;
- }
- init_cma_reserved_pageblock(pfn_to_page(base_pfn));
- } while (--i);
- return 0;
-}
-
-static int __init kvm_cma_init_reserved_areas(void)
-{
- int bitmap_size, ret;
- unsigned long chunk_count;
- struct kvm_cma *cma = &kvm_cma_area;
-
- pr_debug("%s()\n", __func__);
- if (!cma->count)
- return 0;
- chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
- bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long);
- cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
- if (!cma->bitmap)
- return -ENOMEM;
-
- ret = kvm_cma_activate_area(cma->base_pfn, cma->count);
- if (ret)
- goto error;
- return 0;
-
-error:
- kfree(cma->bitmap);
- return ret;
-}
-core_initcall(kvm_cma_init_reserved_areas);
diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h
deleted file mode 100644
index 655144f75fa5..000000000000
--- a/arch/powerpc/kvm/book3s_hv_cma.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
- * for DMA mapping framework
- *
- * Copyright IBM Corporation, 2013
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License or (at your optional) any later version of the license.
- *
- */
-
-#ifndef __POWERPC_KVM_CMA_ALLOC_H__
-#define __POWERPC_KVM_CMA_ALLOC_H__
-/*
- * Both RMA and Hash page allocation will be multiple of 256K.
- */
-#define KVM_CMA_CHUNK_ORDER 18
-
-extern struct page *kvm_alloc_cma(unsigned long nr_pages,
- unsigned long align_pages);
-extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages);
-extern long kvm_cma_declare_contiguous(phys_addr_t size,
- phys_addr_t alignment) __init;
-#endif
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 7b6c10750179..d85e86aac7fb 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -33,6 +33,7 @@
#include <linux/export.h>
#include <asm/tlbflush.h>
+#include <asm/dma.h>
#include "mmu_decl.h"
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 2c8e90f5789e..e0f7a189c48e 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -128,7 +128,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
return -EINVAL;
/* this should work for most non-highmem platforms */
- zone = pgdata->node_zones;
+ zone = pgdata->node_zones +
+ zone_for_memory(nid, start, size, 0);
return __add_pages(nid, zone, start_pfn, nr_pages);
}
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
index 534574a97ec9..3a104284b338 100644
--- a/arch/powerpc/platforms/44x/warp.c
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -25,6 +25,7 @@
#include <asm/time.h>
#include <asm/uic.h>
#include <asm/ppc4xx.h>
+#include <asm/dma.h>
static __initdata struct of_device_id warp_of_bus[] = {
diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c
index 6e19b0ad5d26..3feffde9128d 100644
--- a/arch/powerpc/platforms/52xx/efika.c
+++ b/arch/powerpc/platforms/52xx/efika.c
@@ -13,6 +13,7 @@
#include <generated/utsrelease.h>
#include <linux/pci.h>
#include <linux/of.h>
+#include <asm/dma.h>
#include <asm/prom.h>
#include <asm/time.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c
index 03aabc0e16ac..2fe12046279e 100644
--- a/arch/powerpc/platforms/amigaone/setup.c
+++ b/arch/powerpc/platforms/amigaone/setup.c
@@ -24,6 +24,7 @@
#include <asm/i8259.h>
#include <asm/time.h>
#include <asm/udbg.h>
+#include <asm/dma.h>
extern void __flush_disable_L1(void);
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8ca60f8d5683..05c78bb5f570 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -145,6 +145,7 @@ config S390
select TTY
select VIRT_CPU_ACCOUNTING
select VIRT_TO_BUS
+ select ARCH_HAS_SG_CHAIN
config SCHED_OMIT_FRAME_POINTER
def_bool y
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 57892a8a9055..b3fea0722ff1 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -4,4 +4,5 @@ generic-y += clkdev.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h
deleted file mode 100644
index 6d45ef6c12a7..000000000000
--- a/arch/s390/include/asm/scatterlist.h
+++ /dev/null
@@ -1,3 +0,0 @@
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index 2f947aba4bd4..aad209199f7e 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -8,5 +8,6 @@ generic-y += cputime.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
generic-y += xor.h
diff --git a/arch/score/include/asm/scatterlist.h b/arch/score/include/asm/scatterlist.h
deleted file mode 100644
index 9f533b8362c7..000000000000
--- a/arch/score/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_SCORE_SCATTERLIST_H
-#define _ASM_SCORE_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASM_SCORE_SCATTERLIST_H */
diff --git a/arch/score/include/uapi/asm/ptrace.h b/arch/score/include/uapi/asm/ptrace.h
index f59771a3f127..5c5e794058be 100644
--- a/arch/score/include/uapi/asm/ptrace.h
+++ b/arch/score/include/uapi/asm/ptrace.h
@@ -4,17 +4,6 @@
#define PTRACE_GETREGS 12
#define PTRACE_SETREGS 13
-#define PC 32
-#define CONDITION 33
-#define ECR 34
-#define EMA 35
-#define CEH 36
-#define CEL 37
-#define COUNTER 38
-#define LDCR 39
-#define STCR 40
-#define PSR 41
-
#define SINGLESTEP16_INSN 0x7006
#define SINGLESTEP32_INSN 0x840C8000
#define BREAKPOINT16_INSN 0x7002 /* work on SPG300 */
diff --git a/arch/sh/drivers/dma/Kconfig b/arch/sh/drivers/dma/Kconfig
index cfd5b90a8628..78bc97b1d027 100644
--- a/arch/sh/drivers/dma/Kconfig
+++ b/arch/sh/drivers/dma/Kconfig
@@ -12,9 +12,8 @@ config SH_DMA_IRQ_MULTI
default y if CPU_SUBTYPE_SH7750 || CPU_SUBTYPE_SH7751 || \
CPU_SUBTYPE_SH7750S || CPU_SUBTYPE_SH7750R || \
CPU_SUBTYPE_SH7751R || CPU_SUBTYPE_SH7091 || \
- CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7764 || \
- CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \
- CPU_SUBTYPE_SH7760
+ CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7780 || \
+ CPU_SUBTYPE_SH7785 || CPU_SUBTYPE_SH7760
config SH_DMA_API
depends on SH_DMA
diff --git a/arch/sh/include/cpu-sh4/cpu/dma-register.h b/arch/sh/include/cpu-sh4/cpu/dma-register.h
index 02788b6a03b7..9cd81e54056a 100644
--- a/arch/sh/include/cpu-sh4/cpu/dma-register.h
+++ b/arch/sh/include/cpu-sh4/cpu/dma-register.h
@@ -32,7 +32,6 @@
#define CHCR_TS_HIGH_SHIFT (20 - 2) /* 2 bits for shifted low TS */
#elif defined(CONFIG_CPU_SUBTYPE_SH7757) || \
defined(CONFIG_CPU_SUBTYPE_SH7763) || \
- defined(CONFIG_CPU_SUBTYPE_SH7764) || \
defined(CONFIG_CPU_SUBTYPE_SH7780) || \
defined(CONFIG_CPU_SUBTYPE_SH7785)
#define CHCR_TS_LOW_MASK 0x00000018
diff --git a/arch/sh/include/cpu-sh4a/cpu/dma.h b/arch/sh/include/cpu-sh4a/cpu/dma.h
index 89afb650ce25..8ceccceae844 100644
--- a/arch/sh/include/cpu-sh4a/cpu/dma.h
+++ b/arch/sh/include/cpu-sh4a/cpu/dma.h
@@ -14,8 +14,7 @@
#define DMTE4_IRQ evt2irq(0xb80)
#define DMAE0_IRQ evt2irq(0xbc0) /* DMA Error IRQ*/
#define SH_DMAC_BASE0 0xFE008020
-#elif defined(CONFIG_CPU_SUBTYPE_SH7763) || \
- defined(CONFIG_CPU_SUBTYPE_SH7764)
+#elif defined(CONFIG_CPU_SUBTYPE_SH7763)
#define DMTE0_IRQ evt2irq(0x640)
#define DMTE4_IRQ evt2irq(0x780)
#define DMAE0_IRQ evt2irq(0x6c0)
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
index f579dd528198..c187b9579c21 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
@@ -307,7 +307,7 @@ static struct clk_lookup lookups[] = {
CLKDEV_ICK_ID("fck", "sh-tmu.0", &mstp_clks[HWBLK_TMU0]),
CLKDEV_ICK_ID("fck", "sh-tmu.1", &mstp_clks[HWBLK_TMU1]),
- CLKDEV_ICK_ID("fck", "sh-cmt-16.0", &mstp_clks[HWBLK_CMT]),
+ CLKDEV_ICK_ID("fck", "sh-cmt-32.0", &mstp_clks[HWBLK_CMT]),
CLKDEV_DEV_ID("sh-wdt.0", &mstp_clks[HWBLK_RWDT]),
CLKDEV_DEV_ID("sh-dma-engine.1", &mstp_clks[HWBLK_DMAC1]),
@@ -332,6 +332,8 @@ static struct clk_lookup lookups[] = {
CLKDEV_CON_ID("tsif0", &mstp_clks[HWBLK_TSIF]),
CLKDEV_DEV_ID("renesas_usbhs.1", &mstp_clks[HWBLK_USB1]),
CLKDEV_DEV_ID("renesas_usbhs.0", &mstp_clks[HWBLK_USB0]),
+ CLKDEV_CON_ID("usb1", &mstp_clks[HWBLK_USB1]),
+ CLKDEV_CON_ID("usb0", &mstp_clks[HWBLK_USB0]),
CLKDEV_CON_ID("2dg0", &mstp_clks[HWBLK_2DG]),
CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[HWBLK_SDHI0]),
CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[HWBLK_SDHI1]),
diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index 552c8fcf9416..d6d0a986c6e9 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -80,10 +80,8 @@ static int __init rtc_generic_init(void)
return -ENODEV;
pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
- if (IS_ERR(pdev))
- return PTR_ERR(pdev);
- return 0;
+ return PTR_ERR_OR_ZERO(pdev);
}
module_init(rtc_generic_init);
diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c
index 74c03ecc4871..ecfc6b0c1da1 100644
--- a/arch/sh/mm/asids-debugfs.c
+++ b/arch/sh/mm/asids-debugfs.c
@@ -67,10 +67,8 @@ static int __init asids_debugfs_init(void)
NULL, &asids_debugfs_fops);
if (!asids_dentry)
return -ENOMEM;
- if (IS_ERR(asids_dentry))
- return PTR_ERR(asids_dentry);
- return 0;
+ return PTR_ERR_OR_ZERO(asids_dentry);
}
module_init(asids_debugfs_init);
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 2d089fe2cba9..2790b6a64157 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -495,8 +495,9 @@ int arch_add_memory(int nid, u64 start, u64 size)
pgdat = NODE_DATA(nid);
/* We only have ZONE_NORMAL, so this is easy.. */
- ret = __add_pages(nid, pgdat->node_zones + ZONE_NORMAL,
- start_pfn, nr_pages);
+ ret = __add_pages(nid, pgdat->node_zones +
+ zone_for_memory(nid, start, size, ZONE_NORMAL),
+ start_pfn, nr_pages);
if (unlikely(ret))
printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 4692c90936f1..a537816613f9 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -42,6 +42,7 @@ config SPARC
select MODULES_USE_ELF_RELA
select ODD_RT_SIGACTION
select OLD_SIGSUSPEND
+ select ARCH_HAS_SG_CHAIN
config SPARC32
def_bool !64BIT
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index a45821818003..cdd1b447bb6c 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -15,6 +15,7 @@ generic-y += mcs_spinlock.h
generic-y += module.h
generic-y += mutex.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += serial.h
generic-y += trace_clock.h
generic-y += types.h
diff --git a/arch/sparc/include/asm/scatterlist.h b/arch/sparc/include/asm/scatterlist.h
deleted file mode 100644
index 92bb638313f8..000000000000
--- a/arch/sparc/include/asm/scatterlist.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _SPARC_SCATTERLIST_H
-#define _SPARC_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* !(_SPARC_SCATTERLIST_H) */
diff --git a/arch/tile/include/asm/hardwall.h b/arch/tile/include/asm/hardwall.h
index 2f572b6b7bc2..44d2765bde2b 100644
--- a/arch/tile/include/asm/hardwall.h
+++ b/arch/tile/include/asm/hardwall.h
@@ -23,7 +23,7 @@
struct proc_dir_entry;
#ifdef CONFIG_HARDWALL
void proc_tile_hardwall_init(struct proc_dir_entry *root);
-int proc_pid_hardwall(struct task_struct *task, char *buffer);
+int proc_pid_hardwall(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task);
#else
static inline void proc_tile_hardwall_init(struct proc_dir_entry *root) {}
#endif
diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c
index 531f4c365351..aca6000bca75 100644
--- a/arch/tile/kernel/hardwall.c
+++ b/arch/tile/kernel/hardwall.c
@@ -947,15 +947,15 @@ static void hardwall_remove_proc(struct hardwall_info *info)
remove_proc_entry(buf, info->type->proc_dir);
}
-int proc_pid_hardwall(struct task_struct *task, char *buffer)
+int proc_pid_hardwall(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
int i;
int n = 0;
for (i = 0; i < HARDWALL_TYPES; ++i) {
struct hardwall_info *info = task->thread.hardwall[i].info;
if (info)
- n += sprintf(&buffer[n], "%s: %d\n",
- info->type->name, info->id);
+ seq_printf(m, "%s: %d\n", info->type->name, info->id);
}
return n;
}
diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c
index 4918d91bc3a6..d19b13e3a59f 100644
--- a/arch/tile/kernel/module.c
+++ b/arch/tile/kernel/module.c
@@ -58,7 +58,7 @@ void *module_alloc(unsigned long size)
area->nr_pages = npages;
area->pages = pages;
- if (map_vm_area(area, prot_rwx, &pages)) {
+ if (map_vm_area(area, prot_rwx, pages)) {
vunmap(area->addr);
goto error;
}
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index a5e4b6068213..7bd64aa2e94a 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -21,6 +21,7 @@ generic-y += param.h
generic-y += pci.h
generic-y += percpu.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += sections.h
generic-y += switch_to.h
generic-y += topology.h
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cb101ffcd490..8f0d2957a053 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -96,6 +96,7 @@ config X86
select IRQ_FORCED_THREADING
select HAVE_BPF_JIT if X86_64
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select ARCH_HAS_SG_CHAIN
select CLKEVT_I8253
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_IOMAP
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 3ca9762e1649..3bf000fab0ae 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -5,6 +5,7 @@ genhdr-y += unistd_64.h
genhdr-y += unistd_x32.h
generic-y += clkdev.h
-generic-y += early_ioremap.h
generic-y += cputime.h
+generic-y += early_ioremap.h
generic-y += mcs_spinlock.h
+generic-y += scatterlist.h
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 4064acae625d..01b493e5a99b 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -9,7 +9,6 @@
#ifdef CONFIG_NUMA
#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
-#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
/*
* Too small node sizes may confuse the VM badly. Usually they
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 5be9063545d2..809abb335627 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -115,7 +115,8 @@ static inline void native_pgd_clear(pgd_t *pgd)
native_set_pgd(pgd, native_make_pgd(0));
}
-extern void sync_global_pgds(unsigned long start, unsigned long end);
+extern void sync_global_pgds(unsigned long start, unsigned long end,
+ int removed);
/*
* Conversion functions: convert a page and protection to a page entry,
diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h
deleted file mode 100644
index 4240878b9d76..000000000000
--- a/arch/x86/include/asm/scatterlist.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _ASM_X86_SCATTERLIST_H
-#define _ASM_X86_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* _ASM_X86_SCATTERLIST_H */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1dbade870f90..d393ac669cc0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -350,7 +350,7 @@ out:
void vmalloc_sync_all(void)
{
- sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
+ sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END, 0);
}
/*
@@ -1218,7 +1218,8 @@ good_area:
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
- * the fault:
+ * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if
+ * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked.
*/
fault = handle_mm_fault(mm, vma, address, flags);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index e39504878aec..7d05565ba781 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -825,7 +825,8 @@ void __init mem_init(void)
int arch_add_memory(int nid, u64 start, u64 size)
{
struct pglist_data *pgdata = NODE_DATA(nid);
- struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
+ struct zone *zone = pgdata->node_zones +
+ zone_for_memory(nid, start, size, ZONE_HIGHMEM);
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index df1a9927ad29..529625118ff6 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -178,7 +178,7 @@ __setup("noexec32=", nonx32_setup);
* When memory was added/removed make sure all the processes MM have
* suitable PGD entries in the local PGD level page.
*/
-void sync_global_pgds(unsigned long start, unsigned long end)
+void sync_global_pgds(unsigned long start, unsigned long end, int removed)
{
unsigned long address;
@@ -186,7 +186,12 @@ void sync_global_pgds(unsigned long start, unsigned long end)
const pgd_t *pgd_ref = pgd_offset_k(address);
struct page *page;
- if (pgd_none(*pgd_ref))
+ /*
+ * When it is called after memory hot remove, pgd_none()
+ * returns true. In this case (removed == 1), we must clear
+ * the PGD entries in the local PGD level page.
+ */
+ if (pgd_none(*pgd_ref) && !removed)
continue;
spin_lock(&pgd_lock);
@@ -199,12 +204,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
- else
+ if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
BUG_ON(pgd_page_vaddr(*pgd)
!= pgd_page_vaddr(*pgd_ref));
+ if (removed) {
+ if (pgd_none(*pgd_ref) && !pgd_none(*pgd))
+ pgd_clear(pgd);
+ } else {
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ }
+
spin_unlock(pgt_lock);
}
spin_unlock(&pgd_lock);
@@ -633,7 +644,7 @@ kernel_physical_mapping_init(unsigned long start,
}
if (pgd_changed)
- sync_global_pgds(addr, end - 1);
+ sync_global_pgds(addr, end - 1, 0);
__flush_tlb_all();
@@ -691,7 +702,8 @@ static void update_end_of_memory_vars(u64 start, u64 size)
int arch_add_memory(int nid, u64 start, u64 size)
{
struct pglist_data *pgdat = NODE_DATA(nid);
- struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
+ struct zone *zone = pgdat->node_zones +
+ zone_for_memory(nid, start, size, ZONE_NORMAL);
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;
@@ -975,25 +987,26 @@ static void __meminit
remove_pagetable(unsigned long start, unsigned long end, bool direct)
{
unsigned long next;
+ unsigned long addr;
pgd_t *pgd;
pud_t *pud;
bool pgd_changed = false;
- for (; start < end; start = next) {
- next = pgd_addr_end(start, end);
+ for (addr = start; addr < end; addr = next) {
+ next = pgd_addr_end(addr, end);
- pgd = pgd_offset_k(start);
+ pgd = pgd_offset_k(addr);
if (!pgd_present(*pgd))
continue;
pud = (pud_t *)pgd_page_vaddr(*pgd);
- remove_pud_table(pud, start, next, direct);
+ remove_pud_table(pud, addr, next, direct);
if (free_pud_table(pud, pgd))
pgd_changed = true;
}
if (pgd_changed)
- sync_global_pgds(start, end - 1);
+ sync_global_pgds(start, end - 1, 1);
flush_tlb_all();
}
@@ -1340,7 +1353,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
else
err = vmemmap_populate_basepages(start, end, node);
if (!err)
- sync_global_pgds(start, end - 1);
+ sync_global_pgds(start, end - 1, 0);
return err;
}
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index a32b706c401a..d221374d5ce8 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -185,8 +185,8 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
return numa_add_memblk_to(nid, start, end, &numa_meminfo);
}
-/* Initialize NODE_DATA for a node on the local memory */
-static void __init setup_node_data(int nid, u64 start, u64 end)
+/* Allocate NODE_DATA for a node on the local memory */
+static void __init alloc_node_data(int nid)
{
const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
u64 nd_pa;
@@ -194,18 +194,6 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
int tnid;
/*
- * Don't confuse VM with a node that doesn't have the
- * minimum amount of memory:
- */
- if (end && (end - start) < NODE_MIN_SIZE)
- return;
-
- start = roundup(start, ZONE_ALIGN);
-
- printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
- nid, start, end - 1);
-
- /*
* Allocate node data. Try node-local memory and then any node.
* Never allocate in DMA zone.
*/
@@ -222,7 +210,7 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
nd = __va(nd_pa);
/* report and initialize */
- printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]\n",
+ printk(KERN_INFO "NODE_DATA(%d) allocated [mem %#010Lx-%#010Lx]\n", nid,
nd_pa, nd_pa + nd_size - 1);
tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
if (tnid != nid)
@@ -230,9 +218,6 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
node_data[nid] = nd;
memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
- NODE_DATA(nid)->node_id = nid;
- NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT;
- NODE_DATA(nid)->node_spanned_pages = (end - start) >> PAGE_SHIFT;
node_set_online(nid);
}
@@ -523,8 +508,17 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
end = max(mi->blk[i].end, end);
}
- if (start < end)
- setup_node_data(nid, start, end);
+ if (start >= end)
+ continue;
+
+ /*
+ * Don't confuse VM with a node that doesn't have the
+ * minimum amount of memory:
+ */
+ if (end && (end - start) < NODE_MIN_SIZE)
+ continue;
+
+ alloc_node_data(nid);
}
/* Dump memblock with node info and return. */
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index f15103dff4b4..d143d216d52b 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -40,20 +40,40 @@ void __init efi_bgrt_init(void)
if (ACPI_FAILURE(status))
return;
- if (bgrt_tab->header.length < sizeof(*bgrt_tab))
+ if (bgrt_tab->header.length < sizeof(*bgrt_tab)) {
+ pr_err("Ignoring BGRT: invalid length %u (expected %zu)\n",
+ bgrt_tab->header.length, sizeof(*bgrt_tab));
return;
- if (bgrt_tab->version != 1 || bgrt_tab->status != 1)
+ }
+ if (bgrt_tab->version != 1) {
+ pr_err("Ignoring BGRT: invalid version %u (expected 1)\n",
+ bgrt_tab->version);
+ return;
+ }
+ if (bgrt_tab->status != 1) {
+ pr_err("Ignoring BGRT: invalid status %u (expected 1)\n",
+ bgrt_tab->status);
+ return;
+ }
+ if (bgrt_tab->image_type != 0) {
+ pr_err("Ignoring BGRT: invalid image type %u (expected 0)\n",
+ bgrt_tab->image_type);
return;
- if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address)
+ }
+ if (!bgrt_tab->image_address) {
+ pr_err("Ignoring BGRT: null image address\n");
return;
+ }
image = efi_lookup_mapped_addr(bgrt_tab->image_address);
if (!image) {
image = early_memremap(bgrt_tab->image_address,
sizeof(bmp_header));
ioremapped = true;
- if (!image)
+ if (!image) {
+ pr_err("Ignoring BGRT: failed to map image header memory\n");
return;
+ }
}
memcpy_fromio(&bmp_header, image, sizeof(bmp_header));
@@ -61,14 +81,18 @@ void __init efi_bgrt_init(void)
early_iounmap(image, sizeof(bmp_header));
bgrt_image_size = bmp_header.size;
- bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL);
- if (!bgrt_image)
+ bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN);
+ if (!bgrt_image) {
+ pr_err("Ignoring BGRT: failed to allocate memory for image (wanted %zu bytes)\n",
+ bgrt_image_size);
return;
+ }
if (ioremapped) {
image = early_memremap(bgrt_tab->image_address,
bmp_header.size);
if (!image) {
+ pr_err("Ignoring BGRT: failed to map image memory\n");
kfree(bgrt_image);
bgrt_image = NULL;
return;
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index bc423f7b02da..38c8ac2970ac 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -646,6 +646,4 @@ void __init bio_integrity_init(void)
sizeof(struct bio_integrity_payload) +
sizeof(struct bio_vec) * BIP_INLINE_VECS,
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
- if (!bip_slab)
- panic("Failed to create slab\n");
}
diff --git a/block/genhd.c b/block/genhd.c
index 791f41943132..7bd4372e8b6f 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -849,7 +849,7 @@ static int show_partition(struct seq_file *seqf, void *v)
char buf[BDEVNAME_SIZE];
/* Don't show non-partitionable removeable devices or empty devices */
- if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
+ if (!get_capacity(sgp) || (!(disk_max_parts(sgp) > 1) &&
(sgp->flags & GENHD_FL_REMOVABLE)))
return 0;
if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
diff --git a/crypto/zlib.c b/crypto/zlib.c
index 06b62e5cdcc7..c9ee681d57fd 100644
--- a/crypto/zlib.c
+++ b/crypto/zlib.c
@@ -168,7 +168,7 @@ static int zlib_compress_update(struct crypto_pcomp *tfm,
}
ret = req->avail_out - stream->avail_out;
- pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+ pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
stream->avail_in, stream->avail_out,
req->avail_in - stream->avail_in, ret);
req->next_in = stream->next_in;
@@ -198,7 +198,7 @@ static int zlib_compress_final(struct crypto_pcomp *tfm,
}
ret = req->avail_out - stream->avail_out;
- pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+ pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
stream->avail_in, stream->avail_out,
req->avail_in - stream->avail_in, ret);
req->next_in = stream->next_in;
@@ -283,7 +283,7 @@ static int zlib_decompress_update(struct crypto_pcomp *tfm,
}
ret = req->avail_out - stream->avail_out;
- pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+ pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
stream->avail_in, stream->avail_out,
req->avail_in - stream->avail_in, ret);
req->next_in = stream->next_in;
@@ -331,7 +331,7 @@ static int zlib_decompress_final(struct crypto_pcomp *tfm,
}
ret = req->avail_out - stream->avail_out;
- pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+ pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
stream->avail_in, stream->avail_out,
req->avail_in - stream->avail_in, ret);
req->next_in = stream->next_in;
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index e65d400efd44..e1b92788c225 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -16,6 +16,7 @@ menuconfig ATA
depends on BLOCK
depends on !(M32R || M68K || S390) || BROKEN
select SCSI
+ select GLOB
---help---
If you want to use an ATA hard disk, ATA tape drive, ATA CD-ROM or
any other ATA device under Linux, say Y and make sure that you know
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 677c0c1b03bd..dbdc5d32343f 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -59,6 +59,7 @@
#include <linux/async.h>
#include <linux/log2.h>
#include <linux/slab.h>
+#include <linux/glob.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_host.h>
@@ -4250,73 +4251,6 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
{ }
};
-/**
- * glob_match - match a text string against a glob-style pattern
- * @text: the string to be examined
- * @pattern: the glob-style pattern to be matched against
- *
- * Either/both of text and pattern can be empty strings.
- *
- * Match text against a glob-style pattern, with wildcards and simple sets:
- *
- * ? matches any single character.
- * * matches any run of characters.
- * [xyz] matches a single character from the set: x, y, or z.
- * [a-d] matches a single character from the range: a, b, c, or d.
- * [a-d0-9] matches a single character from either range.
- *
- * The special characters ?, [, -, or *, can be matched using a set, eg. [*]
- * Behaviour with malformed patterns is undefined, though generally reasonable.
- *
- * Sample patterns: "SD1?", "SD1[0-5]", "*R0", "SD*1?[012]*xx"
- *
- * This function uses one level of recursion per '*' in pattern.
- * Since it calls _nothing_ else, and has _no_ explicit local variables,
- * this will not cause stack problems for any reasonable use here.
- *
- * RETURNS:
- * 0 on match, 1 otherwise.
- */
-static int glob_match (const char *text, const char *pattern)
-{
- do {
- /* Match single character or a '?' wildcard */
- if (*text == *pattern || *pattern == '?') {
- if (!*pattern++)
- return 0; /* End of both strings: match */
- } else {
- /* Match single char against a '[' bracketed ']' pattern set */
- if (!*text || *pattern != '[')
- break; /* Not a pattern set */
- while (*++pattern && *pattern != ']' && *text != *pattern) {
- if (*pattern == '-' && *(pattern - 1) != '[')
- if (*text > *(pattern - 1) && *text < *(pattern + 1)) {
- ++pattern;
- break;
- }
- }
- if (!*pattern || *pattern == ']')
- return 1; /* No match */
- while (*pattern && *pattern++ != ']');
- }
- } while (*++text && *pattern);
-
- /* Match any run of chars against a '*' wildcard */
- if (*pattern == '*') {
- if (!*++pattern)
- return 0; /* Match: avoid recursion at end of pattern */
- /* Loop to handle additional pattern chars after the wildcard */
- while (*text) {
- if (glob_match(text, pattern) == 0)
- return 0; /* Remainder matched */
- ++text; /* Absorb (match) this char and try again */
- }
- }
- if (!*text && !*pattern)
- return 0; /* End of both strings: match */
- return 1; /* No match */
-}
-
static unsigned long ata_dev_blacklisted(const struct ata_device *dev)
{
unsigned char model_num[ATA_ID_PROD_LEN + 1];
@@ -4327,10 +4261,10 @@ static unsigned long ata_dev_blacklisted(const struct ata_device *dev)
ata_id_c_string(dev->id, model_rev, ATA_ID_FW_REV, sizeof(model_rev));
while (ad->model_num) {
- if (!glob_match(model_num, ad->model_num)) {
+ if (glob_match(model_num, ad->model_num)) {
if (ad->model_rev == NULL)
return ad->horkage;
- if (!glob_match(model_rev, ad->model_rev))
+ if (glob_match(model_rev, ad->model_rev))
return ad->horkage;
}
ad++;
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 88500fed3c7a..4e7f0ff83ae7 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -289,16 +289,6 @@ config CMA_ALIGNMENT
If unsure, leave the default value "8".
-config CMA_AREAS
- int "Maximum count of the CMA device-private areas"
- default 7
- help
- CMA allows to create CMA areas for particular devices. This parameter
- sets the maximum number of such device private CMA areas in the
- system.
-
- If unsure, leave the default value "7".
-
endif
endmenu
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
index 6467c919c509..6606abdf880c 100644
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
@@ -24,23 +24,9 @@
#include <linux/memblock.h>
#include <linux/err.h>
-#include <linux/mm.h>
-#include <linux/mutex.h>
-#include <linux/page-isolation.h>
#include <linux/sizes.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/mm_types.h>
#include <linux/dma-contiguous.h>
-
-struct cma {
- unsigned long base_pfn;
- unsigned long count;
- unsigned long *bitmap;
- struct mutex lock;
-};
-
-struct cma *dma_contiguous_default_area;
+#include <linux/cma.h>
#ifdef CONFIG_CMA_SIZE_MBYTES
#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
@@ -48,6 +34,8 @@ struct cma *dma_contiguous_default_area;
#define CMA_SIZE_MBYTES 0
#endif
+struct cma *dma_contiguous_default_area;
+
/*
* Default global CMA area size can be defined in kernel's .config.
* This is useful mainly for distro maintainers to create a kernel
@@ -154,65 +142,6 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
}
}
-static DEFINE_MUTEX(cma_mutex);
-
-static int __init cma_activate_area(struct cma *cma)
-{
- int bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long);
- unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
- unsigned i = cma->count >> pageblock_order;
- struct zone *zone;
-
- cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
-
- if (!cma->bitmap)
- return -ENOMEM;
-
- WARN_ON_ONCE(!pfn_valid(pfn));
- zone = page_zone(pfn_to_page(pfn));
-
- do {
- unsigned j;
- base_pfn = pfn;
- for (j = pageblock_nr_pages; j; --j, pfn++) {
- WARN_ON_ONCE(!pfn_valid(pfn));
- /*
- * alloc_contig_range requires the pfn range
- * specified to be in the same zone. Make this
- * simple by forcing the entire CMA resv range
- * to be in the same zone.
- */
- if (page_zone(pfn_to_page(pfn)) != zone)
- goto err;
- }
- init_cma_reserved_pageblock(pfn_to_page(base_pfn));
- } while (--i);
-
- mutex_init(&cma->lock);
- return 0;
-
-err:
- kfree(cma->bitmap);
- return -EINVAL;
-}
-
-static struct cma cma_areas[MAX_CMA_AREAS];
-static unsigned cma_area_count;
-
-static int __init cma_init_reserved_areas(void)
-{
- int i;
-
- for (i = 0; i < cma_area_count; i++) {
- int ret = cma_activate_area(&cma_areas[i]);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-core_initcall(cma_init_reserved_areas);
-
/**
* dma_contiguous_reserve_area() - reserve custom contiguous area
* @size: Size of the reserved area (in bytes),
@@ -234,72 +163,17 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
phys_addr_t limit, struct cma **res_cma,
bool fixed)
{
- struct cma *cma = &cma_areas[cma_area_count];
- phys_addr_t alignment;
- int ret = 0;
-
- pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__,
- (unsigned long)size, (unsigned long)base,
- (unsigned long)limit);
-
- /* Sanity checks */
- if (cma_area_count == ARRAY_SIZE(cma_areas)) {
- pr_err("Not enough slots for CMA reserved regions!\n");
- return -ENOSPC;
- }
-
- if (!size)
- return -EINVAL;
-
- /* Sanitise input arguments */
- alignment = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
- base = ALIGN(base, alignment);
- size = ALIGN(size, alignment);
- limit &= ~(alignment - 1);
-
- /* Reserve memory */
- if (base && fixed) {
- if (memblock_is_region_reserved(base, size) ||
- memblock_reserve(base, size) < 0) {
- ret = -EBUSY;
- goto err;
- }
- } else {
- phys_addr_t addr = memblock_alloc_range(size, alignment, base,
- limit);
- if (!addr) {
- ret = -ENOMEM;
- goto err;
- } else {
- base = addr;
- }
- }
-
- /*
- * Each reserved area must be initialised later, when more kernel
- * subsystems (like slab allocator) are available.
- */
- cma->base_pfn = PFN_DOWN(base);
- cma->count = size >> PAGE_SHIFT;
- *res_cma = cma;
- cma_area_count++;
+ int ret;
- pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
- (unsigned long)base);
+ ret = cma_declare_contiguous(base, size, limit, 0, 0, fixed, res_cma);
+ if (ret)
+ return ret;
/* Architecture specific contiguous memory fixup. */
- dma_contiguous_early_fixup(base, size);
- return 0;
-err:
- pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
- return ret;
-}
+ dma_contiguous_early_fixup(cma_get_base(*res_cma),
+ cma_get_size(*res_cma));
-static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count)
-{
- mutex_lock(&cma->lock);
- bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count);
- mutex_unlock(&cma->lock);
+ return 0;
}
/**
@@ -316,62 +190,10 @@ static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count)
struct page *dma_alloc_from_contiguous(struct device *dev, int count,
unsigned int align)
{
- unsigned long mask, pfn, pageno, start = 0;
- struct cma *cma = dev_get_cma_area(dev);
- struct page *page = NULL;
- int ret;
-
- if (!cma || !cma->count)
- return NULL;
-
if (align > CONFIG_CMA_ALIGNMENT)
align = CONFIG_CMA_ALIGNMENT;
- pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
- count, align);
-
- if (!count)
- return NULL;
-
- mask = (1 << align) - 1;
-
-
- for (;;) {
- mutex_lock(&cma->lock);
- pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
- start, count, mask);
- if (pageno >= cma->count) {
- mutex_unlock(&cma->lock);
- break;
- }
- bitmap_set(cma->bitmap, pageno, count);
- /*
- * It's safe to drop the lock here. We've marked this region for
- * our exclusive use. If the migration fails we will take the
- * lock again and unmark it.
- */
- mutex_unlock(&cma->lock);
-
- pfn = cma->base_pfn + pageno;
- mutex_lock(&cma_mutex);
- ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
- mutex_unlock(&cma_mutex);
- if (ret == 0) {
- page = pfn_to_page(pfn);
- break;
- } else if (ret != -EBUSY) {
- clear_cma_bitmap(cma, pfn, count);
- break;
- }
- clear_cma_bitmap(cma, pfn, count);
- pr_debug("%s(): memory range at %p is busy, retrying\n",
- __func__, pfn_to_page(pfn));
- /* try again with a bit different memory target */
- start = pageno + mask + 1;
- }
-
- pr_debug("%s(): returned %p\n", __func__, page);
- return page;
+ return cma_alloc(dev_get_cma_area(dev), count, align);
}
/**
@@ -387,23 +209,5 @@ struct page *dma_alloc_from_contiguous(struct device *dev, int count,
bool dma_release_from_contiguous(struct device *dev, struct page *pages,
int count)
{
- struct cma *cma = dev_get_cma_area(dev);
- unsigned long pfn;
-
- if (!cma || !pages)
- return false;
-
- pr_debug("%s(page %p)\n", __func__, (void *)pages);
-
- pfn = page_to_pfn(pages);
-
- if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
- return false;
-
- VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
-
- free_contig_range(pfn, count);
- clear_cma_bitmap(cma, pfn, count);
-
- return true;
+ return cma_release(dev_get_cma_area(dev), pages, count);
}
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 89f752dd8465..a2e13e250bba 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -284,7 +284,7 @@ static int memory_subsys_online(struct device *dev)
* attribute and need to set the online_type.
*/
if (mem->online_type < 0)
- mem->online_type = ONLINE_KEEP;
+ mem->online_type = MMOP_ONLINE_KEEP;
ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
@@ -315,23 +315,23 @@ store_mem_state(struct device *dev,
if (ret)
return ret;
- if (!strncmp(buf, "online_kernel", min_t(int, count, 13)))
- online_type = ONLINE_KERNEL;
- else if (!strncmp(buf, "online_movable", min_t(int, count, 14)))
- online_type = ONLINE_MOVABLE;
- else if (!strncmp(buf, "online", min_t(int, count, 6)))
- online_type = ONLINE_KEEP;
- else if (!strncmp(buf, "offline", min_t(int, count, 7)))
- online_type = -1;
+ if (sysfs_streq(buf, "online_kernel"))
+ online_type = MMOP_ONLINE_KERNEL;
+ else if (sysfs_streq(buf, "online_movable"))
+ online_type = MMOP_ONLINE_MOVABLE;
+ else if (sysfs_streq(buf, "online"))
+ online_type = MMOP_ONLINE_KEEP;
+ else if (sysfs_streq(buf, "offline"))
+ online_type = MMOP_OFFLINE;
else {
ret = -EINVAL;
goto err;
}
switch (online_type) {
- case ONLINE_KERNEL:
- case ONLINE_MOVABLE:
- case ONLINE_KEEP:
+ case MMOP_ONLINE_KERNEL:
+ case MMOP_ONLINE_MOVABLE:
+ case MMOP_ONLINE_KEEP:
/*
* mem->online_type is not protected so there can be a
* race here. However, when racing online, the first
@@ -342,7 +342,7 @@ store_mem_state(struct device *dev,
mem->online_type = online_type;
ret = device_online(&mem->dev);
break;
- case -1:
+ case MMOP_OFFLINE:
ret = device_offline(&mem->dev);
break;
default:
@@ -406,7 +406,9 @@ memory_probe_store(struct device *dev, struct device_attribute *attr,
int i, ret;
unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;
- phys_addr = simple_strtoull(buf, NULL, 0);
+ ret = kstrtoull(buf, 0, &phys_addr);
+ if (ret)
+ return ret;
if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
return -EINVAL;
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 8f7ed9933a7c..c6d3ae05f1ca 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -126,7 +126,7 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(nid, NR_FILE_PAGES)),
nid, K(node_page_state(nid, NR_FILE_MAPPED)),
nid, K(node_page_state(nid, NR_ANON_PAGES)),
- nid, K(node_page_state(nid, NR_SHMEM)),
+ nid, K(i.sharedram),
nid, node_page_state(nid, NR_KERNEL_STACK) *
THREAD_SIZE / 1024,
nid, K(node_page_state(nid, NR_PAGETABLE)),
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 36e54be402df..dfa4024c448a 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -183,19 +183,32 @@ static ssize_t comp_algorithm_store(struct device *dev,
static int zram_test_flag(struct zram_meta *meta, u32 index,
enum zram_pageflags flag)
{
- return meta->table[index].flags & BIT(flag);
+ return meta->table[index].value & BIT(flag);
}
static void zram_set_flag(struct zram_meta *meta, u32 index,
enum zram_pageflags flag)
{
- meta->table[index].flags |= BIT(flag);
+ meta->table[index].value |= BIT(flag);
}
static void zram_clear_flag(struct zram_meta *meta, u32 index,
enum zram_pageflags flag)
{
- meta->table[index].flags &= ~BIT(flag);
+ meta->table[index].value &= ~BIT(flag);
+}
+
+static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
+{
+ return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
+}
+
+static void zram_set_obj_size(struct zram_meta *meta,
+ u32 index, size_t size)
+{
+ unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
+
+ meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
}
static inline int is_partial_io(struct bio_vec *bvec)
@@ -255,7 +268,6 @@ static struct zram_meta *zram_meta_alloc(u64 disksize)
goto free_table;
}
- rwlock_init(&meta->tb_lock);
return meta;
free_table:
@@ -304,7 +316,12 @@ static void handle_zero_page(struct bio_vec *bvec)
flush_dcache_page(page);
}
-/* NOTE: caller should hold meta->tb_lock with write-side */
+
+/*
+ * To protect concurrent access to the same index entry,
+ * caller should hold this table index entry's bit_spinlock to
+ * indicate that this index entry is being accessed.
+ */
static void zram_free_page(struct zram *zram, size_t index)
{
struct zram_meta *meta = zram->meta;
@@ -324,11 +341,12 @@ static void zram_free_page(struct zram *zram, size_t index)
zs_free(meta->mem_pool, handle);
- atomic64_sub(meta->table[index].size, &zram->stats.compr_data_size);
+ atomic64_sub(zram_get_obj_size(meta, index),
+ &zram->stats.compr_data_size);
atomic64_dec(&zram->stats.pages_stored);
meta->table[index].handle = 0;
- meta->table[index].size = 0;
+ zram_set_obj_size(meta, index, 0);
}
static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
@@ -337,14 +355,14 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
unsigned char *cmem;
struct zram_meta *meta = zram->meta;
unsigned long handle;
- u16 size;
+ size_t size;
- read_lock(&meta->tb_lock);
+ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
handle = meta->table[index].handle;
- size = meta->table[index].size;
+ size = zram_get_obj_size(meta, index);
if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
- read_unlock(&meta->tb_lock);
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
clear_page(mem);
return 0;
}
@@ -355,7 +373,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
else
ret = zcomp_decompress(zram->comp, cmem, size, mem);
zs_unmap_object(meta->mem_pool, handle);
- read_unlock(&meta->tb_lock);
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
/* Should NEVER happen. Return bio error if it does. */
if (unlikely(ret)) {
@@ -376,14 +394,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
struct zram_meta *meta = zram->meta;
page = bvec->bv_page;
- read_lock(&meta->tb_lock);
+ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
if (unlikely(!meta->table[index].handle) ||
zram_test_flag(meta, index, ZRAM_ZERO)) {
- read_unlock(&meta->tb_lock);
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
handle_zero_page(bvec);
return 0;
}
- read_unlock(&meta->tb_lock);
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
if (is_partial_io(bvec))
/* Use a temporary buffer to decompress the page */
@@ -461,10 +479,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
if (page_zero_filled(uncmem)) {
kunmap_atomic(user_mem);
/* Free memory associated with this sector now. */
- write_lock(&zram->meta->tb_lock);
+ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
zram_free_page(zram, index);
zram_set_flag(meta, index, ZRAM_ZERO);
- write_unlock(&zram->meta->tb_lock);
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
atomic64_inc(&zram->stats.zero_pages);
ret = 0;
@@ -514,12 +532,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
* Free memory associated with this sector
* before overwriting unused sectors.
*/
- write_lock(&zram->meta->tb_lock);
+ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
zram_free_page(zram, index);
meta->table[index].handle = handle;
- meta->table[index].size = clen;
- write_unlock(&zram->meta->tb_lock);
+ zram_set_obj_size(meta, index, clen);
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
/* Update stats */
atomic64_add(clen, &zram->stats.compr_data_size);
@@ -560,6 +578,7 @@ static void zram_bio_discard(struct zram *zram, u32 index,
int offset, struct bio *bio)
{
size_t n = bio->bi_iter.bi_size;
+ struct zram_meta *meta = zram->meta;
/*
* zram manages data in physical block size units. Because logical block
@@ -580,13 +599,9 @@ static void zram_bio_discard(struct zram *zram, u32 index,
}
while (n >= PAGE_SIZE) {
- /*
- * Discard request can be large so the lock hold times could be
- * lengthy. So take the lock once per page.
- */
- write_lock(&zram->meta->tb_lock);
+ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
zram_free_page(zram, index);
- write_unlock(&zram->meta->tb_lock);
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
index++;
n -= PAGE_SIZE;
}
@@ -821,9 +836,9 @@ static void zram_slot_free_notify(struct block_device *bdev,
zram = bdev->bd_disk->private_data;
meta = zram->meta;
- write_lock(&meta->tb_lock);
+ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
zram_free_page(zram, index);
- write_unlock(&meta->tb_lock);
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
atomic64_inc(&zram->stats.notify_free);
}
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 7f21c145e317..5b0afde729cd 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -43,7 +43,6 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
/*-- End of configurable params */
#define SECTOR_SHIFT 9
-#define SECTOR_SIZE (1 << SECTOR_SHIFT)
#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT)
#define ZRAM_LOGICAL_BLOCK_SHIFT 12
@@ -51,10 +50,24 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
#define ZRAM_SECTOR_PER_LOGICAL_BLOCK \
(1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT))
-/* Flags for zram pages (table[page_no].flags) */
+
+/*
+ * zram is mainly used for memory efficiency, so we want to keep its
+ * memory footprint small and squeeze both the object size and the
+ * flags into a single table.value field: the lower ZRAM_FLAG_SHIFT
+ * bits hold the object size (excluding header), the higher bits hold
+ * the zram_pageflags.
+ */
+#define ZRAM_FLAG_SHIFT 24
+
+/* Flags for zram pages (table[page_no].value) */
enum zram_pageflags {
/* Page consists entirely of zeros */
- ZRAM_ZERO,
+ ZRAM_ZERO = ZRAM_FLAG_SHIFT + 1,
+ ZRAM_ACCESS, /* page is now accessed */
__NR_ZRAM_PAGEFLAGS,
};
@@ -62,11 +75,10 @@ enum zram_pageflags {
/*-- Data structures */
/* Allocated for each disk page */
-struct table {
+struct zram_table_entry {
unsigned long handle;
- u16 size; /* object size (excluding header) */
- u8 flags;
-} __aligned(4);
+ unsigned long value;
+};
struct zram_stats {
atomic64_t compr_data_size; /* compressed size of pages stored */
@@ -81,8 +93,7 @@ struct zram_stats {
};
struct zram_meta {
- rwlock_t tb_lock; /* protect table */
- struct table *table;
+ struct zram_table_entry *table;
struct zs_pool *mem_pool;
};
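
Given that layout, the zram_set_obj_size() call seen in the zram_drv.c hunk above stores the size in the low ZRAM_FLAG_SHIFT bits while preserving the flag bits. A minimal sketch of matching accessors, assuming this bit layout (illustrative, not necessarily the exact patch code):

static unsigned long zram_get_obj_size(struct zram_meta *meta, u32 index)
{
	/* The low ZRAM_FLAG_SHIFT bits carry the compressed object size. */
	return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void zram_set_obj_size(struct zram_meta *meta, u32 index, size_t size)
{
	unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;

	/* Keep the flag bits, replace only the size bits. */
	meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
}
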
diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index 17cf96c45f2b..79f18e6d9c4f 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -286,7 +286,11 @@ int __meminit firmware_map_add_hotplug(u64 start, u64 end, const char *type)
{
struct firmware_map_entry *entry;
- entry = firmware_map_find_entry_bootmem(start, end, type);
+ entry = firmware_map_find_entry(start, end - 1, type);
+ if (entry)
+ return 0;
+
+ entry = firmware_map_find_entry_bootmem(start, end - 1, type);
if (!entry) {
entry = kzalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC);
if (!entry)
diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
index 7e4bae760e27..c3b80fd65d62 100644
--- a/drivers/gpu/drm/drm_hashtab.c
+++ b/drivers/gpu/drm/drm_hashtab.c
@@ -125,7 +125,7 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item)
parent = &entry->head;
}
if (parent) {
- hlist_add_after_rcu(parent, &item->head);
+ hlist_add_behind_rcu(&item->head, parent);
} else {
hlist_add_head_rcu(&item->head, h_list);
}
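
This is part of the tree-wide hlist_add_after() -> hlist_add_behind() rename, which also swaps the argument order so the node being inserted comes first. The two calling conventions, side by side (taken from the hunk above):

/* old API: existing node first, node to insert second */
hlist_add_after_rcu(parent, &item->head);

/* new API: node to insert first, the node it goes behind second */
hlist_add_behind_rcu(&item->head, parent);
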
diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c
index ae208f612198..cccef87963e0 100644
--- a/drivers/hwmon/asus_atk0110.c
+++ b/drivers/hwmon/asus_atk0110.c
@@ -688,7 +688,7 @@ static int atk_debugfs_gitm_get(void *p, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(atk_debugfs_gitm,
atk_debugfs_gitm_get,
NULL,
- "0x%08llx\n")
+ "0x%08llx\n");
static int atk_acpi_print(char *buf, size_t sz, union acpi_object *obj)
{
diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig
index a11ff74a5127..9eac8de9e8b7 100644
--- a/drivers/input/Kconfig
+++ b/drivers/input/Kconfig
@@ -178,6 +178,15 @@ comment "Input Device Drivers"
source "drivers/input/keyboard/Kconfig"
+config INPUT_LEDS
+ bool "LED Support"
+ depends on LEDS_CLASS = INPUT || LEDS_CLASS = y
+ select LEDS_TRIGGERS
+ default y
+ help
+ This option enables support for LEDs on keyboards managed
+ by the input layer.
+
source "drivers/input/mouse/Kconfig"
source "drivers/input/joystick/Kconfig"
diff --git a/drivers/input/Makefile b/drivers/input/Makefile
index 5ca3f631497f..2ab5f3336da5 100644
--- a/drivers/input/Makefile
+++ b/drivers/input/Makefile
@@ -6,6 +6,9 @@
obj-$(CONFIG_INPUT) += input-core.o
input-core-y := input.o input-compat.o input-mt.o ff-core.o
+ifeq ($(CONFIG_INPUT_LEDS),y)
+input-core-y += leds.o
+endif
obj-$(CONFIG_INPUT_FF_MEMLESS) += ff-memless.o
obj-$(CONFIG_INPUT_POLLDEV) += input-polldev.o
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 29ca0bb4f561..236bc56e6a81 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -710,6 +710,9 @@ static void input_disconnect_device(struct input_dev *dev)
handle->open = 0;
spin_unlock_irq(&dev->event_lock);
+
+ if (is_event_supported(EV_LED, dev->evbit, EV_MAX))
+ input_led_disconnect(dev);
}
/**
@@ -2136,6 +2139,9 @@ int input_register_device(struct input_dev *dev)
list_add_tail(&dev->node, &input_dev_list);
+ if (is_event_supported(EV_LED, dev->evbit, EV_MAX))
+ input_led_connect(dev);
+
list_for_each_entry(handler, &input_handler_list, node)
input_attach_handler(dev, handler);
diff --git a/drivers/input/leds.c b/drivers/input/leds.c
new file mode 100644
index 000000000000..985fa7ebeec7
--- /dev/null
+++ b/drivers/input/leds.c
@@ -0,0 +1,249 @@
+/*
+ * LED support for the input layer
+ *
+ * Copyright 2010-2014 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/leds.h>
+#include <linux/input.h>
+
+/*
+ * Keyboard LEDs are propagated by default like the following example:
+ *
+ * VT keyboard numlock trigger
+ * -> vt::numl VT LED
+ * -> vt-numl VT trigger
+ * -> per-device inputX::numl LED
+ *
+ * Userland can however choose the trigger for the vt::numl LED, or
+ * independently choose the trigger for any inputX::numl LED.
+ *
+ *
+ * VT LED classes and triggers are registered on demand, according to
+ * the LEDs exposed by existing input devices.
+ */
+
+/* Handler for VT LEDs, just triggers the corresponding VT trigger. */
+static void vt_led_set(struct led_classdev *cdev,
+ enum led_brightness brightness);
+static struct led_classdev vt_leds[LED_CNT] = {
+#define DEFINE_INPUT_LED(vt_led, nam, deftrig) \
+ [vt_led] = { \
+ .name = "vt::"nam, \
+ .max_brightness = 1, \
+ .brightness_set = vt_led_set, \
+ .default_trigger = deftrig, \
+ }
+/* Default triggers for the VT LEDs just correspond to the legacy
+ * usage. */
+ DEFINE_INPUT_LED(LED_NUML, "numl", "kbd-numlock"),
+ DEFINE_INPUT_LED(LED_CAPSL, "capsl", "kbd-capslock"),
+ DEFINE_INPUT_LED(LED_SCROLLL, "scrolll", "kbd-scrollock"),
+ DEFINE_INPUT_LED(LED_COMPOSE, "compose", NULL),
+ DEFINE_INPUT_LED(LED_KANA, "kana", "kbd-kanalock"),
+ DEFINE_INPUT_LED(LED_SLEEP, "sleep", NULL),
+ DEFINE_INPUT_LED(LED_SUSPEND, "suspend", NULL),
+ DEFINE_INPUT_LED(LED_MUTE, "mute", NULL),
+ DEFINE_INPUT_LED(LED_MISC, "misc", NULL),
+ DEFINE_INPUT_LED(LED_MAIL, "mail", NULL),
+ DEFINE_INPUT_LED(LED_CHARGING, "charging", NULL),
+};
+static const char *const vt_led_names[LED_CNT] = {
+ [LED_NUML] = "numl",
+ [LED_CAPSL] = "capsl",
+ [LED_SCROLLL] = "scrolll",
+ [LED_COMPOSE] = "compose",
+ [LED_KANA] = "kana",
+ [LED_SLEEP] = "sleep",
+ [LED_SUSPEND] = "suspend",
+ [LED_MUTE] = "mute",
+ [LED_MISC] = "misc",
+ [LED_MAIL] = "mail",
+ [LED_CHARGING] = "charging",
+};
+/* Handler for hotplug initialization */
+static void vt_led_trigger_activate(struct led_classdev *cdev);
+/* VT triggers */
+static struct led_trigger vt_led_triggers[LED_CNT] = {
+#define DEFINE_INPUT_LED_TRIGGER(vt_led, nam) \
+ [vt_led] = { \
+ .name = "vt-"nam, \
+ .activate = vt_led_trigger_activate, \
+ }
+ DEFINE_INPUT_LED_TRIGGER(LED_NUML, "numl"),
+ DEFINE_INPUT_LED_TRIGGER(LED_CAPSL, "capsl"),
+ DEFINE_INPUT_LED_TRIGGER(LED_SCROLLL, "scrolll"),
+ DEFINE_INPUT_LED_TRIGGER(LED_COMPOSE, "compose"),
+ DEFINE_INPUT_LED_TRIGGER(LED_KANA, "kana"),
+ DEFINE_INPUT_LED_TRIGGER(LED_SLEEP, "sleep"),
+ DEFINE_INPUT_LED_TRIGGER(LED_SUSPEND, "suspend"),
+ DEFINE_INPUT_LED_TRIGGER(LED_MUTE, "mute"),
+ DEFINE_INPUT_LED_TRIGGER(LED_MISC, "misc"),
+ DEFINE_INPUT_LED_TRIGGER(LED_MAIL, "mail"),
+ DEFINE_INPUT_LED_TRIGGER(LED_CHARGING, "charging"),
+};
+
+/* Lock for registration coherency */
+static DEFINE_MUTEX(vt_led_registered_lock);
+
+/* Which VT LED classes and triggers are registered */
+static unsigned long vt_led_registered[BITS_TO_LONGS(LED_CNT)];
+
+/* Number of input devices having each LED */
+static int vt_led_references[LED_CNT];
+
+/* VT LED state change, tell the VT trigger. */
+static void vt_led_set(struct led_classdev *cdev,
+ enum led_brightness brightness)
+{
+ int led = cdev - vt_leds;
+
+ led_trigger_event(&vt_led_triggers[led], !!brightness);
+}
+
+/* LED state change for some keyboard, notify that keyboard. */
+static void perdevice_input_led_set(struct led_classdev *cdev,
+ enum led_brightness brightness)
+{
+ struct input_dev *dev;
+ struct led_classdev *leds;
+ int led;
+
+ dev = cdev->dev->platform_data;
+ if (!dev)
+ /* Still initializing */
+ return;
+ leds = dev->leds;
+ led = cdev - leds;
+
+ input_event(dev, EV_LED, led, !!brightness);
+ input_event(dev, EV_SYN, SYN_REPORT, 0);
+}
+
+/* Keyboard hotplug, initialize its LED status */
+static void vt_led_trigger_activate(struct led_classdev *cdev)
+{
+ struct led_trigger *trigger = cdev->trigger;
+ int led = trigger - vt_led_triggers;
+
+ if (cdev->brightness_set)
+ cdev->brightness_set(cdev, vt_leds[led].brightness);
+}
+
+/* Free LED data from the input device; used on failed connect and at disconnection. */
+static void input_led_delete(struct input_dev *dev)
+{
+ if (dev) {
+ struct led_classdev *leds = dev->leds;
+ if (leds) {
+ int i;
+ for (i = 0; i < LED_CNT; i++)
+ kfree(leds[i].name);
+ kfree(leds);
+ dev->leds = NULL;
+ }
+ }
+}
+
+/* A new input device with potential LEDs to connect. */
+int input_led_connect(struct input_dev *dev)
+{
+ int i, error = 0;
+ struct led_classdev *leds;
+
+ dev->leds = leds = kcalloc(LED_CNT, sizeof(*leds), GFP_KERNEL);
+ if (!dev->leds)
+ return -ENOMEM;
+
+ /* lazily register missing VT LEDs */
+ mutex_lock(&vt_led_registered_lock);
+ for (i = 0; i < LED_CNT; i++)
+ if (vt_leds[i].name && test_bit(i, dev->ledbit)) {
+ if (!vt_led_references[i]) {
+ led_trigger_register(&vt_led_triggers[i]);
+ /* This keyboard is the first to have LED i,
+ * try to register it. */
+ if (!led_classdev_register(NULL, &vt_leds[i]))
+ vt_led_references[i] = 1;
+ else
+ led_trigger_unregister(&vt_led_triggers[i]);
+ } else
+ vt_led_references[i]++;
+ }
+ mutex_unlock(&vt_led_registered_lock);
+
+ /* and register this device's LEDs */
+ for (i = 0; i < LED_CNT; i++)
+ if (vt_leds[i].name && test_bit(i, dev->ledbit)) {
+ leds[i].name = kasprintf(GFP_KERNEL, "%s::%s",
+ dev_name(&dev->dev),
+ vt_led_names[i]);
+ if (!leds[i].name) {
+ error = -ENOMEM;
+ goto err;
+ }
+ leds[i].max_brightness = 1;
+ leds[i].brightness_set = perdevice_input_led_set;
+ leds[i].default_trigger = vt_led_triggers[i].name;
+ }
+
+ /* No issue so far, we can register for real. */
+ for (i = 0; i < LED_CNT; i++)
+ if (leds[i].name) {
+ led_classdev_register(&dev->dev, &leds[i]);
+ leds[i].dev->platform_data = dev;
+ perdevice_input_led_set(&leds[i],
+ vt_leds[i].brightness);
+ }
+
+ return 0;
+
+err:
+ input_led_delete(dev);
+ return error;
+}
+
+/*
+ * Disconnected input device. Clean it, and deregister now-useless VT LEDs
+ * and triggers.
+ */
+void input_led_disconnect(struct input_dev *dev)
+{
+ int i;
+ struct led_classdev *leds = dev->leds;
+
+ for (i = 0; i < LED_CNT; i++)
+ if (leds[i].name)
+ led_classdev_unregister(&leds[i]);
+
+ input_led_delete(dev);
+
+ mutex_lock(&vt_led_registered_lock);
+ for (i = 0; i < LED_CNT; i++) {
+ if (!vt_leds[i].name || !test_bit(i, dev->ledbit))
+ continue;
+
+ vt_led_references[i]--;
+ if (vt_led_references[i]) {
+ /* Still some devices needing it */
+ continue;
+ }
+
+ led_classdev_unregister(&vt_leds[i]);
+ led_trigger_unregister(&vt_led_triggers[i]);
+ clear_bit(i, vt_led_registered);
+ }
+ mutex_unlock(&vt_led_registered_lock);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("User LED support for input layer");
+MODULE_AUTHOR("Samuel Thibault <samuel.thibault@ens-lyon.org>");
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 8c96e2ddf43b..f6e32ba50674 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -11,9 +11,6 @@ menuconfig NEW_LEDS
Say Y to enable Linux LED support. This allows control of supported
LEDs from both userspace and optionally, by kernel events (triggers).
- This is not related to standard keyboard LEDs which are controlled
- via the input system.
-
if NEW_LEDS
config LEDS_CLASS
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 0bf1e4edf04d..6590558d1d31 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -42,7 +42,6 @@ DEFINE_MUTEX(lguest_lock);
static __init int map_switcher(void)
{
int i, err;
- struct page **pagep;
/*
* Map the Switcher in to high memory.
@@ -110,11 +109,9 @@ static __init int map_switcher(void)
* This code actually sets up the pages we've allocated to appear at
* switcher_addr. map_vm_area() takes the vma we allocated above, the
* kind of pages we're mapping (kernel pages), and a pointer to our
- * array of struct pages. It increments that pointer, but we don't
- * care.
+ * array of struct pages.
*/
- pagep = lg_switcher_pages;
- err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep);
+ err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, lg_switcher_pages);
if (err) {
printk("lguest: map_vm_area failed: %i\n", err);
goto free_vma;
diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c
index 1972d57aadb3..e7fbc08a0627 100644
--- a/drivers/misc/ti-st/st_core.c
+++ b/drivers/misc/ti-st/st_core.c
@@ -342,7 +342,7 @@ void st_int_recv(void *disc_data,
/* Unknow packet? */
default:
type = *ptr;
- if (st_gdata->list[type] == NULL) {
+ if (type >= ST_MAX_CHANNELS || st_gdata->list[type] == NULL) {
pr_err("chip/interface misbehavior dropping"
" frame starting with 0x%02x", type);
goto done;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 681a9e81ff51..e8ba7470700a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1948,7 +1948,7 @@ static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi,
/* add filter to the list */
if (parent)
- hlist_add_after(&parent->fdir_node, &input->fdir_node);
+ hlist_add_behind(&input->fdir_node, &parent->fdir_node);
else
hlist_add_head(&input->fdir_node,
&pf->fdir_filter_list);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 94a1c07efeb0..e4100b5737b6 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -2517,7 +2517,7 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
/* add filter to the list */
if (parent)
- hlist_add_after(&parent->fdir_node, &input->fdir_node);
+ hlist_add_behind(&input->fdir_node, &parent->fdir_node);
else
hlist_add_head(&input->fdir_node,
&adapter->fdir_filter_list);
diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c
index 768dfe9a9315..6d3e2093bf7f 100644
--- a/drivers/net/irda/donauboe.c
+++ b/drivers/net/irda/donauboe.c
@@ -1755,17 +1755,4 @@ static struct pci_driver donauboe_pci_driver = {
.resume = toshoboe_wakeup
};
-static int __init
-donauboe_init (void)
-{
- return pci_register_driver(&donauboe_pci_driver);
-}
-
-static void __exit
-donauboe_cleanup (void)
-{
- pci_unregister_driver(&donauboe_pci_driver);
-}
-
-module_init(donauboe_init);
-module_exit(donauboe_cleanup);
+module_pci_driver(donauboe_pci_driver);
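
module_pci_driver() generates the init/exit boilerplate that the removed donauboe_init()/donauboe_cleanup() pair used to spell out. Roughly, the macro expands to something like the following (simplified sketch, not the literal kernel definition):

static int __init donauboe_pci_driver_init(void)
{
	return pci_register_driver(&donauboe_pci_driver);
}
module_init(donauboe_pci_driver_init);

static void __exit donauboe_pci_driver_exit(void)
{
	pci_unregister_driver(&donauboe_pci_driver);
}
module_exit(donauboe_pci_driver_exit);
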
diff --git a/drivers/parport/parport_ip32.c b/drivers/parport/parport_ip32.c
index c864f82bd37d..30e981be14c2 100644
--- a/drivers/parport/parport_ip32.c
+++ b/drivers/parport/parport_ip32.c
@@ -2204,7 +2204,7 @@ static int __init parport_ip32_init(void)
{
pr_info(PPIP32 "SGI IP32 built-in parallel port driver v0.6\n");
this_port = parport_ip32_probe_port();
- return IS_ERR(this_port) ? PTR_ERR(this_port) : 0;
+ return PTR_ERR_OR_ZERO(this_port);
}
/**
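
PTR_ERR_OR_ZERO() is shorthand for the conditional it replaces; the new return statement is equivalent to:

if (IS_ERR(this_port))
	return PTR_ERR(this_port);
return 0;
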
diff --git a/drivers/rapidio/devices/tsi721.h b/drivers/rapidio/devices/tsi721.h
index 0305675270ee..a7b42680a06a 100644
--- a/drivers/rapidio/devices/tsi721.h
+++ b/drivers/rapidio/devices/tsi721.h
@@ -644,27 +644,26 @@ enum tsi721_smsg_int_flag {
#ifdef CONFIG_RAPIDIO_DMA_ENGINE
-#define TSI721_BDMA_BD_RING_SZ 128
#define TSI721_BDMA_MAX_BCOUNT (TSI721_DMAD_BCOUNT1 + 1)
struct tsi721_tx_desc {
struct dma_async_tx_descriptor txd;
- struct tsi721_dma_desc *hw_desc;
u16 destid;
/* low 64-bits of 66-bit RIO address */
u64 rio_addr;
/* upper 2-bits of 66-bit RIO address */
u8 rio_addr_u;
- u32 bcount;
- bool interrupt;
+ enum dma_rtype rtype;
struct list_head desc_node;
- struct list_head tx_list;
+ struct scatterlist *sg;
+ unsigned int sg_len;
+ enum dma_status status;
};
struct tsi721_bdma_chan {
int id;
void __iomem *regs;
- int bd_num; /* number of buffer descriptors */
+ int bd_num; /* number of HW buffer descriptors */
void *bd_base; /* start of DMA descriptors */
dma_addr_t bd_phys;
void *sts_base; /* start of DMA BD status FIFO */
@@ -680,7 +679,6 @@ struct tsi721_bdma_chan {
struct list_head active_list;
struct list_head queue;
struct list_head free_list;
- dma_cookie_t completed_cookie;
struct tasklet_struct tasklet;
bool active;
};
diff --git a/drivers/rapidio/devices/tsi721_dma.c b/drivers/rapidio/devices/tsi721_dma.c
index 44341dc5b148..f64c5decb747 100644
--- a/drivers/rapidio/devices/tsi721_dma.c
+++ b/drivers/rapidio/devices/tsi721_dma.c
@@ -1,7 +1,7 @@
/*
* DMA Engine support for Tsi721 PCIExpress-to-SRIO bridge
*
- * Copyright 2011 Integrated Device Technology, Inc.
+ * Copyright (c) 2011-2014 Integrated Device Technology, Inc.
* Alexandre Bounine <alexandre.bounine@idt.com>
*
* This program is free software; you can redistribute it and/or modify it
@@ -14,9 +14,8 @@
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
*/
#include <linux/io.h>
@@ -32,9 +31,22 @@
#include <linux/interrupt.h>
#include <linux/kfifo.h>
#include <linux/delay.h>
+#include "../../dma/dmaengine.h"
#include "tsi721.h"
+#define TSI721_DMA_TX_QUEUE_SZ 16 /* number of transaction descriptors */
+
+#ifdef CONFIG_PCI_MSI
+static irqreturn_t tsi721_bdma_msix(int irq, void *ptr);
+#endif
+static int tsi721_submit_sg(struct tsi721_tx_desc *desc);
+
+static unsigned int dma_desc_per_channel = 128;
+module_param(dma_desc_per_channel, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(dma_desc_per_channel,
+ "Number of DMA descriptors per channel (default: 128)");
+
static inline struct tsi721_bdma_chan *to_tsi721_chan(struct dma_chan *chan)
{
return container_of(chan, struct tsi721_bdma_chan, dchan);
@@ -59,7 +71,7 @@ struct tsi721_tx_desc *tsi721_dma_first_active(
struct tsi721_tx_desc, desc_node);
}
-static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
+static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan, int bd_num)
{
struct tsi721_dma_desc *bd_ptr;
struct device *dev = bdma_chan->dchan.device->dev;
@@ -67,17 +79,23 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
dma_addr_t bd_phys;
dma_addr_t sts_phys;
int sts_size;
- int bd_num = bdma_chan->bd_num;
+#ifdef CONFIG_PCI_MSI
+ struct tsi721_device *priv = to_tsi721(bdma_chan->dchan.device);
+#endif
dev_dbg(dev, "Init Block DMA Engine, CH%d\n", bdma_chan->id);
- /* Allocate space for DMA descriptors */
+ /*
+ * Allocate space for DMA descriptors
+ * (add an extra element for link descriptor)
+ */
bd_ptr = dma_zalloc_coherent(dev,
- bd_num * sizeof(struct tsi721_dma_desc),
+ (bd_num + 1) * sizeof(struct tsi721_dma_desc),
&bd_phys, GFP_KERNEL);
if (!bd_ptr)
return -ENOMEM;
+ bdma_chan->bd_num = bd_num;
bdma_chan->bd_phys = bd_phys;
bdma_chan->bd_base = bd_ptr;
@@ -85,8 +103,8 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
bd_ptr, (unsigned long long)bd_phys);
/* Allocate space for descriptor status FIFO */
- sts_size = (bd_num >= TSI721_DMA_MINSTSSZ) ?
- bd_num : TSI721_DMA_MINSTSSZ;
+ sts_size = ((bd_num + 1) >= TSI721_DMA_MINSTSSZ) ?
+ (bd_num + 1) : TSI721_DMA_MINSTSSZ;
sts_size = roundup_pow_of_two(sts_size);
sts_ptr = dma_zalloc_coherent(dev,
sts_size * sizeof(struct tsi721_dma_sts),
@@ -94,7 +112,7 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
if (!sts_ptr) {
/* Free space allocated for DMA descriptors */
dma_free_coherent(dev,
- bd_num * sizeof(struct tsi721_dma_desc),
+ (bd_num + 1) * sizeof(struct tsi721_dma_desc),
bd_ptr, bd_phys);
bdma_chan->bd_base = NULL;
return -ENOMEM;
@@ -108,11 +126,11 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
"desc status FIFO @ %p (phys = %llx) size=0x%x\n",
sts_ptr, (unsigned long long)sts_phys, sts_size);
- /* Initialize DMA descriptors ring */
- bd_ptr[bd_num - 1].type_id = cpu_to_le32(DTYPE3 << 29);
- bd_ptr[bd_num - 1].next_lo = cpu_to_le32((u64)bd_phys &
+ /* Initialize DMA descriptors ring using added link descriptor */
+ bd_ptr[bd_num].type_id = cpu_to_le32(DTYPE3 << 29);
+ bd_ptr[bd_num].next_lo = cpu_to_le32((u64)bd_phys &
TSI721_DMAC_DPTRL_MASK);
- bd_ptr[bd_num - 1].next_hi = cpu_to_le32((u64)bd_phys >> 32);
+ bd_ptr[bd_num].next_hi = cpu_to_le32((u64)bd_phys >> 32);
/* Setup DMA descriptor pointers */
iowrite32(((u64)bd_phys >> 32),
@@ -134,6 +152,55 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
ioread32(bdma_chan->regs + TSI721_DMAC_INT);
+#ifdef CONFIG_PCI_MSI
+ /* Request interrupt service if we are in MSI-X mode */
+ if (priv->flags & TSI721_USING_MSIX) {
+ int rc, idx;
+
+ idx = TSI721_VECT_DMA0_DONE + bdma_chan->id;
+
+ rc = request_irq(priv->msix[idx].vector, tsi721_bdma_msix, 0,
+ priv->msix[idx].irq_name, (void *)bdma_chan);
+
+ if (rc) {
+ dev_dbg(dev, "Unable to get MSI-X for BDMA%d-DONE\n",
+ bdma_chan->id);
+ goto err_out;
+ }
+
+ idx = TSI721_VECT_DMA0_INT + bdma_chan->id;
+
+ rc = request_irq(priv->msix[idx].vector, tsi721_bdma_msix, 0,
+ priv->msix[idx].irq_name, (void *)bdma_chan);
+
+ if (rc) {
+ dev_dbg(dev, "Unable to get MSI-X for BDMA%d-INT\n",
+ bdma_chan->id);
+ free_irq(
+ priv->msix[TSI721_VECT_DMA0_DONE +
+ bdma_chan->id].vector,
+ (void *)bdma_chan);
+ }
+
+err_out:
+ if (rc) {
+ /* Free space allocated for DMA descriptors */
+ dma_free_coherent(dev,
+ (bd_num + 1) * sizeof(struct tsi721_dma_desc),
+ bd_ptr, bd_phys);
+ bdma_chan->bd_base = NULL;
+
+ /* Free space allocated for status descriptors */
+ dma_free_coherent(dev,
+ sts_size * sizeof(struct tsi721_dma_sts),
+ sts_ptr, sts_phys);
+ bdma_chan->sts_base = NULL;
+
+ return -EIO;
+ }
+ }
+#endif /* CONFIG_PCI_MSI */
+
/* Toggle DMA channel initialization */
iowrite32(TSI721_DMAC_CTL_INIT, bdma_chan->regs + TSI721_DMAC_CTL);
ioread32(bdma_chan->regs + TSI721_DMAC_CTL);
@@ -147,6 +214,9 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan)
static int tsi721_bdma_ch_free(struct tsi721_bdma_chan *bdma_chan)
{
u32 ch_stat;
+#ifdef CONFIG_PCI_MSI
+ struct tsi721_device *priv = to_tsi721(bdma_chan->dchan.device);
+#endif
if (bdma_chan->bd_base == NULL)
return 0;
@@ -159,9 +229,18 @@ static int tsi721_bdma_ch_free(struct tsi721_bdma_chan *bdma_chan)
/* Put DMA channel into init state */
iowrite32(TSI721_DMAC_CTL_INIT, bdma_chan->regs + TSI721_DMAC_CTL);
+#ifdef CONFIG_PCI_MSI
+ if (priv->flags & TSI721_USING_MSIX) {
+ free_irq(priv->msix[TSI721_VECT_DMA0_DONE +
+ bdma_chan->id].vector, (void *)bdma_chan);
+ free_irq(priv->msix[TSI721_VECT_DMA0_INT +
+ bdma_chan->id].vector, (void *)bdma_chan);
+ }
+#endif /* CONFIG_PCI_MSI */
+
/* Free space allocated for DMA descriptors */
dma_free_coherent(bdma_chan->dchan.device->dev,
- bdma_chan->bd_num * sizeof(struct tsi721_dma_desc),
+ (bdma_chan->bd_num + 1) * sizeof(struct tsi721_dma_desc),
bdma_chan->bd_base, bdma_chan->bd_phys);
bdma_chan->bd_base = NULL;
@@ -243,8 +322,8 @@ static void tsi721_start_dma(struct tsi721_bdma_chan *bdma_chan)
}
dev_dbg(bdma_chan->dchan.device->dev,
- "tx_chan: %p, chan: %d, regs: %p\n",
- bdma_chan, bdma_chan->dchan.chan_id, bdma_chan->regs);
+ "%s: chan_%d (wrc=%d)\n", __func__, bdma_chan->id,
+ bdma_chan->wr_count_next);
iowrite32(bdma_chan->wr_count_next,
bdma_chan->regs + TSI721_DMAC_DWRCNT);
@@ -253,72 +332,19 @@ static void tsi721_start_dma(struct tsi721_bdma_chan *bdma_chan)
bdma_chan->wr_count = bdma_chan->wr_count_next;
}
-static void tsi721_desc_put(struct tsi721_bdma_chan *bdma_chan,
- struct tsi721_tx_desc *desc)
-{
- dev_dbg(bdma_chan->dchan.device->dev,
- "Put desc: %p into free list\n", desc);
-
- if (desc) {
- spin_lock_bh(&bdma_chan->lock);
- list_splice_init(&desc->tx_list, &bdma_chan->free_list);
- list_add(&desc->desc_node, &bdma_chan->free_list);
- bdma_chan->wr_count_next = bdma_chan->wr_count;
- spin_unlock_bh(&bdma_chan->lock);
- }
-}
-
-static
-struct tsi721_tx_desc *tsi721_desc_get(struct tsi721_bdma_chan *bdma_chan)
-{
- struct tsi721_tx_desc *tx_desc, *_tx_desc;
- struct tsi721_tx_desc *ret = NULL;
- int i;
-
- spin_lock_bh(&bdma_chan->lock);
- list_for_each_entry_safe(tx_desc, _tx_desc,
- &bdma_chan->free_list, desc_node) {
- if (async_tx_test_ack(&tx_desc->txd)) {
- list_del(&tx_desc->desc_node);
- ret = tx_desc;
- break;
- }
- dev_dbg(bdma_chan->dchan.device->dev,
- "desc %p not ACKed\n", tx_desc);
- }
-
- if (ret == NULL) {
- dev_dbg(bdma_chan->dchan.device->dev,
- "%s: unable to obtain tx descriptor\n", __func__);
- goto err_out;
- }
-
- i = bdma_chan->wr_count_next % bdma_chan->bd_num;
- if (i == bdma_chan->bd_num - 1) {
- i = 0;
- bdma_chan->wr_count_next++; /* skip link descriptor */
- }
-
- bdma_chan->wr_count_next++;
- tx_desc->txd.phys = bdma_chan->bd_phys +
- i * sizeof(struct tsi721_dma_desc);
- tx_desc->hw_desc = &((struct tsi721_dma_desc *)bdma_chan->bd_base)[i];
-err_out:
- spin_unlock_bh(&bdma_chan->lock);
-
- return ret;
-}
-
static int
-tsi721_desc_fill_init(struct tsi721_tx_desc *desc, struct scatterlist *sg,
- enum dma_rtype rtype, u32 sys_size)
+tsi721_desc_fill_init(struct tsi721_tx_desc *desc,
+ struct tsi721_dma_desc *bd_ptr,
+ struct scatterlist *sg, u32 sys_size)
{
- struct tsi721_dma_desc *bd_ptr = desc->hw_desc;
u64 rio_addr;
+ if (bd_ptr == NULL)
+ return -EINVAL;
+
/* Initialize DMA descriptor */
bd_ptr->type_id = cpu_to_le32((DTYPE1 << 29) |
- (rtype << 19) | desc->destid);
+ (desc->rtype << 19) | desc->destid);
bd_ptr->bcount = cpu_to_le32(((desc->rio_addr & 0x3) << 30) |
(sys_size << 26));
rio_addr = (desc->rio_addr >> 2) |
@@ -335,51 +361,32 @@ tsi721_desc_fill_init(struct tsi721_tx_desc *desc, struct scatterlist *sg,
}
static int
-tsi721_desc_fill_end(struct tsi721_tx_desc *desc)
+tsi721_desc_fill_end(struct tsi721_dma_desc *bd_ptr, u32 bcount, bool interrupt)
{
- struct tsi721_dma_desc *bd_ptr = desc->hw_desc;
+ if (bd_ptr == NULL)
+ return -EINVAL;
/* Update DMA descriptor */
- if (desc->interrupt)
+ if (interrupt)
bd_ptr->type_id |= cpu_to_le32(TSI721_DMAD_IOF);
- bd_ptr->bcount |= cpu_to_le32(desc->bcount & TSI721_DMAD_BCOUNT1);
+ bd_ptr->bcount |= cpu_to_le32(bcount & TSI721_DMAD_BCOUNT1);
return 0;
}
-
-static void tsi721_dma_chain_complete(struct tsi721_bdma_chan *bdma_chan,
- struct tsi721_tx_desc *desc)
+static void tsi721_dma_tx_err(struct tsi721_bdma_chan *bdma_chan,
+ struct tsi721_tx_desc *desc)
{
struct dma_async_tx_descriptor *txd = &desc->txd;
dma_async_tx_callback callback = txd->callback;
void *param = txd->callback_param;
- list_splice_init(&desc->tx_list, &bdma_chan->free_list);
list_move(&desc->desc_node, &bdma_chan->free_list);
- bdma_chan->completed_cookie = txd->cookie;
if (callback)
callback(param);
}
-static void tsi721_dma_complete_all(struct tsi721_bdma_chan *bdma_chan)
-{
- struct tsi721_tx_desc *desc, *_d;
- LIST_HEAD(list);
-
- BUG_ON(!tsi721_dma_is_idle(bdma_chan));
-
- if (!list_empty(&bdma_chan->queue))
- tsi721_start_dma(bdma_chan);
-
- list_splice_init(&bdma_chan->active_list, &list);
- list_splice_init(&bdma_chan->queue, &bdma_chan->active_list);
-
- list_for_each_entry_safe(desc, _d, &list, desc_node)
- tsi721_dma_chain_complete(bdma_chan, desc);
-}
-
static void tsi721_clr_stat(struct tsi721_bdma_chan *bdma_chan)
{
u32 srd_ptr;
@@ -403,20 +410,159 @@ static void tsi721_clr_stat(struct tsi721_bdma_chan *bdma_chan)
bdma_chan->sts_rdptr = srd_ptr;
}
+/* Must be called with the channel spinlock held */
+static int tsi721_submit_sg(struct tsi721_tx_desc *desc)
+{
+ struct dma_chan *dchan = desc->txd.chan;
+ struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
+ u32 sys_size;
+ u64 rio_addr;
+ dma_addr_t next_addr;
+ u32 bcount;
+ struct scatterlist *sg;
+ unsigned int i;
+ int err = 0;
+ struct tsi721_dma_desc *bd_ptr = NULL;
+ u32 idx, rd_idx;
+ u32 add_count = 0;
+
+ if (!tsi721_dma_is_idle(bdma_chan)) {
+ dev_err(bdma_chan->dchan.device->dev,
+ "BUG: Attempt to use non-idle channel\n");
+ return -EIO;
+ }
+
+ /*
+ * Fill DMA channel's hardware buffer descriptors.
+ * (NOTE: RapidIO destination address is limited to 64 bits for now)
+ */
+ rio_addr = desc->rio_addr;
+ next_addr = -1;
+ bcount = 0;
+ sys_size = dma_to_mport(bdma_chan->dchan.device)->sys_size;
+
+ rd_idx = ioread32(bdma_chan->regs + TSI721_DMAC_DRDCNT);
+ rd_idx %= (bdma_chan->bd_num + 1);
+
+ idx = bdma_chan->wr_count_next % (bdma_chan->bd_num + 1);
+ if (idx == bdma_chan->bd_num) {
+ /* wrap around link descriptor */
+ idx = 0;
+ add_count++;
+ }
+
+ dev_dbg(dchan->device->dev, "%s: BD ring status: rdi=%d wri=%d\n",
+ __func__, rd_idx, idx);
+
+ for_each_sg(desc->sg, sg, desc->sg_len, i) {
+
+ dev_dbg(dchan->device->dev, "sg%d/%d addr: 0x%llx len: %d\n",
+ i, desc->sg_len,
+ (unsigned long long)sg_dma_address(sg), sg_dma_len(sg));
+
+ if (sg_dma_len(sg) > TSI721_BDMA_MAX_BCOUNT) {
+ dev_err(dchan->device->dev,
+ "%s: SG entry %d is too large\n", __func__, i);
+ err = -EINVAL;
+ break;
+ }
+
+ /*
+ * If this sg entry forms contiguous block with previous one,
+ * try to merge it into existing DMA descriptor
+ */
+ if (next_addr == sg_dma_address(sg) &&
+ bcount + sg_dma_len(sg) <= TSI721_BDMA_MAX_BCOUNT) {
+ /* Adjust byte count of the descriptor */
+ bcount += sg_dma_len(sg);
+ goto entry_done;
+ } else if (next_addr != -1) {
+ /* Finalize descriptor using total byte count value */
+ tsi721_desc_fill_end(bd_ptr, bcount, 0);
+ dev_dbg(dchan->device->dev,
+ "%s: prev desc final len: %d\n",
+ __func__, bcount);
+ }
+
+ desc->rio_addr = rio_addr;
+
+ if (i && idx == rd_idx) {
+ dev_dbg(dchan->device->dev,
+ "%s: HW descriptor ring is full @ %d\n",
+ __func__, i);
+ desc->sg = sg;
+ desc->sg_len -= i;
+ break;
+ }
+
+ bd_ptr = &((struct tsi721_dma_desc *)bdma_chan->bd_base)[idx];
+ err = tsi721_desc_fill_init(desc, bd_ptr, sg, sys_size);
+ if (err) {
+ dev_err(dchan->device->dev,
+ "Failed to build desc: err=%d\n", err);
+ break;
+ }
+
+ dev_dbg(dchan->device->dev, "bd_ptr = %p did=%d raddr=0x%llx\n",
+ bd_ptr, desc->destid, desc->rio_addr);
+
+ next_addr = sg_dma_address(sg);
+ bcount = sg_dma_len(sg);
+
+ add_count++;
+ if (++idx == bdma_chan->bd_num) {
+ /* wrap around link descriptor */
+ idx = 0;
+ add_count++;
+ }
+
+entry_done:
+ if (sg_is_last(sg)) {
+ tsi721_desc_fill_end(bd_ptr, bcount, 0);
+ dev_dbg(dchan->device->dev, "%s: last desc final len: %d\n",
+ __func__, bcount);
+ desc->sg_len = 0;
+ } else {
+ rio_addr += sg_dma_len(sg);
+ next_addr += sg_dma_len(sg);
+ }
+ }
+
+ if (!err)
+ bdma_chan->wr_count_next += add_count;
+
+ return err;
+}
+
static void tsi721_advance_work(struct tsi721_bdma_chan *bdma_chan)
{
- if (list_empty(&bdma_chan->active_list) ||
- list_is_singular(&bdma_chan->active_list)) {
- dev_dbg(bdma_chan->dchan.device->dev,
- "%s: Active_list empty\n", __func__);
- tsi721_dma_complete_all(bdma_chan);
- } else {
- dev_dbg(bdma_chan->dchan.device->dev,
- "%s: Active_list NOT empty\n", __func__);
- tsi721_dma_chain_complete(bdma_chan,
- tsi721_dma_first_active(bdma_chan));
- tsi721_start_dma(bdma_chan);
+ struct tsi721_tx_desc *desc;
+ int err;
+
+ dev_dbg(bdma_chan->dchan.device->dev, "%s: Enter\n", __func__);
+
+ /*
+ * If there are any new transactions in the queue add them
+ * into the processing list
+ */
+ if (!list_empty(&bdma_chan->queue))
+ list_splice_init(&bdma_chan->queue, &bdma_chan->active_list);
+
+ /* Start new transaction (if available) */
+ if (!list_empty(&bdma_chan->active_list)) {
+ desc = tsi721_dma_first_active(bdma_chan);
+ err = tsi721_submit_sg(desc);
+ if (!err)
+ tsi721_start_dma(bdma_chan);
+ else {
+ tsi721_dma_tx_err(bdma_chan, desc);
+ dev_dbg(bdma_chan->dchan.device->dev,
+ "ERR: tsi721_submit_sg failed with err=%d\n",
+ err);
+ }
}
+
+ dev_dbg(bdma_chan->dchan.device->dev, "%s: Exit\n", __func__);
}
static void tsi721_dma_tasklet(unsigned long data)
@@ -444,8 +590,29 @@ static void tsi721_dma_tasklet(unsigned long data)
}
if (dmac_int & (TSI721_DMAC_INT_DONE | TSI721_DMAC_INT_IOFDONE)) {
+ struct tsi721_tx_desc *desc;
+
tsi721_clr_stat(bdma_chan);
spin_lock(&bdma_chan->lock);
+ desc = tsi721_dma_first_active(bdma_chan);
+
+ if (desc->sg_len == 0) {
+ dma_async_tx_callback callback = NULL;
+ void *param = NULL;
+
+ desc->status = DMA_COMPLETE;
+ dma_cookie_complete(&desc->txd);
+ if (desc->txd.flags & DMA_PREP_INTERRUPT) {
+ callback = desc->txd.callback;
+ param = desc->txd.callback_param;
+ }
+ list_move(&desc->desc_node, &bdma_chan->free_list);
+ spin_unlock(&bdma_chan->lock);
+ if (callback)
+ callback(param);
+ spin_lock(&bdma_chan->lock);
+ }
+
tsi721_advance_work(bdma_chan);
spin_unlock(&bdma_chan->lock);
}
@@ -460,21 +627,24 @@ static dma_cookie_t tsi721_tx_submit(struct dma_async_tx_descriptor *txd)
struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(txd->chan);
dma_cookie_t cookie;
- spin_lock_bh(&bdma_chan->lock);
+ /* Check if the descriptor is detached from any lists */
+ if (!list_empty(&desc->desc_node)) {
+ dev_err(bdma_chan->dchan.device->dev,
+ "%s: wrong state of descriptor %p\n", __func__, txd);
+ return -EIO;
+ }
- cookie = txd->chan->cookie;
- if (++cookie < 0)
- cookie = 1;
- txd->chan->cookie = cookie;
- txd->cookie = cookie;
+ spin_lock_bh(&bdma_chan->lock);
- if (list_empty(&bdma_chan->active_list)) {
- list_add_tail(&desc->desc_node, &bdma_chan->active_list);
- tsi721_start_dma(bdma_chan);
- } else {
- list_add_tail(&desc->desc_node, &bdma_chan->queue);
+ if (!bdma_chan->active) {
+ spin_unlock_bh(&bdma_chan->lock);
+ return -ENODEV;
}
+ cookie = dma_cookie_assign(txd);
+ desc->status = DMA_IN_PROGRESS;
+ list_add_tail(&desc->desc_node, &bdma_chan->queue);
+
spin_unlock_bh(&bdma_chan->lock);
return cookie;
}
@@ -482,115 +652,52 @@ static dma_cookie_t tsi721_tx_submit(struct dma_async_tx_descriptor *txd)
static int tsi721_alloc_chan_resources(struct dma_chan *dchan)
{
struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
-#ifdef CONFIG_PCI_MSI
- struct tsi721_device *priv = to_tsi721(dchan->device);
-#endif
struct tsi721_tx_desc *desc = NULL;
- LIST_HEAD(tmp_list);
int i;
- int rc;
+
+ dev_dbg(dchan->device->dev, "%s: for channel %d\n",
+ __func__, bdma_chan->id);
if (bdma_chan->bd_base)
- return bdma_chan->bd_num - 1;
+ return TSI721_DMA_TX_QUEUE_SZ;
/* Initialize BDMA channel */
- if (tsi721_bdma_ch_init(bdma_chan)) {
+ if (tsi721_bdma_ch_init(bdma_chan, dma_desc_per_channel)) {
dev_err(dchan->device->dev, "Unable to initialize data DMA"
" channel %d, aborting\n", bdma_chan->id);
- return -ENOMEM;
+ return -ENODEV;
}
- /* Alocate matching number of logical descriptors */
- desc = kcalloc((bdma_chan->bd_num - 1), sizeof(struct tsi721_tx_desc),
+ /* Allocate queue of transaction descriptors */
+ desc = kcalloc(TSI721_DMA_TX_QUEUE_SZ, sizeof(struct tsi721_tx_desc),
GFP_KERNEL);
if (!desc) {
dev_err(dchan->device->dev,
"Failed to allocate logical descriptors\n");
- rc = -ENOMEM;
- goto err_out;
+ tsi721_bdma_ch_free(bdma_chan);
+ return -ENOMEM;
}
bdma_chan->tx_desc = desc;
- for (i = 0; i < bdma_chan->bd_num - 1; i++) {
+ for (i = 0; i < TSI721_DMA_TX_QUEUE_SZ; i++) {
dma_async_tx_descriptor_init(&desc[i].txd, dchan);
desc[i].txd.tx_submit = tsi721_tx_submit;
desc[i].txd.flags = DMA_CTRL_ACK;
- INIT_LIST_HEAD(&desc[i].tx_list);
- list_add_tail(&desc[i].desc_node, &tmp_list);
+ list_add(&desc[i].desc_node, &bdma_chan->free_list);
}
- spin_lock_bh(&bdma_chan->lock);
- list_splice(&tmp_list, &bdma_chan->free_list);
- bdma_chan->completed_cookie = dchan->cookie = 1;
- spin_unlock_bh(&bdma_chan->lock);
-
-#ifdef CONFIG_PCI_MSI
- if (priv->flags & TSI721_USING_MSIX) {
- /* Request interrupt service if we are in MSI-X mode */
- rc = request_irq(
- priv->msix[TSI721_VECT_DMA0_DONE +
- bdma_chan->id].vector,
- tsi721_bdma_msix, 0,
- priv->msix[TSI721_VECT_DMA0_DONE +
- bdma_chan->id].irq_name,
- (void *)bdma_chan);
-
- if (rc) {
- dev_dbg(dchan->device->dev,
- "Unable to allocate MSI-X interrupt for "
- "BDMA%d-DONE\n", bdma_chan->id);
- goto err_out;
- }
-
- rc = request_irq(priv->msix[TSI721_VECT_DMA0_INT +
- bdma_chan->id].vector,
- tsi721_bdma_msix, 0,
- priv->msix[TSI721_VECT_DMA0_INT +
- bdma_chan->id].irq_name,
- (void *)bdma_chan);
-
- if (rc) {
- dev_dbg(dchan->device->dev,
- "Unable to allocate MSI-X interrupt for "
- "BDMA%d-INT\n", bdma_chan->id);
- free_irq(
- priv->msix[TSI721_VECT_DMA0_DONE +
- bdma_chan->id].vector,
- (void *)bdma_chan);
- rc = -EIO;
- goto err_out;
- }
- }
-#endif /* CONFIG_PCI_MSI */
+ dma_cookie_init(dchan);
bdma_chan->active = true;
tsi721_bdma_interrupt_enable(bdma_chan, 1);
- return bdma_chan->bd_num - 1;
-
-err_out:
- kfree(desc);
- tsi721_bdma_ch_free(bdma_chan);
- return rc;
+ return TSI721_DMA_TX_QUEUE_SZ;
}
-static void tsi721_free_chan_resources(struct dma_chan *dchan)
+static void tsi721_sync_dma_irq(struct tsi721_bdma_chan *bdma_chan)
{
- struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
- struct tsi721_device *priv = to_tsi721(dchan->device);
- LIST_HEAD(list);
-
- dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
-
- if (bdma_chan->bd_base == NULL)
- return;
-
- BUG_ON(!list_empty(&bdma_chan->active_list));
- BUG_ON(!list_empty(&bdma_chan->queue));
-
- tsi721_bdma_interrupt_enable(bdma_chan, 0);
- bdma_chan->active = false;
+ struct tsi721_device *priv = to_tsi721(bdma_chan->dchan.device);
#ifdef CONFIG_PCI_MSI
if (priv->flags & TSI721_USING_MSIX) {
@@ -601,64 +708,48 @@ static void tsi721_free_chan_resources(struct dma_chan *dchan)
} else
#endif
synchronize_irq(priv->pdev->irq);
+}
- tasklet_kill(&bdma_chan->tasklet);
+static void tsi721_free_chan_resources(struct dma_chan *dchan)
+{
+ struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
- spin_lock_bh(&bdma_chan->lock);
- list_splice_init(&bdma_chan->free_list, &list);
- spin_unlock_bh(&bdma_chan->lock);
+ dev_dbg(dchan->device->dev, "%s: for channel %d\n",
+ __func__, bdma_chan->id);
-#ifdef CONFIG_PCI_MSI
- if (priv->flags & TSI721_USING_MSIX) {
- free_irq(priv->msix[TSI721_VECT_DMA0_DONE +
- bdma_chan->id].vector, (void *)bdma_chan);
- free_irq(priv->msix[TSI721_VECT_DMA0_INT +
- bdma_chan->id].vector, (void *)bdma_chan);
- }
-#endif /* CONFIG_PCI_MSI */
+ if (bdma_chan->bd_base == NULL)
+ return;
- tsi721_bdma_ch_free(bdma_chan);
+ BUG_ON(!list_empty(&bdma_chan->active_list));
+ BUG_ON(!list_empty(&bdma_chan->queue));
+
+ tsi721_bdma_interrupt_enable(bdma_chan, 0);
+ bdma_chan->active = false;
+ tsi721_sync_dma_irq(bdma_chan);
+ tasklet_kill(&bdma_chan->tasklet);
+ INIT_LIST_HEAD(&bdma_chan->free_list);
kfree(bdma_chan->tx_desc);
+ tsi721_bdma_ch_free(bdma_chan);
}
static
enum dma_status tsi721_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
struct dma_tx_state *txstate)
{
- struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
- dma_cookie_t last_used;
- dma_cookie_t last_completed;
- int ret;
-
- spin_lock_bh(&bdma_chan->lock);
- last_completed = bdma_chan->completed_cookie;
- last_used = dchan->cookie;
- spin_unlock_bh(&bdma_chan->lock);
-
- ret = dma_async_is_complete(cookie, last_completed, last_used);
-
- dma_set_tx_state(txstate, last_completed, last_used, 0);
-
- dev_dbg(dchan->device->dev,
- "%s: exit, ret: %d, last_completed: %d, last_used: %d\n",
- __func__, ret, last_completed, last_used);
-
- return ret;
+ return dma_cookie_status(dchan, cookie, txstate);
}
static void tsi721_issue_pending(struct dma_chan *dchan)
{
struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
- dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
+ dev_dbg(dchan->device->dev, "%s: Enter\n", __func__);
- if (tsi721_dma_is_idle(bdma_chan)) {
+ if (tsi721_dma_is_idle(bdma_chan) && bdma_chan->active) {
spin_lock_bh(&bdma_chan->lock);
tsi721_advance_work(bdma_chan);
spin_unlock_bh(&bdma_chan->lock);
- } else
- dev_dbg(dchan->device->dev,
- "%s: DMA channel still busy\n", __func__);
+ }
}
static
@@ -668,21 +759,19 @@ struct dma_async_tx_descriptor *tsi721_prep_rio_sg(struct dma_chan *dchan,
void *tinfo)
{
struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
- struct tsi721_tx_desc *desc = NULL;
- struct tsi721_tx_desc *first = NULL;
- struct scatterlist *sg;
+ struct tsi721_tx_desc *desc, *_d;
struct rio_dma_ext *rext = tinfo;
- u64 rio_addr = rext->rio_addr; /* limited to 64-bit rio_addr for now */
- unsigned int i;
- u32 sys_size = dma_to_mport(dchan->device)->sys_size;
enum dma_rtype rtype;
- dma_addr_t next_addr = -1;
+ struct dma_async_tx_descriptor *txd = NULL;
if (!sgl || !sg_len) {
dev_err(dchan->device->dev, "%s: No SG list\n", __func__);
return NULL;
}
+ dev_dbg(dchan->device->dev, "%s: %s\n", __func__,
+ (dir == DMA_DEV_TO_MEM) ? "READ" : "WRITE");
+
if (dir == DMA_DEV_TO_MEM)
rtype = NREAD;
else if (dir == DMA_MEM_TO_DEV) {
@@ -704,97 +793,26 @@ struct dma_async_tx_descriptor *tsi721_prep_rio_sg(struct dma_chan *dchan,
return NULL;
}
- for_each_sg(sgl, sg, sg_len, i) {
- int err;
-
- if (sg_dma_len(sg) > TSI721_BDMA_MAX_BCOUNT) {
- dev_err(dchan->device->dev,
- "%s: SG entry %d is too large\n", __func__, i);
- goto err_desc_put;
- }
-
- /*
- * If this sg entry forms contiguous block with previous one,
- * try to merge it into existing DMA descriptor
- */
- if (desc) {
- if (next_addr == sg_dma_address(sg) &&
- desc->bcount + sg_dma_len(sg) <=
- TSI721_BDMA_MAX_BCOUNT) {
- /* Adjust byte count of the descriptor */
- desc->bcount += sg_dma_len(sg);
- goto entry_done;
- }
-
- /*
- * Finalize this descriptor using total
- * byte count value.
- */
- tsi721_desc_fill_end(desc);
- dev_dbg(dchan->device->dev, "%s: desc final len: %d\n",
- __func__, desc->bcount);
- }
-
- /*
- * Obtain and initialize a new descriptor
- */
- desc = tsi721_desc_get(bdma_chan);
- if (!desc) {
- dev_err(dchan->device->dev,
- "%s: Failed to get new descriptor for SG %d\n",
- __func__, i);
- goto err_desc_put;
- }
-
- desc->destid = rext->destid;
- desc->rio_addr = rio_addr;
- desc->rio_addr_u = 0;
- desc->bcount = sg_dma_len(sg);
-
- dev_dbg(dchan->device->dev,
- "sg%d desc: 0x%llx, addr: 0x%llx len: %d\n",
- i, (u64)desc->txd.phys,
- (unsigned long long)sg_dma_address(sg),
- sg_dma_len(sg));
-
- dev_dbg(dchan->device->dev,
- "bd_ptr = %p did=%d raddr=0x%llx\n",
- desc->hw_desc, desc->destid, desc->rio_addr);
-
- err = tsi721_desc_fill_init(desc, sg, rtype, sys_size);
- if (err) {
- dev_err(dchan->device->dev,
- "Failed to build desc: %d\n", err);
- goto err_desc_put;
- }
-
- next_addr = sg_dma_address(sg);
-
- if (!first)
- first = desc;
- else
- list_add_tail(&desc->desc_node, &first->tx_list);
+ spin_lock_bh(&bdma_chan->lock);
-entry_done:
- if (sg_is_last(sg)) {
- desc->interrupt = (flags & DMA_PREP_INTERRUPT) != 0;
- tsi721_desc_fill_end(desc);
- dev_dbg(dchan->device->dev, "%s: desc final len: %d\n",
- __func__, desc->bcount);
- } else {
- rio_addr += sg_dma_len(sg);
- next_addr += sg_dma_len(sg);
+ list_for_each_entry_safe(desc, _d, &bdma_chan->free_list, desc_node) {
+ if (async_tx_test_ack(&desc->txd)) {
+ list_del_init(&desc->desc_node);
+ desc->destid = rext->destid;
+ desc->rio_addr = rext->rio_addr;
+ desc->rio_addr_u = 0;
+ desc->rtype = rtype;
+ desc->sg_len = sg_len;
+ desc->sg = sgl;
+ txd = &desc->txd;
+ txd->flags = flags;
+ break;
}
}
- first->txd.cookie = -EBUSY;
- desc->txd.flags = flags;
-
- return &first->txd;
+ spin_unlock_bh(&bdma_chan->lock);
-err_desc_put:
- tsi721_desc_put(bdma_chan, first);
- return NULL;
+ return txd;
}
static int tsi721_device_control(struct dma_chan *dchan, enum dma_ctrl_cmd cmd,
@@ -802,23 +820,34 @@ static int tsi721_device_control(struct dma_chan *dchan, enum dma_ctrl_cmd cmd,
{
struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
struct tsi721_tx_desc *desc, *_d;
+ u32 dmac_int;
LIST_HEAD(list);
dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
if (cmd != DMA_TERMINATE_ALL)
- return -ENXIO;
+ return -ENOSYS;
spin_lock_bh(&bdma_chan->lock);
- /* make sure to stop the transfer */
- iowrite32(TSI721_DMAC_CTL_SUSP, bdma_chan->regs + TSI721_DMAC_CTL);
+ bdma_chan->active = false;
+
+ if (!tsi721_dma_is_idle(bdma_chan)) {
+ /* make sure to stop the transfer */
+ iowrite32(TSI721_DMAC_CTL_SUSP,
+ bdma_chan->regs + TSI721_DMAC_CTL);
+
+ /* Wait until DMA channel stops */
+ do {
+ dmac_int = ioread32(bdma_chan->regs + TSI721_DMAC_INT);
+ } while ((dmac_int & TSI721_DMAC_INT_SUSP) == 0);
+ }
list_splice_init(&bdma_chan->active_list, &list);
list_splice_init(&bdma_chan->queue, &list);
list_for_each_entry_safe(desc, _d, &list, desc_node)
- tsi721_dma_chain_complete(bdma_chan, desc);
+ tsi721_dma_tx_err(bdma_chan, desc);
spin_unlock_bh(&bdma_chan->lock);
@@ -828,22 +857,18 @@ static int tsi721_device_control(struct dma_chan *dchan, enum dma_ctrl_cmd cmd,
int tsi721_register_dma(struct tsi721_device *priv)
{
int i;
- int nr_channels = TSI721_DMA_MAXCH;
+ int nr_channels = 0;
int err;
struct rio_mport *mport = priv->mport;
- mport->dma.dev = &priv->pdev->dev;
- mport->dma.chancnt = nr_channels;
-
INIT_LIST_HEAD(&mport->dma.channels);
- for (i = 0; i < nr_channels; i++) {
+ for (i = 0; i < TSI721_DMA_MAXCH; i++) {
struct tsi721_bdma_chan *bdma_chan = &priv->bdma[i];
if (i == TSI721_DMACH_MAINT)
continue;
- bdma_chan->bd_num = TSI721_BDMA_BD_RING_SZ;
bdma_chan->regs = priv->regs + TSI721_DMAC_BASE(i);
bdma_chan->dchan.device = &mport->dma;
@@ -862,12 +887,15 @@ int tsi721_register_dma(struct tsi721_device *priv)
(unsigned long)bdma_chan);
list_add_tail(&bdma_chan->dchan.device_node,
&mport->dma.channels);
+ nr_channels++;
}
+ mport->dma.chancnt = nr_channels;
dma_cap_zero(mport->dma.cap_mask);
dma_cap_set(DMA_PRIVATE, mport->dma.cap_mask);
dma_cap_set(DMA_SLAVE, mport->dma.cap_mask);
+ mport->dma.dev = &priv->pdev->dev;
mport->dma.device_alloc_chan_resources = tsi721_alloc_chan_resources;
mport->dma.device_free_chan_resources = tsi721_free_chan_resources;
mport->dma.device_tx_status = tsi721_tx_status;
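
The reworked tsi721 DMA path allocates bd_num hardware descriptors plus one extra DTYPE3 link descriptor that points back to the start of the ring, so ring indices now wrap modulo bd_num + 1 and the link slot is skipped. A condensed sketch of that index handling, mirroring the tsi721_submit_sg() hunk above (simplified, error handling omitted):

/* Sketch: write-index arithmetic with a trailing link descriptor. */
u32 idx = bdma_chan->wr_count_next % (bdma_chan->bd_num + 1);

if (idx == bdma_chan->bd_num) {
	/* Landed on the link descriptor: wrap to the first data slot. */
	idx = 0;
	add_count++;	/* the skipped link slot still advances the write count */
}
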
diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
index a54ba0494dd3..d7b87c64b7cd 100644
--- a/drivers/rapidio/rio.c
+++ b/drivers/rapidio/rio.c
@@ -1509,30 +1509,39 @@ EXPORT_SYMBOL_GPL(rio_route_clr_table);
static bool rio_chan_filter(struct dma_chan *chan, void *arg)
{
- struct rio_dev *rdev = arg;
+ struct rio_mport *mport = arg;
/* Check that DMA device belongs to the right MPORT */
- return (rdev->net->hport ==
- container_of(chan->device, struct rio_mport, dma));
+ return mport == container_of(chan->device, struct rio_mport, dma);
}
/**
- * rio_request_dma - request RapidIO capable DMA channel that supports
- * specified target RapidIO device.
- * @rdev: RIO device control structure
+ * rio_request_mport_dma - request RapidIO capable DMA channel associated
+ * with specified local RapidIO mport device.
+ * @mport: RIO mport to perform DMA data transfers
*
* Returns pointer to allocated DMA channel or NULL if failed.
*/
-struct dma_chan *rio_request_dma(struct rio_dev *rdev)
+struct dma_chan *rio_request_mport_dma(struct rio_mport *mport)
{
dma_cap_mask_t mask;
- struct dma_chan *dchan;
dma_cap_zero(mask);
dma_cap_set(DMA_SLAVE, mask);
- dchan = dma_request_channel(mask, rio_chan_filter, rdev);
+ return dma_request_channel(mask, rio_chan_filter, mport);
+}
+EXPORT_SYMBOL_GPL(rio_request_mport_dma);
- return dchan;
+/**
+ * rio_request_dma - request RapidIO capable DMA channel that supports
+ * specified target RapidIO device.
+ * @rdev: RIO device associated with DMA transfer
+ *
+ * Returns pointer to allocated DMA channel or NULL if failed.
+ */
+struct dma_chan *rio_request_dma(struct rio_dev *rdev)
+{
+ return rio_request_mport_dma(rdev->net->hport);
}
EXPORT_SYMBOL_GPL(rio_request_dma);
@@ -1547,10 +1556,10 @@ void rio_release_dma(struct dma_chan *dchan)
EXPORT_SYMBOL_GPL(rio_release_dma);
/**
- * rio_dma_prep_slave_sg - RapidIO specific wrapper
+ * rio_dma_prep_xfer - RapidIO specific wrapper
* for device_prep_slave_sg callback defined by DMAENGINE.
- * @rdev: RIO device control structure
* @dchan: DMA channel to configure
+ * @destid: target RapidIO device destination ID
* @data: RIO specific data descriptor
* @direction: DMA data transfer direction (TO or FROM the device)
* @flags: dmaengine defined flags
@@ -1560,11 +1569,10 @@ EXPORT_SYMBOL_GPL(rio_release_dma);
* target RIO device.
* Returns pointer to DMA transaction descriptor or NULL if failed.
*/
-struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev,
- struct dma_chan *dchan, struct rio_dma_data *data,
+struct dma_async_tx_descriptor *rio_dma_prep_xfer(struct dma_chan *dchan,
+ u16 destid, struct rio_dma_data *data,
enum dma_transfer_direction direction, unsigned long flags)
{
- struct dma_async_tx_descriptor *txd = NULL;
struct rio_dma_ext rio_ext;
if (dchan->device->device_prep_slave_sg == NULL) {
@@ -1572,15 +1580,35 @@ struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev,
return NULL;
}
- rio_ext.destid = rdev->destid;
+ rio_ext.destid = destid;
rio_ext.rio_addr_u = data->rio_addr_u;
rio_ext.rio_addr = data->rio_addr;
rio_ext.wr_type = data->wr_type;
- txd = dmaengine_prep_rio_sg(dchan, data->sg, data->sg_len,
- direction, flags, &rio_ext);
+ return dmaengine_prep_rio_sg(dchan, data->sg, data->sg_len,
+ direction, flags, &rio_ext);
+}
+EXPORT_SYMBOL_GPL(rio_dma_prep_xfer);
- return txd;
+/**
+ * rio_dma_prep_slave_sg - RapidIO specific wrapper
+ * for device_prep_slave_sg callback defined by DMAENGINE.
+ * @rdev: RIO device control structure
+ * @dchan: DMA channel to configure
+ * @data: RIO specific data descriptor
+ * @direction: DMA data transfer direction (TO or FROM the device)
+ * @flags: dmaengine defined flags
+ *
+ * Initializes RapidIO capable DMA channel for the specified data transfer.
+ * Uses DMA channel private extension to pass information related to remote
+ * target RIO device.
+ * Returns pointer to DMA transaction descriptor or NULL if failed.
+ */
+struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev,
+ struct dma_chan *dchan, struct rio_dma_data *data,
+ enum dma_transfer_direction direction, unsigned long flags)
+{
+ return rio_dma_prep_xfer(dchan, rdev->destid, data, direction, flags);
}
EXPORT_SYMBOL_GPL(rio_dma_prep_slave_sg);
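
With the new mport-level API, a client that only has a destination ID (and no struct rio_dev) can drive DMA directly. A hedged usage sketch, assuming mport, destid and dma_data are already set up inside a hypothetical caller:

struct dma_chan *dchan;
struct dma_async_tx_descriptor *txd;

dchan = rio_request_mport_dma(mport);
if (!dchan)
	return -ENODEV;

txd = rio_dma_prep_xfer(dchan, destid, &dma_data,
			DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
if (!txd) {
	rio_release_dma(dchan);
	return -EIO;
}
/* then dmaengine_submit(txd) and dma_async_issue_pending(dchan) as usual */
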
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 0754f5c7cb3b..a168e96142b9 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -373,6 +373,14 @@ config RTC_DRV_PCF8563
This driver can also be built as a module. If so, the module
will be called rtc-pcf8563.
+config RTC_DRV_PCF85063
+ tristate "nxp PCF85063"
+ help
+ If you say yes here you get support for the PCF85063 RTC chip
+
+ This driver can also be built as a module. If so, the module
+ will be called rtc-pcf85063.
+
config RTC_DRV_PCF8583
tristate "Philips PCF8583"
help
@@ -760,6 +768,15 @@ config RTC_DRV_DS1742
This driver can also be built as a module. If so, the module
will be called rtc-ds1742.
+config RTC_DRV_DS2404
+ tristate "Maxim/Dallas DS2404"
+ help
+ If you say yes here you get support for the
+ Dallas DS2404 RTC chip.
+
+ This driver can also be built as a module. If so, the module
+ will be called rtc-ds2404.
+
config RTC_DRV_DA9052
tristate "Dialog DA9052/DA9053 RTC"
depends on PMIC_DA9052
@@ -789,7 +806,7 @@ config RTC_DRV_DA9063
config RTC_DRV_EFI
tristate "EFI RTC"
- depends on IA64
+ depends on EFI
help
If you say yes here you will get support for the EFI
Real Time Clock.
@@ -873,15 +890,6 @@ config RTC_DRV_V3020
This driver can also be built as a module. If so, the module
will be called rtc-v3020.
-config RTC_DRV_DS2404
- tristate "Dallas DS2404"
- help
- If you say yes here you get support for the
- Dallas DS2404 RTC chip.
-
- This driver can also be built as a module. If so, the module
- will be called rtc-ds2404.
-
config RTC_DRV_WM831X
tristate "Wolfson Microelectronics WM831x RTC"
depends on MFD_WM831X
@@ -1349,6 +1357,7 @@ config RTC_DRV_SIRFSOC
config RTC_DRV_MOXART
tristate "MOXA ART RTC"
+ depends on ARCH_MOXART || COMPILE_TEST
help
If you say yes here you get support for the MOXA ART
RTC module.
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 70347d041d10..56f061c7c815 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -10,6 +10,10 @@ obj-$(CONFIG_RTC_SYSTOHC) += systohc.o
obj-$(CONFIG_RTC_CLASS) += rtc-core.o
rtc-core-y := class.o interface.o
+ifdef CONFIG_RTC_DRV_EFI
+rtc-core-y += rtc-efi-platform.o
+endif
+
rtc-core-$(CONFIG_RTC_INTF_DEV) += rtc-dev.o
rtc-core-$(CONFIG_RTC_INTF_PROC) += rtc-proc.o
rtc-core-$(CONFIG_RTC_INTF_SYSFS) += rtc-sysfs.o
@@ -93,6 +97,7 @@ obj-$(CONFIG_RTC_DRV_PCAP) += rtc-pcap.o
obj-$(CONFIG_RTC_DRV_PCF2127) += rtc-pcf2127.o
obj-$(CONFIG_RTC_DRV_PCF8523) += rtc-pcf8523.o
obj-$(CONFIG_RTC_DRV_PCF8563) += rtc-pcf8563.o
+obj-$(CONFIG_RTC_DRV_PCF85063) += rtc-pcf85063.o
obj-$(CONFIG_RTC_DRV_PCF8583) += rtc-pcf8583.o
obj-$(CONFIG_RTC_DRV_PCF2123) += rtc-pcf2123.o
obj-$(CONFIG_RTC_DRV_PCF50633) += rtc-pcf50633.o
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 589351ef75d0..38e26be705be 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -53,6 +53,7 @@ static int rtc_suspend(struct device *dev)
struct rtc_device *rtc = to_rtc_device(dev);
struct rtc_time tm;
struct timespec delta, delta_delta;
+ int err;
if (has_persistent_clock())
return 0;
@@ -61,7 +62,12 @@ static int rtc_suspend(struct device *dev)
return 0;
/* snapshot the current RTC and system time at suspend*/
- rtc_read_time(rtc, &tm);
+ err = rtc_read_time(rtc, &tm);
+ if (err < 0) {
+ pr_debug("%s: fail to read rtc time\n", dev_name(&rtc->dev));
+ return 0;
+ }
+
getnstimeofday(&old_system);
rtc_tm_to_time(&tm, &old_rtc.tv_sec);
@@ -94,6 +100,7 @@ static int rtc_resume(struct device *dev)
struct rtc_time tm;
struct timespec new_system, new_rtc;
struct timespec sleep_time;
+ int err;
if (has_persistent_clock())
return 0;
@@ -104,7 +111,12 @@ static int rtc_resume(struct device *dev)
/* snapshot the current rtc and system time at resume */
getnstimeofday(&new_system);
- rtc_read_time(rtc, &tm);
+ err = rtc_read_time(rtc, &tm);
+ if (err < 0) {
+ pr_debug("%s: fail to read rtc time\n", dev_name(&rtc->dev));
+ return 0;
+ }
+
if (rtc_valid_tm(&tm) != 0) {
pr_debug("%s: bogus resume time\n", dev_name(&rtc->dev));
return 0;
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 5813fa52c3d4..5b2717f5dafa 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -348,6 +348,8 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
/* Make sure we're not setting alarms in the past */
err = __rtc_read_time(rtc, &tm);
+ if (err)
+ return err;
rtc_tm_to_time(&tm, &now);
if (scheduled <= now)
return -ETIME;
diff --git a/drivers/rtc/rtc-ds1343.c b/drivers/rtc/rtc-ds1343.c
index c3719189dd96..ae9f997223b1 100644
--- a/drivers/rtc/rtc-ds1343.c
+++ b/drivers/rtc/rtc-ds1343.c
@@ -4,6 +4,7 @@
* Real Time Clock
*
* Author : Raghavendra Chandra Ganiga <ravi23ganiga@gmail.com>
+ * Ankur Srivastava <sankurece@gmail.com> : DS1343 Nvram Support
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -45,6 +46,9 @@
#define DS1343_CONTROL_REG 0x0F
#define DS1343_STATUS_REG 0x10
#define DS1343_TRICKLE_REG 0x11
+#define DS1343_NVRAM 0x20
+
+#define DS1343_NVRAM_LEN 96
/* DS1343 Control Registers bits */
#define DS1343_EOSC 0x80
@@ -149,6 +153,64 @@ static ssize_t ds1343_store_glitchfilter(struct device *dev,
static DEVICE_ATTR(glitch_filter, S_IRUGO | S_IWUSR, ds1343_show_glitchfilter,
ds1343_store_glitchfilter);
+static ssize_t ds1343_nvram_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ int ret;
+ unsigned char address;
+ struct device *dev = kobj_to_dev(kobj);
+ struct ds1343_priv *priv = dev_get_drvdata(dev);
+
+ if (unlikely(!count))
+ return count;
+
+ if ((count + off) > DS1343_NVRAM_LEN)
+ count = DS1343_NVRAM_LEN - off;
+
+ address = DS1343_NVRAM + off;
+
+ ret = regmap_bulk_write(priv->map, address, buf, count);
+ if (ret < 0)
+ dev_err(&priv->spi->dev, "Error in nvram write %d", ret);
+
+ return (ret < 0) ? ret : count;
+}
+
+
+static ssize_t ds1343_nvram_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ int ret;
+ unsigned char address;
+ struct device *dev = kobj_to_dev(kobj);
+ struct ds1343_priv *priv = dev_get_drvdata(dev);
+
+ if (unlikely(!count))
+ return count;
+
+ if ((count + off) > DS1343_NVRAM_LEN)
+ count = DS1343_NVRAM_LEN - off;
+
+ address = DS1343_NVRAM + off;
+
+ ret = regmap_bulk_read(priv->map, address, buf, count);
+ if (ret < 0)
+ dev_err(&priv->spi->dev, "Error in nvram read %d\n", ret);
+
+ return (ret < 0) ? ret : count;
+}
+
+
+static struct bin_attribute nvram_attr = {
+ .attr.name = "nvram",
+ .attr.mode = S_IRUGO | S_IWUSR,
+ .read = ds1343_nvram_read,
+ .write = ds1343_nvram_write,
+ .size = DS1343_NVRAM_LEN,
+};
+
static ssize_t ds1343_show_alarmstatus(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -274,12 +336,16 @@ static int ds1343_sysfs_register(struct device *dev)
if (err)
goto error1;
+ err = device_create_bin_file(dev, &nvram_attr);
+ if (err)
+ goto error2;
+
if (priv->irq <= 0)
return err;
err = device_create_file(dev, &dev_attr_alarm_mode);
if (err)
- goto error2;
+ goto error3;
err = device_create_file(dev, &dev_attr_alarm_status);
if (!err)
@@ -287,6 +353,9 @@ static int ds1343_sysfs_register(struct device *dev)
device_remove_file(dev, &dev_attr_alarm_mode);
+error3:
+ device_remove_bin_file(dev, &nvram_attr);
+
error2:
device_remove_file(dev, &dev_attr_trickle_charger);
@@ -302,6 +371,7 @@ static void ds1343_sysfs_unregister(struct device *dev)
device_remove_file(dev, &dev_attr_glitch_filter);
device_remove_file(dev, &dev_attr_trickle_charger);
+ device_remove_bin_file(dev, &nvram_attr);
if (priv->irq <= 0)
return;
@@ -684,6 +754,7 @@ static struct spi_driver ds1343_driver = {
module_spi_driver(ds1343_driver);
MODULE_DESCRIPTION("DS1343 RTC SPI Driver");
-MODULE_AUTHOR("Raghavendra Chandra Ganiga <ravi23ganiga@gmail.com>");
+MODULE_AUTHOR("Raghavendra Chandra Ganiga <ravi23ganiga@gmail.com>,"
+ "Ankur Srivastava <sankurece@gmail.com>");
MODULE_LICENSE("GPL v2");
MODULE_VERSION(DS1343_DRV_VERSION);
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index c6b2191a4128..9822715db8ba 100644
--- a/drivers/rtc/rtc-ds1742.c
+++ b/drivers/rtc/rtc-ds1742.c
@@ -231,7 +231,7 @@ static struct platform_driver ds1742_rtc_driver = {
.driver = {
.name = "rtc-ds1742",
.owner = THIS_MODULE,
- .of_match_table = ds1742_rtc_of_match,
+ .of_match_table = of_match_ptr(ds1742_rtc_of_match),
},
};
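
Context for the rtc-ds1742 hunk above: of_match_ptr() lets the OF match table compile away to NULL on !CONFIG_OF builds. A minimal sketch of how that helper is conventionally defined (simplified from include/linux/of.h; shown for illustration, not part of this patch):

#ifdef CONFIG_OF
#define of_match_ptr(_ptr)	(_ptr)	/* OF enabled: pass the table through */
#else
#define of_match_ptr(_ptr)	NULL	/* no OF support: no table */
#endif

When !CONFIG_OF, the table itself becomes unreferenced, which is why drivers usually also guard it with #ifdef CONFIG_OF (as the isl12022 hunk below does) or mark it __maybe_unused.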
diff --git a/drivers/rtc/rtc-efi-platform.c b/drivers/rtc/rtc-efi-platform.c
new file mode 100644
index 000000000000..b40fbe332af4
--- /dev/null
+++ b/drivers/rtc/rtc-efi-platform.c
@@ -0,0 +1,31 @@
+/*
+ * Moved from arch/ia64/kernel/time.c
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * David Mosberger <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ * Copyright (C) 1999-2000 VA Linux Systems
+ * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com>
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/efi.h>
+#include <linux/platform_device.h>
+
+static struct platform_device rtc_efi_dev = {
+ .name = "rtc-efi",
+ .id = -1,
+};
+
+static int __init rtc_init(void)
+{
+ if (efi_enabled(EFI_RUNTIME_SERVICES))
+ if (platform_device_register(&rtc_efi_dev) < 0)
+ pr_err("unable to register rtc device...\n");
+
+ /* not necessarily an error */
+ return 0;
+}
+module_init(rtc_init);
diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c
index c4c38431012e..8225b89de810 100644
--- a/drivers/rtc/rtc-efi.c
+++ b/drivers/rtc/rtc-efi.c
@@ -17,6 +17,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/stringify.h>
#include <linux/time.h>
#include <linux/platform_device.h>
#include <linux/rtc.h>
@@ -48,8 +49,8 @@ compute_wday(efi_time_t *eft)
int y;
int ndays = 0;
- if (eft->year < 1998) {
- pr_err("EFI year < 1998, invalid date\n");
+ if (eft->year < EFI_RTC_EPOCH) {
+ pr_err("EFI year < " __stringify(EFI_RTC_EPOCH) ", invalid date\n");
return -1;
}
@@ -78,19 +79,36 @@ convert_to_efi_time(struct rtc_time *wtime, efi_time_t *eft)
eft->timezone = EFI_UNSPECIFIED_TIMEZONE;
}
-static void
+static bool
convert_from_efi_time(efi_time_t *eft, struct rtc_time *wtime)
{
memset(wtime, 0, sizeof(*wtime));
+
+ if (eft->second >= 60)
+ return false;
wtime->tm_sec = eft->second;
+
+ if (eft->minute >= 60)
+ return false;
wtime->tm_min = eft->minute;
+
+ if (eft->hour >= 24)
+ return false;
wtime->tm_hour = eft->hour;
+
+ if (!eft->day || eft->day > 31)
+ return false;
wtime->tm_mday = eft->day;
+
+ if (!eft->month || eft->month > 12)
+ return false;
wtime->tm_mon = eft->month - 1;
wtime->tm_year = eft->year - 1900;
/* day of the week [0-6], Sunday=0 */
wtime->tm_wday = compute_wday(eft);
+ if (wtime->tm_wday < 0)
+ return false;
/* day in the year [1-365]*/
wtime->tm_yday = compute_yday(eft);
@@ -106,6 +124,8 @@ convert_from_efi_time(efi_time_t *eft, struct rtc_time *wtime)
default:
wtime->tm_isdst = -1;
}
+
+ return true;
}
static int efi_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
@@ -122,7 +142,8 @@ static int efi_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
if (status != EFI_SUCCESS)
return -EINVAL;
- convert_from_efi_time(&eft, &wkalrm->time);
+ if (!convert_from_efi_time(&eft, &wkalrm->time))
+ return -EIO;
return rtc_valid_tm(&wkalrm->time);
}
@@ -163,7 +184,8 @@ static int efi_read_time(struct device *dev, struct rtc_time *tm)
return -EINVAL;
}
- convert_from_efi_time(&eft, tm);
+ if (!convert_from_efi_time(&eft, tm))
+ return -EIO;
return rtc_valid_tm(tm);
}
diff --git a/drivers/rtc/rtc-isl12022.c b/drivers/rtc/rtc-isl12022.c
index 03b891129428..aa55f081c505 100644
--- a/drivers/rtc/rtc-isl12022.c
+++ b/drivers/rtc/rtc-isl12022.c
@@ -17,6 +17,8 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/err.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
#define DRV_VERSION "0.1"
@@ -271,6 +273,13 @@ static int isl12022_probe(struct i2c_client *client,
return PTR_ERR_OR_ZERO(isl12022->rtc);
}
+#ifdef CONFIG_OF
+static struct of_device_id isl12022_dt_match[] = {
+ { .compatible = "isl,isl12022" },
+ { },
+};
+#endif
+
static const struct i2c_device_id isl12022_id[] = {
{ "isl12022", 0 },
{ }
@@ -280,6 +289,9 @@ MODULE_DEVICE_TABLE(i2c, isl12022_id);
static struct i2c_driver isl12022_driver = {
.driver = {
.name = "rtc-isl12022",
+#ifdef CONFIG_OF
+ .of_match_table = of_match_ptr(isl12022_dt_match),
+#endif
},
.probe = isl12022_probe,
.id_table = isl12022_id,
diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c
new file mode 100644
index 000000000000..6a12bf62c504
--- /dev/null
+++ b/drivers/rtc/rtc-pcf85063.c
@@ -0,0 +1,204 @@
+/*
+ * An I2C driver for the PCF85063 RTC
+ * Copyright 2014 Rose Technology
+ *
+ * Author: Søren Andersen <san@rosetechnology.dk>
+ * Maintainers: http://www.nslu2-linux.org/
+ *
+ * based on the other drivers in this same directory.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/i2c.h>
+#include <linux/bcd.h>
+#include <linux/rtc.h>
+#include <linux/module.h>
+
+#define DRV_VERSION "0.0.1"
+
+#define PCF85063_REG_CTRL1 0x00 /* status */
+#define PCF85063_REG_CTRL2 0x01
+
+#define PCF85063_REG_SC 0x04 /* datetime */
+#define PCF85063_REG_MN 0x05
+#define PCF85063_REG_HR 0x06
+#define PCF85063_REG_DM 0x07
+#define PCF85063_REG_DW 0x08
+#define PCF85063_REG_MO 0x09
+#define PCF85063_REG_YR 0x0A
+
+#define PCF85063_MO_C 0x80 /* century */
+
+static struct i2c_driver pcf85063_driver;
+
+struct pcf85063 {
+ struct rtc_device *rtc;
+ int c_polarity; /* 0: MO_C=1 means 19xx, otherwise MO_C=1 means 20xx */
+ int voltage_low; /* indicates if a low_voltage was detected */
+};
+
+/*
+ * In the routines that deal directly with the pcf85063 hardware, we use
+ * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch.
+ */
+static int pcf85063_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+{
+ struct pcf85063 *pcf85063 = i2c_get_clientdata(client);
+ unsigned char buf[13] = { PCF85063_REG_CTRL1 };
+ struct i2c_msg msgs[] = {
+ {/* setup read ptr */
+ .addr = client->addr,
+ .len = 1,
+ .buf = buf
+ },
+ {/* read status + date */
+ .addr = client->addr,
+ .flags = I2C_M_RD,
+ .len = 13,
+ .buf = buf
+ },
+ };
+
+ /* read registers */
+ if ((i2c_transfer(client->adapter, msgs, 2)) != 2) {
+ dev_err(&client->dev, "%s: read error\n", __func__);
+ return -EIO;
+ }
+
+ tm->tm_sec = bcd2bin(buf[PCF85063_REG_SC] & 0x7F);
+ tm->tm_min = bcd2bin(buf[PCF85063_REG_MN] & 0x7F);
+ tm->tm_hour = bcd2bin(buf[PCF85063_REG_HR] & 0x3F); /* rtc hr 0-23 */
+ tm->tm_mday = bcd2bin(buf[PCF85063_REG_DM] & 0x3F);
+ tm->tm_wday = buf[PCF85063_REG_DW] & 0x07;
+ tm->tm_mon = bcd2bin(buf[PCF85063_REG_MO] & 0x1F) - 1; /* rtc mn 1-12 */
+ tm->tm_year = bcd2bin(buf[PCF85063_REG_YR]);
+ if (tm->tm_year < 70)
+ tm->tm_year += 100; /* assume we are in 1970...2069 */
+ /* detect the polarity heuristically. see note above. */
+ pcf85063->c_polarity = (buf[PCF85063_REG_MO] & PCF85063_MO_C) ?
+ (tm->tm_year >= 100) : (tm->tm_year < 100);
+
+ /* the clock can give out invalid datetime, but we cannot return
+ * -EINVAL, otherwise hwclock will refuse to set the time on bootup.
+ */
+ if (rtc_valid_tm(tm) < 0)
+ dev_err(&client->dev, "retrieved date/time is not valid.\n");
+
+ return 0;
+}
+
+static int pcf85063_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+{
+ int i = 0, err = 0;
+ unsigned char buf[11];
+
+ /* Control & status */
+ buf[PCF85063_REG_CTRL1] = 0;
+ buf[PCF85063_REG_CTRL2] = 5;
+
+ /* hours, minutes and seconds */
+ buf[PCF85063_REG_SC] = bin2bcd(tm->tm_sec) & 0x7F;
+
+ buf[PCF85063_REG_MN] = bin2bcd(tm->tm_min);
+ buf[PCF85063_REG_HR] = bin2bcd(tm->tm_hour);
+
+ /* Day of month, 1 - 31 */
+ buf[PCF85063_REG_DM] = bin2bcd(tm->tm_mday);
+
+ /* Day, 0 - 6 */
+ buf[PCF85063_REG_DW] = tm->tm_wday & 0x07;
+
+ /* month, 1 - 12 */
+ buf[PCF85063_REG_MO] = bin2bcd(tm->tm_mon + 1);
+
+ /* year and century */
+ buf[PCF85063_REG_YR] = bin2bcd(tm->tm_year % 100);
+
+ /* write register's data */
+ for (i = 0; i < sizeof(buf); i++) {
+ unsigned char data[2] = { i, buf[i] };
+
+ err = i2c_master_send(client, data, sizeof(data));
+ if (err != sizeof(data)) {
+ dev_err(&client->dev, "%s: err=%d addr=%02x, data=%02x\n",
+ __func__, err, data[0], data[1]);
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
+static int pcf85063_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+ return pcf85063_get_datetime(to_i2c_client(dev), tm);
+}
+
+static int pcf85063_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+ return pcf85063_set_datetime(to_i2c_client(dev), tm);
+}
+
+static const struct rtc_class_ops pcf85063_rtc_ops = {
+ .read_time = pcf85063_rtc_read_time,
+ .set_time = pcf85063_rtc_set_time
+};
+
+static int pcf85063_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct pcf85063 *pcf85063;
+
+ dev_dbg(&client->dev, "%s\n", __func__);
+
+ if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+ return -ENODEV;
+
+ pcf85063 = devm_kzalloc(&client->dev, sizeof(struct pcf85063),
+ GFP_KERNEL);
+ if (!pcf85063)
+ return -ENOMEM;
+
+ dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n");
+
+ i2c_set_clientdata(client, pcf85063);
+
+ pcf85063->rtc = devm_rtc_device_register(&client->dev,
+ pcf85063_driver.driver.name,
+ &pcf85063_rtc_ops, THIS_MODULE);
+
+ return PTR_ERR_OR_ZERO(pcf85063->rtc);
+}
+
+static const struct i2c_device_id pcf85063_id[] = {
+ { "pcf85063", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, pcf85063_id);
+
+#ifdef CONFIG_OF
+static const struct of_device_id pcf85063_of_match[] = {
+ { .compatible = "nxp,pcf85063" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, pcf85063_of_match);
+#endif
+
+static struct i2c_driver pcf85063_driver = {
+ .driver = {
+ .name = "rtc-pcf85063",
+ .owner = THIS_MODULE,
+ .of_match_table = of_match_ptr(pcf85063_of_match),
+ },
+ .probe = pcf85063_probe,
+ .id_table = pcf85063_id,
+};
+
+module_i2c_driver(pcf85063_driver);
+
+MODULE_AUTHOR("Søren Andersen <san@rosetechnology.dk>");
+MODULE_DESCRIPTION("PCF85063 RTC driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
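
The new pcf85063 driver above stores the time fields in BCD in the chip registers and converts with bcd2bin()/bin2bcd() from <linux/bcd.h>. A minimal sketch of what those helpers compute, illustrative only and not part of the patch:

static inline unsigned int bcd2bin_sketch(unsigned char val)
{
	return (val & 0x0f) + (val >> 4) * 10;		/* 0x59 -> 59 */
}

static inline unsigned char bin2bcd_sketch(unsigned int val)
{
	return ((val / 10) << 4) | (val % 10);		/* 59 -> 0x59 */
}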
diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c
index 63b558c48196..5a197d9dc7e7 100644
--- a/drivers/rtc/rtc-pcf8563.c
+++ b/drivers/rtc/rtc-pcf8563.c
@@ -26,6 +26,8 @@
#define PCF8563_REG_ST1 0x00 /* status */
#define PCF8563_REG_ST2 0x01
+#define PCF8563_BIT_AIE (1 << 1)
+#define PCF8563_BIT_AF (1 << 3)
#define PCF8563_REG_SC 0x02 /* datetime */
#define PCF8563_REG_MN 0x03
@@ -36,9 +38,6 @@
#define PCF8563_REG_YR 0x08
#define PCF8563_REG_AMN 0x09 /* alarm */
-#define PCF8563_REG_AHR 0x0A
-#define PCF8563_REG_ADM 0x0B
-#define PCF8563_REG_ADW 0x0C
#define PCF8563_REG_CLKO 0x0D /* clock out */
#define PCF8563_REG_TMRC 0x0E /* timer control */
@@ -67,37 +66,133 @@ struct pcf8563 {
*/
int c_polarity; /* 0: MO_C=1 means 19xx, otherwise MO_C=1 means 20xx */
int voltage_low; /* indicates if a low_voltage was detected */
+
+ struct i2c_client *client;
};
-/*
- * In the routines that deal directly with the pcf8563 hardware, we use
- * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch.
- */
-static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int pcf8563_read_block_data(struct i2c_client *client, unsigned char reg,
+ unsigned char length, unsigned char *buf)
{
- struct pcf8563 *pcf8563 = i2c_get_clientdata(client);
- unsigned char buf[13] = { PCF8563_REG_ST1 };
-
struct i2c_msg msgs[] = {
{/* setup read ptr */
.addr = client->addr,
.len = 1,
- .buf = buf
+ .buf = &reg,
},
- {/* read status + date */
+ {
.addr = client->addr,
.flags = I2C_M_RD,
- .len = 13,
+ .len = length,
.buf = buf
},
};
- /* read registers */
if ((i2c_transfer(client->adapter, msgs, 2)) != 2) {
dev_err(&client->dev, "%s: read error\n", __func__);
return -EIO;
}
+ return 0;
+}
+
+static int pcf8563_write_block_data(struct i2c_client *client,
+ unsigned char reg, unsigned char length,
+ unsigned char *buf)
+{
+ int i, err;
+
+ for (i = 0; i < length; i++) {
+ unsigned char data[2] = { reg + i, buf[i] };
+
+ err = i2c_master_send(client, data, sizeof(data));
+ if (err != sizeof(data)) {
+ dev_err(&client->dev,
+ "%s: err=%d addr=%02x, data=%02x\n",
+ __func__, err, data[0], data[1]);
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
+static int pcf8563_set_alarm_mode(struct i2c_client *client, bool on)
+{
+ unsigned char buf[2];
+ int err;
+
+ err = pcf8563_read_block_data(client, PCF8563_REG_ST2, 1, buf + 1);
+ if (err < 0)
+ return err;
+
+ if (on)
+ buf[1] |= PCF8563_BIT_AIE;
+ else
+ buf[1] &= ~PCF8563_BIT_AIE;
+
+ buf[1] &= ~PCF8563_BIT_AF;
+ buf[0] = PCF8563_REG_ST2;
+
+ err = pcf8563_write_block_data(client, PCF8563_REG_ST2, 1, buf + 1);
+ if (err < 0) {
+ dev_err(&client->dev, "%s: write error\n", __func__);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int pcf8563_get_alarm_mode(struct i2c_client *client, unsigned char *en,
+ unsigned char *pen)
+{
+ unsigned char buf;
+ int err;
+
+ err = pcf8563_read_block_data(client, PCF8563_REG_ST2, 1, &buf);
+ if (err)
+ return err;
+
+ if (en)
+ *en = !!(buf & PCF8563_BIT_AIE);
+ if (pen)
+ *pen = !!(buf & PCF8563_BIT_AF);
+
+ return 0;
+}
+
+static irqreturn_t pcf8563_irq(int irq, void *dev_id)
+{
+ struct pcf8563 *pcf8563 = i2c_get_clientdata(dev_id);
+ int err;
+ unsigned char pending;
+
+ err = pcf8563_get_alarm_mode(pcf8563->client, NULL, &pending);
+ if (err < 0)
+ return IRQ_NONE;
+
+ if (pending) {
+ rtc_update_irq(pcf8563->rtc, 1, RTC_IRQF | RTC_AF);
+ pcf8563_set_alarm_mode(pcf8563->client, 1);
+ return IRQ_HANDLED;
+ }
+
+ return IRQ_NONE;
+}
+
+/*
+ * In the routines that deal directly with the pcf8563 hardware, we use
+ * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch.
+ */
+static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+{
+ struct pcf8563 *pcf8563 = i2c_get_clientdata(client);
+ unsigned char buf[9];
+ int err;
+
+ err = pcf8563_read_block_data(client, PCF8563_REG_ST1, 9, buf);
+ if (err)
+ return err;
+
if (buf[PCF8563_REG_SC] & PCF8563_SC_LV) {
pcf8563->voltage_low = 1;
dev_info(&client->dev,
@@ -144,7 +239,7 @@ static int pcf8563_get_datetime(struct i2c_client *client, struct rtc_time *tm)
static int pcf8563_set_datetime(struct i2c_client *client, struct rtc_time *tm)
{
struct pcf8563 *pcf8563 = i2c_get_clientdata(client);
- int i, err;
+ int err;
unsigned char buf[9];
dev_dbg(&client->dev, "%s: secs=%d, mins=%d, hours=%d, "
@@ -170,19 +265,10 @@ static int pcf8563_set_datetime(struct i2c_client *client, struct rtc_time *tm)
buf[PCF8563_REG_DW] = tm->tm_wday & 0x07;
- /* write register's data */
- for (i = 0; i < 7; i++) {
- unsigned char data[2] = { PCF8563_REG_SC + i,
- buf[PCF8563_REG_SC + i] };
-
- err = i2c_master_send(client, data, sizeof(data));
- if (err != sizeof(data)) {
- dev_err(&client->dev,
- "%s: err=%d addr=%02x, data=%02x\n",
- __func__, err, data[0], data[1]);
- return -EIO;
- }
- }
+ err = pcf8563_write_block_data(client, PCF8563_REG_SC,
+ 9 - PCF8563_REG_SC, buf + PCF8563_REG_SC);
+ if (err)
+ return err;
return 0;
}
@@ -235,16 +321,83 @@ static int pcf8563_rtc_set_time(struct device *dev, struct rtc_time *tm)
return pcf8563_set_datetime(to_i2c_client(dev), tm);
}
+static int pcf8563_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *tm)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ unsigned char buf[4];
+ int err;
+
+ err = pcf8563_read_block_data(client, PCF8563_REG_AMN, 4, buf);
+ if (err)
+ return err;
+
+ dev_dbg(&client->dev,
+ "%s: raw data is min=%02x, hr=%02x, mday=%02x, wday=%02x\n",
+ __func__, buf[0], buf[1], buf[2], buf[3]);
+
+ tm->time.tm_min = bcd2bin(buf[0] & 0x7F);
+ tm->time.tm_hour = bcd2bin(buf[1] & 0x7F);
+ tm->time.tm_mday = bcd2bin(buf[2] & 0x1F);
+ tm->time.tm_wday = bcd2bin(buf[3] & 0x7);
+ tm->time.tm_mon = -1;
+ tm->time.tm_year = -1;
+ tm->time.tm_yday = -1;
+ tm->time.tm_isdst = -1;
+
+ err = pcf8563_get_alarm_mode(client, &tm->enabled, &tm->pending);
+ if (err < 0)
+ return err;
+
+ dev_dbg(&client->dev, "%s: tm is mins=%d, hours=%d, mday=%d, wday=%d,"
+ " enabled=%d, pending=%d\n", __func__, tm->time.tm_min,
+ tm->time.tm_hour, tm->time.tm_mday, tm->time.tm_wday,
+ tm->enabled, tm->pending);
+
+ return 0;
+}
+
+static int pcf8563_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *tm)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ unsigned char buf[4];
+ int err;
+
+ dev_dbg(dev, "%s, min=%d hour=%d wday=%d mday=%d "
+ "enabled=%d pending=%d\n", __func__,
+ tm->time.tm_min, tm->time.tm_hour, tm->time.tm_wday,
+ tm->time.tm_mday, tm->enabled, tm->pending);
+
+ buf[0] = bin2bcd(tm->time.tm_min);
+ buf[1] = bin2bcd(tm->time.tm_hour);
+ buf[2] = bin2bcd(tm->time.tm_mday);
+ buf[3] = tm->time.tm_wday & 0x07;
+
+ err = pcf8563_write_block_data(client, PCF8563_REG_AMN, 4, buf);
+ if (err)
+ return err;
+
+ return pcf8563_set_alarm_mode(client, 1);
+}
+
+static int pcf8563_irq_enable(struct device *dev, unsigned int enabled)
+{
+ return pcf8563_set_alarm_mode(to_i2c_client(dev), !!enabled);
+}
+
static const struct rtc_class_ops pcf8563_rtc_ops = {
.ioctl = pcf8563_rtc_ioctl,
.read_time = pcf8563_rtc_read_time,
.set_time = pcf8563_rtc_set_time,
+ .read_alarm = pcf8563_rtc_read_alarm,
+ .set_alarm = pcf8563_rtc_set_alarm,
+ .alarm_irq_enable = pcf8563_irq_enable,
};
static int pcf8563_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
struct pcf8563 *pcf8563;
+ int err;
dev_dbg(&client->dev, "%s\n", __func__);
@@ -259,12 +412,30 @@ static int pcf8563_probe(struct i2c_client *client,
dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n");
i2c_set_clientdata(client, pcf8563);
+ pcf8563->client = client;
+ device_set_wakeup_capable(&client->dev, 1);
pcf8563->rtc = devm_rtc_device_register(&client->dev,
pcf8563_driver.driver.name,
&pcf8563_rtc_ops, THIS_MODULE);
- return PTR_ERR_OR_ZERO(pcf8563->rtc);
+ if (IS_ERR(pcf8563->rtc))
+ return PTR_ERR(pcf8563->rtc);
+
+ if (client->irq > 0) {
+ err = devm_request_threaded_irq(&client->dev, client->irq,
+ NULL, pcf8563_irq,
+ IRQF_SHARED|IRQF_ONESHOT|IRQF_TRIGGER_FALLING,
+ pcf8563->rtc->name, client);
+ if (err) {
+ dev_err(&client->dev, "unable to request IRQ %d\n",
+ client->irq);
+ return err;
+ }
+
+ }
+
+ return 0;
}
static const struct i2c_device_id pcf8563_id[] = {
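
With the .read_alarm/.set_alarm/.alarm_irq_enable ops added to pcf8563 above, the alarm becomes reachable through the standard RTC character device. A hypothetical userspace sketch, not part of the patch; the UTC assumption and helper name are illustrative:

#define _DEFAULT_SOURCE		/* for timegm() */
#include <string.h>
#include <time.h>
#include <sys/ioctl.h>
#include <linux/rtc.h>

/* Arm the RTC alarm 'seconds' from now on an already-open /dev/rtcX fd.
 * Assumes the RTC runs in UTC. */
static int arm_alarm_in(int fd, int seconds)
{
	struct rtc_time rt;
	struct rtc_wkalrm alm;
	struct tm tm = { 0 };
	time_t t;

	if (ioctl(fd, RTC_RD_TIME, &rt) < 0)
		return -1;
	tm.tm_sec = rt.tm_sec;   tm.tm_min = rt.tm_min;   tm.tm_hour = rt.tm_hour;
	tm.tm_mday = rt.tm_mday; tm.tm_mon = rt.tm_mon;   tm.tm_year = rt.tm_year;
	t = timegm(&tm) + seconds;
	gmtime_r(&t, &tm);
	memset(&alm, 0, sizeof(alm));
	alm.enabled = 1;
	alm.time.tm_sec = tm.tm_sec;   alm.time.tm_min = tm.tm_min;
	alm.time.tm_hour = tm.tm_hour; alm.time.tm_mday = tm.tm_mday;
	alm.time.tm_mon = tm.tm_mon;   alm.time.tm_year = tm.tm_year;
	return ioctl(fd, RTC_WKALM_SET, &alm);	/* reaches pcf8563_rtc_set_alarm() */
}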
diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c
index 02b0379ae550..4f34dc0095b5 100644
--- a/drivers/staging/android/binder.c
+++ b/drivers/staging/android/binder.c
@@ -585,7 +585,6 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
int ret;
- struct page **page_array_ptr;
page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE];
@@ -598,8 +597,7 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
}
tmp_area.addr = page_addr;
tmp_area.size = PAGE_SIZE + PAGE_SIZE /* guard page? */;
- page_array_ptr = page;
- ret = map_vm_area(&tmp_area, PAGE_KERNEL, &page_array_ptr);
+ ret = map_vm_area(&tmp_area, PAGE_KERNEL, page);
if (ret) {
pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n",
proc->pid, page_addr);
diff --git a/drivers/staging/lustre/lustre/libcfs/hash.c b/drivers/staging/lustre/lustre/libcfs/hash.c
index 5dde79418297..8ef1deb59d4a 100644
--- a/drivers/staging/lustre/lustre/libcfs/hash.c
+++ b/drivers/staging/lustre/lustre/libcfs/hash.c
@@ -351,7 +351,7 @@ cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
cfs_hash_dhead_t, dh_head);
if (dh->dh_tail != NULL) /* not empty */
- hlist_add_after(dh->dh_tail, hnode);
+ hlist_add_behind(hnode, dh->dh_tail);
else /* empty list */
hlist_add_head(hnode, &dh->dh_head);
dh->dh_tail = hnode;
@@ -406,7 +406,7 @@ cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
cfs_hash_dhead_dep_t, dd_head);
if (dh->dd_tail != NULL) /* not empty */
- hlist_add_after(dh->dd_tail, hnode);
+ hlist_add_behind(hnode, dh->dd_tail);
else /* empty list */
hlist_add_head(hnode, &dh->dd_head);
dh->dd_tail = hnode;
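
The two lustre hunks above follow an API rename done elsewhere in this series: hlist_add_after(prev, n) becomes hlist_add_behind(n, prev), with the node being inserted moved to the first argument. A sketch of the renamed helper, per its conventional definition in include/linux/list.h (shown for context):

static inline void hlist_add_behind(struct hlist_node *n,
				    struct hlist_node *prev)
{
	n->next = prev->next;
	prev->next = n;
	n->pprev = &prev->next;

	if (n->next)
		n->next->pprev = &n->next;
}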
diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig
index b24aa010f68c..65cd80bf9aec 100644
--- a/drivers/tty/Kconfig
+++ b/drivers/tty/Kconfig
@@ -13,6 +13,10 @@ config VT
bool "Virtual terminal" if EXPERT
depends on !S390 && !UML
select INPUT
+ select NEW_LEDS
+ select LEDS_CLASS
+ select LEDS_TRIGGERS
+ select INPUT_LEDS
default y
---help---
If you say Y here, you will get support for terminal devices with
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 454b65898e2c..42bad18c66c9 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -355,7 +355,7 @@ static struct sysrq_key_op sysrq_term_op = {
static void moom_callback(struct work_struct *ignored)
{
- out_of_memory(node_zonelist(first_online_node, GFP_KERNEL), GFP_KERNEL,
+ out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL), GFP_KERNEL,
0, NULL, true);
}
diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
index d0e3a4497707..d6ecfc9e734f 100644
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@ -33,6 +33,7 @@
#include <linux/string.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/leds.h>
#include <linux/kbd_kern.h>
#include <linux/kbd_diacr.h>
@@ -130,6 +131,7 @@ static char rep; /* flag telling character repeat */
static int shift_state = 0;
static unsigned char ledstate = 0xff; /* undefined */
+static unsigned char lockstate = 0xff; /* undefined */
static unsigned char ledioctl;
/*
@@ -961,6 +963,41 @@ static void k_brl(struct vc_data *vc, unsigned char value, char up_flag)
}
}
+/* We route VT keyboard "leds" through triggers */
+static void kbd_ledstate_trigger_activate(struct led_classdev *cdev);
+
+static struct led_trigger ledtrig_ledstate[] = {
+#define DEFINE_LEDSTATE_TRIGGER(kbd_led, nam) \
+ [kbd_led] = { \
+ .name = nam, \
+ .activate = kbd_ledstate_trigger_activate, \
+ }
+ DEFINE_LEDSTATE_TRIGGER(VC_SCROLLOCK, "kbd-scrollock"),
+ DEFINE_LEDSTATE_TRIGGER(VC_NUMLOCK, "kbd-numlock"),
+ DEFINE_LEDSTATE_TRIGGER(VC_CAPSLOCK, "kbd-capslock"),
+ DEFINE_LEDSTATE_TRIGGER(VC_KANALOCK, "kbd-kanalock"),
+#undef DEFINE_LEDSTATE_TRIGGER
+};
+
+static void kbd_lockstate_trigger_activate(struct led_classdev *cdev);
+
+static struct led_trigger ledtrig_lockstate[] = {
+#define DEFINE_LOCKSTATE_TRIGGER(kbd_led, nam) \
+ [kbd_led] = { \
+ .name = nam, \
+ .activate = kbd_lockstate_trigger_activate, \
+ }
+ DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTLOCK, "kbd-shiftlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_ALTGRLOCK, "kbd-altgrlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_CTRLLOCK, "kbd-ctrllock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_ALTLOCK, "kbd-altlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTLLOCK, "kbd-shiftllock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTRLOCK, "kbd-shiftrlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_CTRLLLOCK, "kbd-ctrlllock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_CTRLRLOCK, "kbd-ctrlrlock"),
+#undef DEFINE_LOCKSTATE_TRIGGER
+};
+
/*
* The leds display either (i) the status of NumLock, CapsLock, ScrollLock,
* or (ii) whatever pattern of lights people want to show using KDSETLED,
@@ -995,18 +1032,25 @@ static inline unsigned char getleds(void)
return kbd->ledflagstate;
}
-static int kbd_update_leds_helper(struct input_handle *handle, void *data)
+/* Called on trigger connection, to set initial state */
+static void kbd_ledstate_trigger_activate(struct led_classdev *cdev)
{
- unsigned char leds = *(unsigned char *)data;
+ struct led_trigger *trigger = cdev->trigger;
+ int led = trigger - ledtrig_ledstate;
- if (test_bit(EV_LED, handle->dev->evbit)) {
- input_inject_event(handle, EV_LED, LED_SCROLLL, !!(leds & 0x01));
- input_inject_event(handle, EV_LED, LED_NUML, !!(leds & 0x02));
- input_inject_event(handle, EV_LED, LED_CAPSL, !!(leds & 0x04));
- input_inject_event(handle, EV_SYN, SYN_REPORT, 0);
- }
+ tasklet_disable(&keyboard_tasklet);
+ led_trigger_event(trigger, ledstate & (1 << led) ? LED_FULL : LED_OFF);
+ tasklet_enable(&keyboard_tasklet);
+}
- return 0;
+static void kbd_lockstate_trigger_activate(struct led_classdev *cdev)
+{
+ struct led_trigger *trigger = cdev->trigger;
+ int led = trigger - ledtrig_lockstate;
+
+ tasklet_disable(&keyboard_tasklet);
+ led_trigger_event(trigger, lockstate & (1 << led) ? LED_FULL : LED_OFF);
+ tasklet_enable(&keyboard_tasklet);
}
/**
@@ -1095,16 +1139,29 @@ static void kbd_bh(unsigned long dummy)
{
unsigned char leds;
unsigned long flags;
-
+ int i;
+
spin_lock_irqsave(&led_lock, flags);
leds = getleds();
spin_unlock_irqrestore(&led_lock, flags);
if (leds != ledstate) {
- input_handler_for_each_handle(&kbd_handler, &leds,
- kbd_update_leds_helper);
+ for (i = 0; i < ARRAY_SIZE(ledtrig_ledstate); i++)
+ if ((leds ^ ledstate) & (1 << i))
+ led_trigger_event(&ledtrig_ledstate[i],
+ leds & (1 << i)
+ ? LED_FULL : LED_OFF);
ledstate = leds;
}
+
+ if (kbd->lockstate != lockstate) {
+ for (i = 0; i < ARRAY_SIZE(ledtrig_lockstate); i++)
+ if ((kbd->lockstate ^ lockstate) & (1 << i))
+ led_trigger_event(&ledtrig_lockstate[i],
+ kbd->lockstate & (1 << i)
+ ? LED_FULL : LED_OFF);
+ lockstate = kbd->lockstate;
+ }
}
DECLARE_TASKLET_DISABLED(keyboard_tasklet, kbd_bh, 0);
@@ -1442,20 +1499,6 @@ static void kbd_disconnect(struct input_handle *handle)
kfree(handle);
}
-/*
- * Start keyboard handler on the new keyboard by refreshing LED state to
- * match the rest of the system.
- */
-static void kbd_start(struct input_handle *handle)
-{
- tasklet_disable(&keyboard_tasklet);
-
- if (ledstate != 0xff)
- kbd_update_leds_helper(handle, &ledstate);
-
- tasklet_enable(&keyboard_tasklet);
-}
-
static const struct input_device_id kbd_ids[] = {
{
.flags = INPUT_DEVICE_ID_MATCH_EVBIT,
@@ -1477,7 +1520,6 @@ static struct input_handler kbd_handler = {
.match = kbd_match,
.connect = kbd_connect,
.disconnect = kbd_disconnect,
- .start = kbd_start,
.name = "kbd",
.id_table = kbd_ids,
};
@@ -1501,6 +1543,20 @@ int __init kbd_init(void)
if (error)
return error;
+ for (i = 0; i < ARRAY_SIZE(ledtrig_ledstate); i++) {
+ error = led_trigger_register(&ledtrig_ledstate[i]);
+ if (error)
+ pr_err("error %d while registering trigger %s\n",
+ error, ledtrig_ledstate[i].name);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ledtrig_lockstate); i++) {
+ error = led_trigger_register(&ledtrig_lockstate[i]);
+ if (error)
+ pr_err("error %d while registering trigger %s\n",
+ error, ledtrig_lockstate[i].name);
+ }
+
tasklet_enable(&keyboard_tasklet);
tasklet_schedule(&keyboard_tasklet);
diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index bddc8b17a4d8..0ce8823952a8 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -190,8 +190,6 @@ static ssize_t brightness_store(struct device *dev,
}
mutex_unlock(&bd->ops_lock);
- backlight_generate_event(bd, BACKLIGHT_UPDATE_SYSFS);
-
return rc;
}
static DEVICE_ATTR_RW(brightness);
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index c770337c4b45..24575d9d882d 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -153,6 +153,7 @@ extern int adfs_map_lookup(struct super_block *sb, unsigned int frag_id, unsigne
extern unsigned int adfs_map_free(struct super_block *sb);
/* Misc */
+__printf(3, 4)
void __adfs_error(struct super_block *sb, const char *function,
const char *fmt, ...);
#define adfs_error(sb, fmt...) __adfs_error(sb, __func__, fmt)
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 0d138c0de293..51c279a29845 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -138,7 +138,7 @@ adfs_dir_lookup_byname(struct inode *inode, struct qstr *name, struct object_inf
goto out;
if (ADFS_I(inode)->parent_id != dir.parent_id) {
- adfs_error(sb, "parent directory changed under me! (%lx but got %lx)\n",
+ adfs_error(sb, "parent directory changed under me! (%lx but got %x)\n",
ADFS_I(inode)->parent_id, dir.parent_id);
ret = -EIO;
goto free_out;
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index d9e3bee4e653..f2ba88ab4aed 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -55,10 +55,10 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
}
size >>= sb->s_blocksize_bits;
- if (size > sizeof(dir->bh)/sizeof(dir->bh[0])) {
+ if (size > ARRAY_SIZE(dir->bh)) {
/* this directory is too big for fixed bh set, must allocate */
struct buffer_head **bh_fplus =
- kzalloc(size * sizeof(struct buffer_head *),
+ kcalloc(size, sizeof(struct buffer_head *),
GFP_KERNEL);
if (!bh_fplus) {
adfs_error(sb, "not enough memory for"
@@ -79,9 +79,8 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
dir->bh_fplus[blk] = sb_bread(sb, block);
if (!dir->bh_fplus[blk]) {
- adfs_error(sb, "dir object %X failed read for"
- " offset %d, mapped block %X",
- id, blk, block);
+ adfs_error(sb, "dir object %x failed read for offset %d, mapped block %lX",
+ id, blk, block);
goto out;
}
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index acf32054edd8..9e359fb20c0a 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -143,20 +143,6 @@ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
}
-/* Does a dentry have some pending activity? */
-static inline int autofs4_ispending(struct dentry *dentry)
-{
- struct autofs_info *inf = autofs4_dentry_ino(dentry);
-
- if (inf->flags & AUTOFS_INF_PENDING)
- return 1;
-
- if (inf->flags & AUTOFS_INF_EXPIRING)
- return 1;
-
- return 0;
-}
-
struct inode *autofs4_get_inode(struct super_block *, umode_t);
void autofs4_free_ino(struct autofs_info *);
@@ -191,55 +177,6 @@ extern const struct file_operations autofs4_root_operations;
extern const struct dentry_operations autofs4_dentry_operations;
/* VFS automount flags management functions */
-
-static inline void __managed_dentry_set_automount(struct dentry *dentry)
-{
- dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
-}
-
-static inline void managed_dentry_set_automount(struct dentry *dentry)
-{
- spin_lock(&dentry->d_lock);
- __managed_dentry_set_automount(dentry);
- spin_unlock(&dentry->d_lock);
-}
-
-static inline void __managed_dentry_clear_automount(struct dentry *dentry)
-{
- dentry->d_flags &= ~DCACHE_NEED_AUTOMOUNT;
-}
-
-static inline void managed_dentry_clear_automount(struct dentry *dentry)
-{
- spin_lock(&dentry->d_lock);
- __managed_dentry_clear_automount(dentry);
- spin_unlock(&dentry->d_lock);
-}
-
-static inline void __managed_dentry_set_transit(struct dentry *dentry)
-{
- dentry->d_flags |= DCACHE_MANAGE_TRANSIT;
-}
-
-static inline void managed_dentry_set_transit(struct dentry *dentry)
-{
- spin_lock(&dentry->d_lock);
- __managed_dentry_set_transit(dentry);
- spin_unlock(&dentry->d_lock);
-}
-
-static inline void __managed_dentry_clear_transit(struct dentry *dentry)
-{
- dentry->d_flags &= ~DCACHE_MANAGE_TRANSIT;
-}
-
-static inline void managed_dentry_clear_transit(struct dentry *dentry)
-{
- spin_lock(&dentry->d_lock);
- __managed_dentry_clear_transit(dentry);
- spin_unlock(&dentry->d_lock);
-}
-
static inline void __managed_dentry_set_managed(struct dentry *dentry)
{
dentry->d_flags |= (DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 394e90b02c5e..a7be57e39be7 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -333,7 +333,6 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
if (ino->flags & AUTOFS_INF_PENDING)
goto out;
if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
- struct autofs_info *ino = autofs4_dentry_ino(root);
ino->flags |= AUTOFS_INF_EXPIRING;
init_completion(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index cc87c1abac97..cdb25ebccc4c 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -166,8 +166,10 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
const unsigned char *str = name->name;
struct list_head *p, *head;
- spin_lock(&sbi->lookup_lock);
head = &sbi->active_list;
+ if (list_empty(head))
+ return NULL;
+ spin_lock(&sbi->lookup_lock);
list_for_each(p, head) {
struct autofs_info *ino;
struct dentry *active;
@@ -218,8 +220,10 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
const unsigned char *str = name->name;
struct list_head *p, *head;
- spin_lock(&sbi->lookup_lock);
head = &sbi->expiring_list;
+ if (list_empty(head))
+ return NULL;
+ spin_lock(&sbi->lookup_lock);
list_for_each(p, head) {
struct autofs_info *ino;
struct dentry *expiring;
@@ -373,7 +377,7 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
* this because the leaves of the directory tree under the
* mount never trigger mounts themselves (they have an autofs
* trigger mount mounted on them). But v4 pseudo direct mounts
- * do need the leaves to to trigger mounts. In this case we
+ * do need the leaves to trigger mounts. In this case we
* have no choice but to use the list_empty() check and
* require user space behave.
*/
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index f7f87e233dd9..f40006db36df 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -46,6 +46,7 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode)
/* inode.c */
extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino);
+extern void bfs_dump_imap(const char *, struct super_block *);
/* file.c */
extern const struct inode_operations bfs_file_inops;
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index a399e6d9dc74..08063ae0a17c 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -75,8 +75,6 @@ const struct file_operations bfs_dir_operations = {
.llseek = generic_file_llseek,
};
-extern void dump_imap(const char *, struct super_block *);
-
static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool excl)
{
@@ -110,7 +108,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
BFS_I(inode)->i_eblock = 0;
insert_inode_hash(inode);
mark_inode_dirty(inode);
- dump_imap("create", s);
+ bfs_dump_imap("create", s);
err = bfs_add_entry(dir, dentry->d_name.name, dentry->d_name.len,
inode->i_ino);
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 7041ac35ace8..90bc079d9982 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -30,8 +30,6 @@ MODULE_LICENSE("GPL");
#define dprintf(x...)
#endif
-void dump_imap(const char *prefix, struct super_block *s);
-
struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
{
struct bfs_inode *di;
@@ -194,7 +192,7 @@ static void bfs_evict_inode(struct inode *inode)
info->si_freeb += bi->i_eblock + 1 - bi->i_sblock;
info->si_freei++;
clear_bit(ino, info->si_imap);
- dump_imap("delete_inode", s);
+ bfs_dump_imap("delete_inode", s);
}
/*
@@ -297,7 +295,7 @@ static const struct super_operations bfs_sops = {
.statfs = bfs_statfs,
};
-void dump_imap(const char *prefix, struct super_block *s)
+void bfs_dump_imap(const char *prefix, struct super_block *s)
{
#ifdef DEBUG
int i;
@@ -443,7 +441,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
}
brelse(bh);
brelse(sbh);
- dump_imap("read_super", s);
+ bfs_dump_imap("read_super", s);
return 0;
out3:
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 3892c1a23241..d94672a1f272 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -145,6 +145,25 @@ static int padzero(unsigned long elf_bss)
#define ELF_BASE_PLATFORM NULL
#endif
+/*
+ * Use get_random_int() to implement AT_RANDOM while avoiding depletion
+ * of the entropy pool.
+ */
+static void get_atrandom_bytes(unsigned char *buf, size_t nbytes)
+{
+ unsigned char *p = buf;
+
+ while (nbytes) {
+ unsigned int random_variable;
+ size_t chunk = min_t(size_t, nbytes, sizeof(random_variable));
+
+ random_variable = get_random_int();
+ memcpy(p, &random_variable, chunk);
+ p += chunk;
+ nbytes -= chunk;
+ }
+}
+
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
unsigned long load_addr, unsigned long interp_load_addr)
@@ -206,7 +225,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
/*
* Generate 16 random bytes for userspace PRNG seeding.
*/
- get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
+ get_atrandom_bytes(k_rand_bytes, sizeof(k_rand_bytes));
u_rand_bytes = (elf_addr_t __user *)
STACK_ALLOC(p, sizeof(k_rand_bytes));
if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
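
The binfmt_elf change above fills the 16 AT_RANDOM bytes from the cheaper get_random_int()-based helper instead of get_random_bytes(). For context, userspace sees those bytes through the auxiliary vector; a hypothetical peek using glibc's getauxval(), illustration only:

#include <stdio.h>
#include <sys/auxv.h>

int main(void)
{
	const unsigned char *r = (const unsigned char *)getauxval(AT_RANDOM);
	int i;

	for (i = 0; r && i < 16; i++)
		printf("%02x", r[i]);	/* seed used for stack canaries / PRNGs */
	printf("\n");
	return 0;
}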
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index d749731dc0ee..dd333a67ebc2 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -39,13 +39,11 @@ int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args)
args);
/* start by checking things over */
- ASSERT(cache->fstop_percent >= 0 &&
- cache->fstop_percent < cache->fcull_percent &&
+ ASSERT(cache->fstop_percent < cache->fcull_percent &&
cache->fcull_percent < cache->frun_percent &&
cache->frun_percent < 100);
- ASSERT(cache->bstop_percent >= 0 &&
- cache->bstop_percent < cache->bcull_percent &&
+ ASSERT(cache->bstop_percent < cache->bcull_percent &&
cache->bcull_percent < cache->brun_percent &&
cache->brun_percent < 100);
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index b078d3081d6c..ff43e5a8711f 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -222,7 +222,7 @@ static ssize_t cachefiles_daemon_write(struct file *file,
if (test_bit(CACHEFILES_DEAD, &cache->flags))
return -EIO;
- if (datalen < 0 || datalen > PAGE_SIZE - 1)
+ if (datalen > PAGE_SIZE - 1)
return -EOPNOTSUPP;
/* drag the command string into the kernel so we can parse it */
@@ -385,7 +385,7 @@ static int cachefiles_daemon_fstop(struct cachefiles_cache *cache, char *args)
if (args[0] != '%' || args[1] != '\0')
return -EINVAL;
- if (fstop < 0 || fstop >= cache->fcull_percent)
+ if (fstop >= cache->fcull_percent)
return cachefiles_daemon_range_error(cache, args);
cache->fstop_percent = fstop;
@@ -457,7 +457,7 @@ static int cachefiles_daemon_bstop(struct cachefiles_cache *cache, char *args)
if (args[0] != '%' || args[1] != '\0')
return -EINVAL;
- if (bstop < 0 || bstop >= cache->bcull_percent)
+ if (bstop >= cache->bcull_percent)
return cachefiles_daemon_range_error(cache, args);
cache->bstop_percent = bstop;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 7ff866dbb89e..54ac0e8ad96c 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -38,7 +38,7 @@ static const struct cifs_sid sid_everyone = {
1, 1, {0, 0, 0, 0, 0, 1}, {0} };
/* security id for Authenticated Users system group */
static const struct cifs_sid sid_authusers = {
- 1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11)} };
+ 1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11)} };
/* group users */
static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 66f65001a6d8..86a2aa57560c 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2477,14 +2477,14 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon,
}
parm_data = (struct cifs_posix_lock *)
((char *)&pSMBr->hdr.Protocol + data_offset);
- if (parm_data->lock_type == __constant_cpu_to_le16(CIFS_UNLCK))
+ if (parm_data->lock_type == cpu_to_le16(CIFS_UNLCK))
pLockData->fl_type = F_UNLCK;
else {
if (parm_data->lock_type ==
- __constant_cpu_to_le16(CIFS_RDLCK))
+ cpu_to_le16(CIFS_RDLCK))
pLockData->fl_type = F_RDLCK;
else if (parm_data->lock_type ==
- __constant_cpu_to_le16(CIFS_WRLCK))
+ cpu_to_le16(CIFS_WRLCK))
pLockData->fl_type = F_WRLCK;
pLockData->fl_start = le64_to_cpu(parm_data->start);
@@ -3276,25 +3276,25 @@ CIFSSMB_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
pSMB->compression_state = cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
pSMB->TotalParameterCount = 0;
- pSMB->TotalDataCount = __constant_cpu_to_le32(2);
+ pSMB->TotalDataCount = cpu_to_le32(2);
pSMB->MaxParameterCount = 0;
pSMB->MaxDataCount = 0;
pSMB->MaxSetupCount = 4;
pSMB->Reserved = 0;
pSMB->ParameterOffset = 0;
- pSMB->DataCount = __constant_cpu_to_le32(2);
+ pSMB->DataCount = cpu_to_le32(2);
pSMB->DataOffset =
cpu_to_le32(offsetof(struct smb_com_transaction_compr_ioctl_req,
compression_state) - 4); /* 84 */
pSMB->SetupCount = 4;
- pSMB->SubCommand = __constant_cpu_to_le16(NT_TRANSACT_IOCTL);
+ pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL);
pSMB->ParameterCount = 0;
- pSMB->FunctionCode = __constant_cpu_to_le32(FSCTL_SET_COMPRESSION);
+ pSMB->FunctionCode = cpu_to_le32(FSCTL_SET_COMPRESSION);
pSMB->IsFsctl = 1; /* FSCTL */
pSMB->IsRootFlag = 0;
pSMB->Fid = fid; /* file handle always le */
/* 3 byte pad, followed by 2 byte compress state */
- pSMB->ByteCount = __constant_cpu_to_le16(5);
+ pSMB->ByteCount = cpu_to_le16(5);
inc_rfc1001_len(pSMB, 5);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3430,10 +3430,10 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
cifs_acl->version = cpu_to_le16(1);
if (acl_type == ACL_TYPE_ACCESS) {
cifs_acl->access_entry_count = cpu_to_le16(count);
- cifs_acl->default_entry_count = __constant_cpu_to_le16(0xFFFF);
+ cifs_acl->default_entry_count = cpu_to_le16(0xFFFF);
} else if (acl_type == ACL_TYPE_DEFAULT) {
cifs_acl->default_entry_count = cpu_to_le16(count);
- cifs_acl->access_entry_count = __constant_cpu_to_le16(0xFFFF);
+ cifs_acl->access_entry_count = cpu_to_le16(0xFFFF);
} else {
cifs_dbg(FYI, "unknown ACL type %d\n", acl_type);
return 0;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 4ab2f79ffa7a..553747f8127e 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1058,7 +1058,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
- buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
+ buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
if (!buf) {
free_xid(xid);
return -ENOMEM;
@@ -1393,7 +1393,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
- buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
+ buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
if (!buf)
return -ENOMEM;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 39ee32688eac..39b850786e12 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -46,7 +46,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4,
USHRT_MAX));
pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
- pSMB->req.VcNumber = __constant_cpu_to_le16(1);
+ pSMB->req.VcNumber = cpu_to_le16(1);
/* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 3f17b4550831..e5100b8fb3a0 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -111,7 +111,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
return -EINVAL;
max_num = max_buf / sizeof(struct smb2_lock_element);
- buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL);
+ buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -247,7 +247,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
}
max_num = max_buf / sizeof(struct smb2_lock_element);
- buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL);
+ buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf) {
free_xid(xid);
return -ENOMEM;
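
The kzalloc(n * size) to kcalloc(n, size) conversions in the adfs, cifs and smb2 hunks buy an overflow check on the multiplication. A simplified sketch of the guard kcalloc adds (the real helper goes through kmalloc_array(); shown only to illustrate the motivation):

static inline void *kcalloc_sketch(size_t n, size_t size, gfp_t flags)
{
	if (size != 0 && n > SIZE_MAX / size)
		return NULL;			/* n * size would overflow */
	return kzalloc(n * size, flags);
}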
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index f2e6ac29a8d6..da0faf6e13f2 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -67,27 +67,27 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
* indexed by command in host byte order
*/
static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
- /* SMB2_NEGOTIATE */ __constant_cpu_to_le16(65),
- /* SMB2_SESSION_SETUP */ __constant_cpu_to_le16(9),
- /* SMB2_LOGOFF */ __constant_cpu_to_le16(4),
- /* SMB2_TREE_CONNECT */ __constant_cpu_to_le16(16),
- /* SMB2_TREE_DISCONNECT */ __constant_cpu_to_le16(4),
- /* SMB2_CREATE */ __constant_cpu_to_le16(89),
- /* SMB2_CLOSE */ __constant_cpu_to_le16(60),
- /* SMB2_FLUSH */ __constant_cpu_to_le16(4),
- /* SMB2_READ */ __constant_cpu_to_le16(17),
- /* SMB2_WRITE */ __constant_cpu_to_le16(17),
- /* SMB2_LOCK */ __constant_cpu_to_le16(4),
- /* SMB2_IOCTL */ __constant_cpu_to_le16(49),
+ /* SMB2_NEGOTIATE */ cpu_to_le16(65),
+ /* SMB2_SESSION_SETUP */ cpu_to_le16(9),
+ /* SMB2_LOGOFF */ cpu_to_le16(4),
+ /* SMB2_TREE_CONNECT */ cpu_to_le16(16),
+ /* SMB2_TREE_DISCONNECT */ cpu_to_le16(4),
+ /* SMB2_CREATE */ cpu_to_le16(89),
+ /* SMB2_CLOSE */ cpu_to_le16(60),
+ /* SMB2_FLUSH */ cpu_to_le16(4),
+ /* SMB2_READ */ cpu_to_le16(17),
+ /* SMB2_WRITE */ cpu_to_le16(17),
+ /* SMB2_LOCK */ cpu_to_le16(4),
+ /* SMB2_IOCTL */ cpu_to_le16(49),
/* BB CHECK this ... not listed in documentation */
- /* SMB2_CANCEL */ __constant_cpu_to_le16(0),
- /* SMB2_ECHO */ __constant_cpu_to_le16(4),
- /* SMB2_QUERY_DIRECTORY */ __constant_cpu_to_le16(9),
- /* SMB2_CHANGE_NOTIFY */ __constant_cpu_to_le16(9),
- /* SMB2_QUERY_INFO */ __constant_cpu_to_le16(9),
- /* SMB2_SET_INFO */ __constant_cpu_to_le16(2),
+ /* SMB2_CANCEL */ cpu_to_le16(0),
+ /* SMB2_ECHO */ cpu_to_le16(4),
+ /* SMB2_QUERY_DIRECTORY */ cpu_to_le16(9),
+ /* SMB2_CHANGE_NOTIFY */ cpu_to_le16(9),
+ /* SMB2_QUERY_INFO */ cpu_to_le16(9),
+ /* SMB2_SET_INFO */ cpu_to_le16(2),
/* BB FIXME can also be 44 for lease break */
- /* SMB2_OPLOCK_BREAK */ __constant_cpu_to_le16(24)
+ /* SMB2_OPLOCK_BREAK */ cpu_to_le16(24)
};
int
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 77f8aeb9c2fc..527833197fb3 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -597,7 +597,7 @@ smb2_clone_range(const unsigned int xid,
goto cchunk_out;
/* For now array only one chunk long, will make more flexible later */
- pcchunk->ChunkCount = __constant_cpu_to_le32(1);
+ pcchunk->ChunkCount = cpu_to_le32(1);
pcchunk->Reserved = 0;
pcchunk->Reserved2 = 0;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 42ebc1a8be6c..a5f2a5ce4cca 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1354,7 +1354,7 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
char *ret_data = NULL;
fsctl_input.CompressionState =
- __constant_cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
+ cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
FSCTL_SET_COMPRESSION, true /* is_fsctl */,
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 69f3595d3952..d03adad014db 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -85,7 +85,7 @@
/* BB FIXME - analyze following length BB */
#define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */
-#define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe)
+#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
/*
* SMB2 Header Definition
@@ -96,7 +96,7 @@
*
*/
-#define SMB2_HEADER_STRUCTURE_SIZE __constant_cpu_to_le16(64)
+#define SMB2_HEADER_STRUCTURE_SIZE cpu_to_le16(64)
struct smb2_hdr {
__be32 smb2_buf_length; /* big endian on wire */
@@ -137,16 +137,16 @@ struct smb2_transform_hdr {
} __packed;
/* Encryption Algorithms */
-#define SMB2_ENCRYPTION_AES128_CCM __constant_cpu_to_le16(0x0001)
+#define SMB2_ENCRYPTION_AES128_CCM cpu_to_le16(0x0001)
/*
* SMB2 flag definitions
*/
-#define SMB2_FLAGS_SERVER_TO_REDIR __constant_cpu_to_le32(0x00000001)
-#define SMB2_FLAGS_ASYNC_COMMAND __constant_cpu_to_le32(0x00000002)
-#define SMB2_FLAGS_RELATED_OPERATIONS __constant_cpu_to_le32(0x00000004)
-#define SMB2_FLAGS_SIGNED __constant_cpu_to_le32(0x00000008)
-#define SMB2_FLAGS_DFS_OPERATIONS __constant_cpu_to_le32(0x10000000)
+#define SMB2_FLAGS_SERVER_TO_REDIR cpu_to_le32(0x00000001)
+#define SMB2_FLAGS_ASYNC_COMMAND cpu_to_le32(0x00000002)
+#define SMB2_FLAGS_RELATED_OPERATIONS cpu_to_le32(0x00000004)
+#define SMB2_FLAGS_SIGNED cpu_to_le32(0x00000008)
+#define SMB2_FLAGS_DFS_OPERATIONS cpu_to_le32(0x10000000)
/*
* Definitions for SMB2 Protocol Data Units (network frames)
@@ -157,7 +157,7 @@ struct smb2_transform_hdr {
*
*/
-#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_cpu_to_le16(9)
+#define SMB2_ERROR_STRUCTURE_SIZE2 cpu_to_le16(9)
struct smb2_err_rsp {
struct smb2_hdr hdr;
@@ -500,12 +500,12 @@ struct create_context {
#define SMB2_LEASE_HANDLE_CACHING_HE 0x02
#define SMB2_LEASE_WRITE_CACHING_HE 0x04
-#define SMB2_LEASE_NONE __constant_cpu_to_le32(0x00)
-#define SMB2_LEASE_READ_CACHING __constant_cpu_to_le32(0x01)
-#define SMB2_LEASE_HANDLE_CACHING __constant_cpu_to_le32(0x02)
-#define SMB2_LEASE_WRITE_CACHING __constant_cpu_to_le32(0x04)
+#define SMB2_LEASE_NONE cpu_to_le32(0x00)
+#define SMB2_LEASE_READ_CACHING cpu_to_le32(0x01)
+#define SMB2_LEASE_HANDLE_CACHING cpu_to_le32(0x02)
+#define SMB2_LEASE_WRITE_CACHING cpu_to_le32(0x04)
-#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS __constant_cpu_to_le32(0x02)
+#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS cpu_to_le32(0x02)
#define SMB2_LEASE_KEY_SIZE 16
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 1da168c61d35..278f8fdeb9ef 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -13,7 +13,7 @@
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/string.h>
#include <linux/list.h>
#include <linux/sched.h>
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 2849f41e72a2..1326d38960db 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -13,7 +13,7 @@
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/string.h>
#include <linux/coda.h>
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index cd8a63238b11..9c3dedc000d1 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -19,8 +19,7 @@
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/namei.h>
-
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/coda.h>
#include <linux/coda_psdev.h>
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 9e83b7790212..d244d743a232 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -18,7 +18,7 @@
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/coda.h>
#include <linux/coda_psdev.h>
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index fe3afb2de880..b945410bfcd5 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -21,9 +21,7 @@
#include <linux/vfs.h>
#include <linux/slab.h>
#include <linux/pid_namespace.h>
-
-#include <asm/uaccess.h>
-
+#include <linux/uaccess.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 3f5de96bbb58..4326d172fc27 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -16,7 +16,7 @@
#include <linux/string.h>
#include <linux/namei.h>
#include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/coda.h>
#include <linux/coda_psdev.h>
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 5c1e4242368b..822629126e89 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -40,7 +40,7 @@
#include <linux/pid_namespace.h>
#include <asm/io.h>
#include <asm/poll.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/coda.h>
#include <linux/coda_psdev.h>
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 21fcf8dcb9cd..5bb6e27298a4 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -27,7 +27,7 @@
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/vfs.h>
diff --git a/fs/compat.c b/fs/compat.c
index 66d3d3c6b4b2..6917fdba382c 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -562,7 +562,7 @@ ssize_t compat_rw_copy_check_uvector(int type,
goto out;
ret = -EINVAL;
- if (nr_segs > UIO_MAXIOV || nr_segs < 0)
+ if (nr_segs > UIO_MAXIOV)
goto out;
if (nr_segs > fast_segs) {
ret = -ENOMEM;
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index ddcfe590b8a8..355c522f3585 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -11,6 +11,8 @@
* The actual compression is based on zlib, see the other files.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
@@ -21,7 +23,7 @@
#include <linux/vfs.h>
#include <linux/mutex.h>
#include <uapi/linux/cramfs_fs.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "internal.h"
@@ -153,7 +155,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
static unsigned char read_buffers[READ_BUFFERS][BUFFER_SIZE];
static unsigned buffer_blocknr[READ_BUFFERS];
-static struct super_block * buffer_dev[READ_BUFFERS];
+static struct super_block *buffer_dev[READ_BUFFERS];
static int next_buffer;
/*
@@ -205,6 +207,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
for (i = 0; i < BLKS_PER_BUF; i++) {
struct page *page = pages[i];
+
if (page) {
wait_on_page_locked(page);
if (!PageUptodate(page)) {
@@ -223,6 +226,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
data = read_buffers[buffer];
for (i = 0; i < BLKS_PER_BUF; i++) {
struct page *page = pages[i];
+
if (page) {
memcpy(data, kmap(page), PAGE_CACHE_SIZE);
kunmap(page);
@@ -237,6 +241,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
static void cramfs_kill_sb(struct super_block *sb)
{
struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
+
kill_block_super(sb);
kfree(sbi);
}
@@ -277,7 +282,7 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
/* check for wrong endianness */
if (super.magic == CRAMFS_MAGIC_WEND) {
if (!silent)
- printk(KERN_ERR "cramfs: wrong endianness\n");
+ pr_err("wrong endianness\n");
return -EINVAL;
}
@@ -287,22 +292,22 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
mutex_unlock(&read_mutex);
if (super.magic != CRAMFS_MAGIC) {
if (super.magic == CRAMFS_MAGIC_WEND && !silent)
- printk(KERN_ERR "cramfs: wrong endianness\n");
+ pr_err("wrong endianness\n");
else if (!silent)
- printk(KERN_ERR "cramfs: wrong magic\n");
+ pr_err("wrong magic\n");
return -EINVAL;
}
}
/* get feature flags first */
if (super.flags & ~CRAMFS_SUPPORTED_FLAGS) {
- printk(KERN_ERR "cramfs: unsupported filesystem features\n");
+ pr_err("unsupported filesystem features\n");
return -EINVAL;
}
/* Check that the root inode is in a sane state */
if (!S_ISDIR(super.root.mode)) {
- printk(KERN_ERR "cramfs: root is not a directory\n");
+ pr_err("root is not a directory\n");
return -EINVAL;
}
/* correct strange, hard-coded permissions of mkcramfs */
@@ -310,23 +315,23 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
root_offset = super.root.offset << 2;
if (super.flags & CRAMFS_FLAG_FSID_VERSION_2) {
- sbi->size=super.size;
- sbi->blocks=super.fsid.blocks;
- sbi->files=super.fsid.files;
+ sbi->size = super.size;
+ sbi->blocks = super.fsid.blocks;
+ sbi->files = super.fsid.files;
} else {
- sbi->size=1<<28;
- sbi->blocks=0;
- sbi->files=0;
+ sbi->size = 1<<28;
+ sbi->blocks = 0;
+ sbi->files = 0;
}
- sbi->magic=super.magic;
- sbi->flags=super.flags;
+ sbi->magic = super.magic;
+ sbi->flags = super.flags;
if (root_offset == 0)
- printk(KERN_INFO "cramfs: empty filesystem");
+ pr_info("empty filesystem");
else if (!(super.flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) &&
((root_offset != sizeof(struct cramfs_super)) &&
(root_offset != 512 + sizeof(struct cramfs_super))))
{
- printk(KERN_ERR "cramfs: bad root offset %lu\n", root_offset);
+ pr_err("bad root offset %lu\n", root_offset);
return -EINVAL;
}
@@ -425,7 +430,7 @@ static int cramfs_readdir(struct file *file, struct dir_context *ctx)
/*
* Lookup and fill in the inode data..
*/
-static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
+static struct dentry *cramfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
unsigned int offset = 0;
struct inode *inode = NULL;
@@ -483,7 +488,7 @@ out:
return NULL;
}
-static int cramfs_readpage(struct file *file, struct page * page)
+static int cramfs_readpage(struct file *file, struct page *page)
{
struct inode *inode = page->mapping->host;
u32 maxblock;
@@ -511,7 +516,7 @@ static int cramfs_readpage(struct file *file, struct page * page)
if (compr_len == 0)
; /* hole */
else if (unlikely(compr_len > (PAGE_CACHE_SIZE << 1))) {
- pr_err("cramfs: bad compressed blocksize %u\n",
+ pr_err("bad compressed blocksize %u\n",
compr_len);
goto err;
} else {
diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c
index 1760c1b84d97..ec4f1d4fdad0 100644
--- a/fs/cramfs/uncompress.c
+++ b/fs/cramfs/uncompress.c
@@ -15,6 +15,8 @@
* then is used by multiple filesystems.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/vmalloc.h>
@@ -37,7 +39,7 @@ int cramfs_uncompress_block(void *dst, int dstlen, void *src, int srclen)
err = zlib_inflateReset(&stream);
if (err != Z_OK) {
- printk("zlib_inflateReset error %d\n", err);
+ pr_err("zlib_inflateReset error %d\n", err);
zlib_inflateEnd(&stream);
zlib_inflateInit(&stream);
}
@@ -48,8 +50,8 @@ int cramfs_uncompress_block(void *dst, int dstlen, void *src, int srclen)
return stream.total_out;
err:
- printk("Error %d while decompressing!\n", err);
- printk("%p(%d)->%p(%d)\n", src, srclen, dst, dstlen);
+ pr_err("Error %d while decompressing!\n", err);
+ pr_err("%p(%d)->%p(%d)\n", src, srclen, dst, dstlen);
return -EIO;
}
@@ -57,7 +59,7 @@ int cramfs_uncompress_init(void)
{
if (!initialized++) {
stream.workspace = vmalloc(zlib_inflate_workspacesize());
- if ( !stream.workspace ) {
+ if (!stream.workspace) {
initialized = 0;
return -ENOMEM;
}
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 8d77ba7b1756..1323c568e362 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -718,16 +718,11 @@ static const struct file_operations waiters_fops = {
void dlm_delete_debug_file(struct dlm_ls *ls)
{
- if (ls->ls_debug_rsb_dentry)
- debugfs_remove(ls->ls_debug_rsb_dentry);
- if (ls->ls_debug_waiters_dentry)
- debugfs_remove(ls->ls_debug_waiters_dentry);
- if (ls->ls_debug_locks_dentry)
- debugfs_remove(ls->ls_debug_locks_dentry);
- if (ls->ls_debug_all_dentry)
- debugfs_remove(ls->ls_debug_all_dentry);
- if (ls->ls_debug_toss_dentry)
- debugfs_remove(ls->ls_debug_toss_dentry);
+ debugfs_remove(ls->ls_debug_rsb_dentry);
+ debugfs_remove(ls->ls_debug_waiters_dentry);
+ debugfs_remove(ls->ls_debug_locks_dentry);
+ debugfs_remove(ls->ls_debug_all_dentry);
+ debugfs_remove(ls->ls_debug_toss_dentry);
}
int dlm_create_debug_file(struct dlm_ls *ls)
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index 356c044e2cd3..bbee8f063dfa 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -12,7 +12,8 @@
#include "efs.h"
-static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len) {
+static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
+{
struct buffer_head *bh;
int slot, namelen;
@@ -40,10 +41,10 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
if (be16_to_cpu(dirblock->magic) != EFS_DIRBLK_MAGIC) {
pr_err("%s(): invalid directory block\n", __func__);
brelse(bh);
- return(0);
+ return 0;
}
- for(slot = 0; slot < dirblock->slots; slot++) {
+ for (slot = 0; slot < dirblock->slots; slot++) {
dirslot = (struct efs_dentry *) (((char *) bh->b_data) + EFS_SLOTAT(dirblock, slot));
namelen = dirslot->namelen;
@@ -52,12 +53,12 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
if ((namelen == len) && (!memcmp(name, nameptr, len))) {
inodenum = be32_to_cpu(dirslot->inode);
brelse(bh);
- return(inodenum);
+ return inodenum;
}
}
brelse(bh);
}
- return(0);
+ return 0;
}
struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
diff --git a/fs/exec.c b/fs/exec.c
index ab1f1200ce5d..a2b42a98c743 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -368,10 +368,6 @@ static int bprm_mm_init(struct linux_binprm *bprm)
if (!mm)
goto err;
- err = init_new_context(current, mm);
- if (err)
- goto err;
-
err = __bprm_mm_init(bprm);
if (err)
goto err;
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 7f20f25c232c..84529b8a331b 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -116,7 +116,7 @@ static int _sp2d_alloc(unsigned pages_in_unit, unsigned group_width,
num_a1pa = min_t(unsigned, PAGE_SIZE / sizeof__a1pa,
pages_in_unit - i);
- __a1pa = kzalloc(num_a1pa * sizeof__a1pa, GFP_KERNEL);
+ __a1pa = kcalloc(num_a1pa, sizeof__a1pa, GFP_KERNEL);
if (unlikely(!__a1pa)) {
ORE_DBGMSG("!! Failed to _alloc_1p_arrays=%d\n",
num_a1pa);
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index a8bc47f75fa0..5b6e9f246233 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -107,7 +107,10 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
}
if (!journal) {
- ret = generic_file_fsync(file, start, end, datasync);
+ if (test_opt(inode->i_sb, BARRIER))
+ ret = generic_file_fsync(file, start, end, datasync);
+ else
+ ret = __generic_file_fsync(file, start, end, datasync);
if (!ret && !hlist_empty(&inode->i_dentry))
ret = ext4_sync_parent(inode);
goto out;
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index a31b83c5cbd9..b39d487ccfb0 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -67,7 +67,7 @@ static int fscache_max_active_sysctl(struct ctl_table *table, int write,
return ret;
}
-struct ctl_table fscache_sysctls[] = {
+static struct ctl_table fscache_sysctls[] = {
{
.procname = "object_max_active",
.data = &fscache_object_max_active,
@@ -87,7 +87,7 @@ struct ctl_table fscache_sysctls[] = {
{}
};
-struct ctl_table fscache_sysctls_root[] = {
+static struct ctl_table fscache_sysctls_root[] = {
{
.procname = "fscache",
.mode = 0555,
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 32602c667b4a..7892e6fddb66 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -38,21 +38,30 @@ int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
return hfsplus_strcmp(&k1->cat.name, &k2->cat.name);
}
-void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
- u32 parent, struct qstr *str)
+/* Generates key for catalog file/folders record. */
+int hfsplus_cat_build_key(struct super_block *sb,
+ hfsplus_btree_key *key, u32 parent, struct qstr *str)
{
- int len;
+ int len, err;
key->cat.parent = cpu_to_be32(parent);
- if (str) {
- hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
- str->name, str->len);
- len = be16_to_cpu(key->cat.name.length);
- } else {
- key->cat.name.length = 0;
- len = 0;
- }
+ err = hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
+ str->name, str->len);
+ if (unlikely(err < 0))
+ return err;
+
+ len = be16_to_cpu(key->cat.name.length);
key->key_len = cpu_to_be16(6 + 2 * len);
+ return 0;
+}
+
+/* Generates key for catalog thread record. */
+void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
+ hfsplus_btree_key *key, u32 parent)
+{
+ key->cat.parent = cpu_to_be32(parent);
+ key->cat.name.length = 0;
+ key->key_len = cpu_to_be16(6);
}
static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent,
@@ -167,11 +176,16 @@ static int hfsplus_fill_cat_thread(struct super_block *sb,
hfsplus_cat_entry *entry, int type,
u32 parentid, struct qstr *str)
{
+ int err;
+
entry->type = cpu_to_be16(type);
entry->thread.reserved = 0;
entry->thread.parentID = cpu_to_be32(parentid);
- hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
+ err = hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
str->name, str->len);
+ if (unlikely(err < 0))
+ return err;
+
return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2;
}
@@ -183,7 +197,7 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid,
int err;
u16 type;
- hfsplus_cat_build_key(sb, fd->search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd->search_key, cnid);
err = hfs_brec_read(fd, &tmp, sizeof(hfsplus_cat_entry));
if (err)
return err;
@@ -250,11 +264,16 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
if (err)
return err;
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
entry_size = hfsplus_fill_cat_thread(sb, &entry,
S_ISDIR(inode->i_mode) ?
HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD,
dir->i_ino, str);
+ if (unlikely(entry_size < 0)) {
+ err = entry_size;
+ goto err2;
+ }
+
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
@@ -265,7 +284,10 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
if (err)
goto err2;
- hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ if (unlikely(err))
+ goto err1;
+
entry_size = hfsplus_cat_build_record(&entry, cnid, inode);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
@@ -288,7 +310,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
return 0;
err1:
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
if (!hfs_brec_find(&fd, hfs_find_rec_by_key))
hfs_brec_remove(&fd);
err2:
@@ -313,7 +335,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
if (!str) {
int len;
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -329,7 +351,9 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
off + 2, len);
fd.search_key->key_len = cpu_to_be16(6 + len);
} else
- hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ if (unlikely(err))
+ goto out;
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
@@ -360,7 +384,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
if (err)
goto out;
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -405,7 +429,11 @@ int hfsplus_rename_cat(u32 cnid,
dst_fd = src_fd;
/* find the old dir entry and read the data */
- hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
+ err = hfsplus_cat_build_key(sb, src_fd.search_key,
+ src_dir->i_ino, src_name);
+ if (unlikely(err))
+ goto out;
+
err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -419,7 +447,11 @@ int hfsplus_rename_cat(u32 cnid,
type = be16_to_cpu(entry.type);
/* create new dir entry with the data from the old entry */
- hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name);
+ err = hfsplus_cat_build_key(sb, dst_fd.search_key,
+ dst_dir->i_ino, dst_name);
+ if (unlikely(err))
+ goto out;
+
err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
@@ -436,7 +468,11 @@ int hfsplus_rename_cat(u32 cnid,
dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC;
/* finally remove the old entry */
- hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
+ err = hfsplus_cat_build_key(sb, src_fd.search_key,
+ src_dir->i_ino, src_name);
+ if (unlikely(err))
+ goto out;
+
err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -449,7 +485,7 @@ int hfsplus_rename_cat(u32 cnid,
src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC;
/* remove old thread entry */
- hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, src_fd.search_key, cnid);
err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -459,9 +495,14 @@ int hfsplus_rename_cat(u32 cnid,
goto out;
/* create new thread entry */
- hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, dst_fd.search_key, cnid);
entry_size = hfsplus_fill_cat_thread(sb, &entry, type,
dst_dir->i_ino, dst_name);
+ if (unlikely(entry_size < 0)) {
+ err = entry_size;
+ goto out;
+ }
+
err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 610a3260bef1..435bea231cc6 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -44,7 +44,10 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
if (err)
return ERR_PTR(err);
- hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
+ err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino,
+ &dentry->d_name);
+ if (unlikely(err < 0))
+ goto fail;
again:
err = hfs_brec_read(&fd, &entry, sizeof(entry));
if (err) {
@@ -97,9 +100,11 @@ again:
be32_to_cpu(entry.file.permissions.dev);
str.len = sprintf(name, "iNode%d", linkid);
str.name = name;
- hfsplus_cat_build_key(sb, fd.search_key,
+ err = hfsplus_cat_build_key(sb, fd.search_key,
HFSPLUS_SB(sb)->hidden_dir->i_ino,
&str);
+ if (unlikely(err < 0))
+ goto fail;
goto again;
}
} else if (!dentry->d_fsdata)
@@ -145,7 +150,7 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx)
err = -ENOMEM;
goto out;
}
- hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, inode->i_ino);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index eb5e059f481a..b0441d65fa54 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -443,8 +443,10 @@ int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *k1,
const hfsplus_btree_key *k2);
int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
const hfsplus_btree_key *k2);
-void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
+int hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
u32 parent, struct qstr *str);
+void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
+ hfsplus_btree_key *key, u32 parent);
void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms);
int hfsplus_find_cat(struct super_block *sb, u32 cnid,
struct hfs_find_data *fd);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 4cf2024b87da..593af2fdcc2d 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -515,7 +515,9 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
err = hfs_find_init(sbi->cat_tree, &fd);
if (err)
goto out_put_root;
- hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
+ err = hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
+ if (unlikely(err < 0))
+ goto out_put_root;
if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
hfs_find_exit(&fd);
if (entry.type != cpu_to_be16(HFSPLUS_FOLDER))
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index f36fc010fccb..2923a7bd82ac 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -545,12 +545,13 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
struct dnode *d1;
struct quad_buffer_head qbh1;
if (hpfs_sb(i->i_sb)->sb_chk)
- if (up != i->i_ino) {
- hpfs_error(i->i_sb,
- "bad pointer to fnode, dnode %08x, pointing to %08x, should be %08lx",
- dno, up, (unsigned long)i->i_ino);
- return;
- }
+ if (up != i->i_ino) {
+ hpfs_error(i->i_sb,
+ "bad pointer to fnode, dnode %08x, pointing to %08x, should be %08lx",
+ dno, up,
+ (unsigned long)i->i_ino);
+ return;
+ }
if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) {
d1->up = cpu_to_le32(up);
d1->root_dnode = 1;
@@ -1061,8 +1062,8 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
hpfs_brelse4(qbh);
if (hpfs_sb(s)->sb_chk)
if (hpfs_stop_cycles(s, dno, &c1, &c2, "map_fnode_dirent #1")) {
- kfree(name2);
- return NULL;
+ kfree(name2);
+ return NULL;
}
goto go_down;
}
diff --git a/fs/isofs/Makefile b/fs/isofs/Makefile
index bf162f0942d5..47a68e357512 100644
--- a/fs/isofs/Makefile
+++ b/fs/isofs/Makefile
@@ -8,3 +8,5 @@ isofs-objs-y := namei.o inode.o dir.o util.o rock.o export.o
isofs-objs-$(CONFIG_JOLIET) += joliet.o
isofs-objs-$(CONFIG_ZISOFS) += compress.o
isofs-objs := $(isofs-objs-y)
+
+# ccflags-y := -DDEBUG_FLAGS=1
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 592e5115a561..c9ba688dc407 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -15,6 +15,8 @@
*
* Transparent decompression of files on an iso9660 filesystem
*/
+#define DEBUG
+#define pr_fmt(fmt) "zisofs: " fmt
#include <linux/module.h>
#include <linux/init.h>
@@ -110,7 +112,7 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
*errp = -ENOMEM;
else
*errp = -EIO;
- printk(KERN_DEBUG "zisofs: zisofs_inflateInit returned %d\n",
+ pr_debug("zisofs_inflateInit returned %d\n",
zerr);
goto z_eio;
}
@@ -154,12 +156,12 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
if (zerr == Z_MEM_ERROR)
*errp = -ENOMEM;
else {
- printk(KERN_DEBUG
+ pr_debug(
"zisofs: zisofs_inflate returned"
" %d, inode = %lu,"
" page idx = %d, bh idx = %d,"
- " avail_in = %d,"
- " avail_out = %d\n",
+ " avail_in = %ld,"
+ " avail_out = %ld\n",
zerr, inode->i_ino, curpage,
curbh, stream.avail_in,
stream.avail_out);
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index 12088d8de3fa..44d1053dfad5 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -12,7 +12,7 @@
* Documentation/filesystems/nfs/Exporting
* fs/exportfs/expfs.c.
*/
-
+#define pr_fmt(fmt) "ISOFS: " fmt
#include "isofs.h"
static struct dentry *
@@ -52,8 +52,7 @@ static struct dentry *isofs_export_get_parent(struct dentry *child)
/* "child" must always be a directory. */
if (!S_ISDIR(child_inode->i_mode)) {
- printk(KERN_ERR "isofs: isofs_export_get_parent(): "
- "child is not a directory!\n");
+ pr_err("%s(): child is not a directory!\n", __func__);
rv = ERR_PTR(-EACCES);
goto out;
}
@@ -62,8 +61,7 @@ static struct dentry *isofs_export_get_parent(struct dentry *child)
* it is not zero, it means the directory failed to be
* normalized for some reason. */
if (e_child_inode->i_iget5_offset != 0) {
- printk(KERN_ERR "isofs: isofs_export_get_parent(): "
- "child directory not normalized!\n");
+ pr_err("isofs_export_get_parent(): child directory not normalized!\n");
rv = ERR_PTR(-EACCES);
goto out;
}
@@ -89,8 +87,7 @@ static struct dentry *isofs_export_get_parent(struct dentry *child)
/* Verify it is in fact the ".." entry. */
if ((isonum_711(de->name_len) != 1) || (de->name[0] != 1)) {
- printk(KERN_ERR "isofs: Unable to find the \"..\" "
- "directory for NFS.\n");
+ pr_err("Unable to find the \"..\" directory for NFS.\n");
rv = ERR_PTR(-EACCES);
goto out;
}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 4556ce1af5b0..cc23d86e174a 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -10,6 +10,8 @@
* 2004 Paul Serice - Inode Support pushed out from 4GB to 128GB
* 2004 Paul Serice - NFS Export Operations
*/
+#define DEBUG
+#define pr_fmt(fmt) "ISOFS: " fmt
#include <linux/init.h>
#include <linux/module.h>
@@ -528,23 +530,25 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session)
Te.cdte_format=CDROM_LBA;
i = ioctl_by_bdev(bdev, CDROMREADTOCENTRY, (unsigned long) &Te);
if (!i) {
- printk(KERN_DEBUG "ISOFS: Session %d start %d type %d\n",
+ pr_debug("Session %d start %d type %d\n",
session, Te.cdte_addr.lba,
Te.cdte_ctrl&CDROM_DATA_TRACK);
if ((Te.cdte_ctrl&CDROM_DATA_TRACK) == 4)
return Te.cdte_addr.lba;
}
- printk(KERN_ERR "ISOFS: Invalid session number or type of track\n");
+ pr_err("Invalid session number or type of track\n");
}
i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info);
if (session > 0)
- printk(KERN_ERR "ISOFS: Invalid session number\n");
+ pr_err("Invalid session number\n");
#if 0
- printk(KERN_DEBUG "isofs.inode: CDROMMULTISESSION: rc=%d\n",i);
+ pr_debug("isofs.inode: CDROMMULTISESSION: rc=%d\n", i);
if (i==0) {
- printk(KERN_DEBUG "isofs.inode: XA disk: %s\n",ms_info.xa_flag?"yes":"no");
- printk(KERN_DEBUG "isofs.inode: vol_desc_start = %d\n", ms_info.addr.lba);
+ pr_debug("isofs.inode: XA disk: %s\n",
+ ms_info.xa_flag?"yes":"no");
+ pr_debug("isofs.inode: vol_desc_start = %d\n",
+ ms_info.addr.lba);
}
#endif
if (i==0)
@@ -672,8 +676,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
else if (sec->escape[2] == 0x45)
joliet_level = 3;
- printk(KERN_DEBUG "ISO 9660 Extensions: "
- "Microsoft Joliet Level %d\n",
+ pr_debug("ISO 9660 Extensions: Microsoft Joliet Level %d\n",
joliet_level);
}
goto root_found;
@@ -771,11 +774,11 @@ root_found:
isonum_711(rootp->ext_attr_length);
sbi->s_firstdatazone = first_data_zone;
#ifndef BEQUIET
- printk(KERN_DEBUG "ISOFS: Max size:%ld Log zone size:%ld\n",
+ pr_debug("Max size:%ld Log zone size:%ld\n",
sbi->s_max_size, 1UL << sbi->s_log_zone_size);
- printk(KERN_DEBUG "ISOFS: First datazone:%ld\n", sbi->s_firstdatazone);
+ pr_debug("First datazone:%ld\n", sbi->s_firstdatazone);
if(sbi->s_high_sierra)
- printk(KERN_DEBUG "ISOFS: Disc in High Sierra format.\n");
+ pr_debug("Disc in High Sierra format.\n");
#endif
/*
@@ -878,9 +881,7 @@ root_found:
*/
if (sbi->s_rock == 1 && joliet_level &&
rootdir_empty(s, sbi->s_firstdatazone)) {
- printk(KERN_NOTICE
- "ISOFS: primary root directory is empty. "
- "Disabling Rock Ridge and switching to Joliet.");
+ pr_notice("primary root directory is empty. Disabling Rock Ridge and switching to Joliet.");
sbi->s_rock = 0;
}
@@ -898,8 +899,7 @@ root_found:
sbi->s_rock = 0;
if (sbi->s_firstdatazone != first_data_zone) {
sbi->s_firstdatazone = first_data_zone;
- printk(KERN_DEBUG
- "ISOFS: changing to secondary root\n");
+ pr_debug("changing to secondary root\n");
iput(inode);
inode = isofs_iget(s, sbi->s_firstdatazone, 0);
if (IS_ERR(inode))
@@ -918,9 +918,8 @@ root_found:
/* Make sure the root inode is a directory */
if (!S_ISDIR(inode->i_mode)) {
- printk(KERN_WARNING
- "isofs_fill_super: root inode is not a directory. "
- "Corrupted media?\n");
+ pr_warn("%s: root inode is not a directory. Corrupted media?\n",
+ __func__);
goto out_iput;
}
@@ -952,27 +951,26 @@ out_iput:
out_no_root:
error = PTR_ERR(inode);
if (error != -ENOMEM)
- printk(KERN_WARNING "%s: get root inode failed\n", __func__);
+ pr_warn("%s: get root inode failed\n", __func__);
out_no_inode:
#ifdef CONFIG_JOLIET
unload_nls(sbi->s_nls_iocharset);
#endif
goto out_freesbi;
out_no_read:
- printk(KERN_WARNING "%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n",
+ pr_warn("%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n",
__func__, s->s_id, iso_blknum, block);
goto out_freebh;
out_bad_zone_size:
- printk(KERN_WARNING "ISOFS: Bad logical zone size %ld\n",
- sbi->s_log_zone_size);
+ pr_warn("Bad logical zone size %ld\n", sbi->s_log_zone_size);
goto out_freebh;
out_bad_size:
- printk(KERN_WARNING "ISOFS: Logical zone size(%d) < hardware blocksize(%u)\n",
+ pr_warn("Logical zone size(%d) < hardware blocksize(%u)\n",
orig_zonesize, opt.blocksize);
goto out_freebh;
out_unknown_format:
if (!silent)
- printk(KERN_WARNING "ISOFS: Unable to identify CD-ROM format.\n");
+ pr_warn("Unable to identify CD-ROM format.\n");
out_freebh:
brelse(bh);
@@ -1021,7 +1019,7 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock,
error = -EIO;
rv = 0;
if (iblock != b_off) {
- printk(KERN_DEBUG "%s: block number too large\n", __func__);
+ pr_debug("%s: block number too large\n", __func__);
goto abort;
}
@@ -1042,7 +1040,7 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock,
* I/O errors.
*/
if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) {
- printk(KERN_DEBUG "%s: block >= EOF (%lu, %llu)\n",
+ pr_debug("%s: block >= EOF (%lu, %llu)\n",
__func__, b_off,
(unsigned long long)inode->i_size);
goto abort;
@@ -1068,12 +1066,11 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock,
iput(ninode);
if (++section > 100) {
- printk(KERN_DEBUG "%s: More than 100 file sections ?!?"
- " aborting...\n", __func__);
- printk(KERN_DEBUG "%s: block=%lu firstext=%u sect_size=%u "
- "nextblk=%lu nextoff=%lu\n", __func__,
- b_off, firstext, (unsigned) sect_size,
- nextblk, nextoff);
+ pr_debug("%s: More than 100 file sections ?!? aborting...\n",
+ __func__);
+ pr_debug("%s: block=%lu firstext=%u sect_size=%u nextblk=%lu nextoff=%lu\n",
+ __func__, b_off, firstext,
+ (unsigned) sect_size, nextblk, nextoff);
goto abort;
}
}
@@ -1105,7 +1102,7 @@ static int isofs_get_block(struct inode *inode, sector_t iblock,
int ret;
if (create) {
- printk(KERN_DEBUG "%s: Kernel tries to allocate a block\n", __func__);
+ pr_debug("%s: Kernel tries to allocate a block\n", __func__);
return -EROFS;
}
@@ -1248,13 +1245,12 @@ out_nomem:
return -ENOMEM;
out_noread:
- printk(KERN_INFO "ISOFS: unable to read i-node block %lu\n", block);
+ pr_info("unable to read i-node block %lu\n", block);
kfree(tmpde);
return -EIO;
out_toomany:
- printk(KERN_INFO "%s: More than 100 file sections ?!?, aborting...\n"
- "isofs_read_level3_size: inode=%lu\n",
+ pr_info("%s: More than 100 file sections ?!?, aborting...\n isofs_read_level3_size: inode=%lu\n",
__func__, inode->i_ino);
goto out;
}
@@ -1289,7 +1285,7 @@ static int isofs_read_inode(struct inode *inode)
tmpde = kmalloc(de_len, GFP_KERNEL);
if (tmpde == NULL) {
- printk(KERN_INFO "%s: out of memory\n", __func__);
+ pr_info("%s: out of memory\n", __func__);
ret = -ENOMEM;
goto fail;
}
@@ -1364,24 +1360,23 @@ static int isofs_read_inode(struct inode *inode)
inode->i_size &= 0x00ffffff;
if (de->interleave[0]) {
- printk(KERN_DEBUG "ISOFS: Interleaved files not (yet) supported.\n");
+ pr_debug("Interleaved files not (yet) supported.\n");
inode->i_size = 0;
}
/* I have no idea what file_unit_size is used for, so
we will flag it for now */
if (de->file_unit_size[0] != 0) {
- printk(KERN_DEBUG "ISOFS: File unit size != 0 for ISO file (%ld).\n",
- inode->i_ino);
+ pr_debug("File unit size != 0 for ISO file (%ld).\n",
+ inode->i_ino);
}
/* I have no idea what other flag bits are used for, so
we will flag it for now */
-#ifdef DEBUG
+#ifdef DEBUG_FLAGS
if((de->flags[-high_sierra] & ~2)!= 0){
- printk(KERN_DEBUG "ISOFS: Unusual flag settings for ISO file "
- "(%ld %x).\n",
- inode->i_ino, de->flags[-high_sierra]);
+ pr_debug("Unusual flag settings for ISO file (%ld %x).\n",
+ inode->i_ino, de->flags[-high_sierra]);
}
#endif
@@ -1450,7 +1445,7 @@ out:
return ret;
out_badread:
- printk(KERN_WARNING "ISOFS: unable to read i-node block\n");
+ pr_warn("unable to read i-node block\n");
fail:
goto out;
}
@@ -1541,6 +1536,7 @@ MODULE_ALIAS("iso9660");
static int __init init_iso9660_fs(void)
{
int err = init_inodecache();
+
if (err)
goto out;
#ifdef CONFIG_ZISOFS
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 95295640d9c8..c5ed09733112 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -113,9 +113,8 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
dpnt = de->name;
/* Basic sanity check, whether name doesn't exceed dir entry */
if (de_len < dlen + sizeof(struct iso_directory_record)) {
- printk(KERN_NOTICE "iso9660: Corrupted directory entry"
- " in block %lu of inode %lu\n", block,
- dir->i_ino);
+ pr_notice("Corrupted directory entry in block %lu of inode %lu\n",
+ block, dir->i_ino);
return 0;
}
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index c0bf42472e40..b13119556e5d 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -5,6 +5,8 @@
*
* Rock Ridge Extensions to iso9660
*/
+#define DEBUG
+#define pr_fmt(fmt) "ISOFS: rock: " fmt
#include <linux/slab.h>
#include <linux/pagemap.h>
@@ -89,9 +91,8 @@ static int rock_continue(struct rock_state *rs)
if ((unsigned)rs->cont_offset > blocksize - min_de_size ||
(unsigned)rs->cont_size > blocksize ||
(unsigned)(rs->cont_offset + rs->cont_size) > blocksize) {
- printk(KERN_NOTICE "rock: corrupted directory entry. "
- "extent=%d, offset=%d, size=%d\n",
- rs->cont_extent, rs->cont_offset, rs->cont_size);
+ pr_notice("corrupted directory entry. extent=%d, offset=%d, size=%d\n",
+ rs->cont_extent, rs->cont_offset, rs->cont_size);
ret = -EIO;
goto out;
}
@@ -117,7 +118,7 @@ static int rock_continue(struct rock_state *rs)
rs->cont_offset = 0;
return 0;
}
- printk("Unable to read rock-ridge attributes\n");
+ pr_warn("Unable to read rock-ridge attributes\n");
}
out:
kfree(rs->buffer);
@@ -176,10 +177,9 @@ static int rock_check_overflow(struct rock_state *rs, int sig)
}
len += offsetof(struct rock_ridge, u);
if (len > rs->len) {
- printk(KERN_NOTICE "rock: directory entry would overflow "
- "storage\n");
- printk(KERN_NOTICE "rock: sig=0x%02x, size=%d, remaining=%d\n",
- sig, len, rs->len);
+ pr_notice("directory entry would overflow storage\n");
+ pr_notice("sig=0x%02x, size=%d, remaining=%d\n",
+ sig, len, rs->len);
return -EIO;
}
return 0;
@@ -257,7 +257,7 @@ repeat:
break;
if (rr->u.NM.flags & ~1) {
- printk("Unsupported NM flag settings (%d)\n",
+ pr_warn("Unsupported NM flag settings (%d)\n",
rr->u.NM.flags);
break;
}
@@ -353,13 +353,13 @@ repeat:
break;
case SIG('E', 'R'):
ISOFS_SB(inode->i_sb)->s_rock = 1;
- printk(KERN_DEBUG "ISO 9660 Extensions: ");
+ pr_debug("ISO 9660 Extensions: ");
{
int p;
for (p = 0; p < rr->u.ER.len_id; p++)
- printk("%c", rr->u.ER.data[p]);
+ pr_warn("%c", rr->u.ER.data[p]);
}
- printk("\n");
+ pr_warn("\n");
break;
case SIG('P', 'X'):
inode->i_mode = isonum_733(rr->u.PX.mode);
@@ -450,8 +450,7 @@ repeat:
inode->i_size += 1;
break;
default:
- printk("Symlink component flag "
- "not implemented\n");
+ pr_warn("Symlink component flag not implemented\n");
}
slen -= slp->len + 2;
oldslp = slp;
@@ -481,8 +480,7 @@ repeat:
symlink_len = inode->i_size;
break;
case SIG('R', 'E'):
- printk(KERN_WARNING "Attempt to read inode for "
- "relocated directory\n");
+ pr_warn("Attempt to read inode for relocated directory\n");
goto out;
case SIG('C', 'L'):
ISOFS_I(inode)->i_first_extent =
@@ -518,9 +516,7 @@ repeat:
int block_shift =
isonum_711(&rr->u.ZF.parms[1]);
if (block_shift > 17) {
- printk(KERN_WARNING "isofs: "
- "Can't handle ZF block "
- "size of 2^%d\n",
+ pr_warn("Can't handle ZF block size of 2^%d\n",
block_shift);
} else {
/*
@@ -543,9 +539,7 @@ repeat:
real_size);
}
} else {
- printk(KERN_WARNING
- "isofs: Unknown ZF compression "
- "algorithm: %c%c\n",
+ pr_warn("Unknown ZF compression algorithm: %c%c\n",
rr->u.ZF.algorithm[0],
rr->u.ZF.algorithm[1]);
}
@@ -604,7 +598,7 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
*rpnt++ = '/';
break;
default:
- printk("Symlink component flag not implemented (%d)\n",
+ pr_warn("Symlink component flag not implemented (%d)\n",
slp->flags);
}
slen -= slp->len + 2;
@@ -757,10 +751,10 @@ out:
kfree(rs.buffer);
goto fail;
out_noread:
- printk("unable to read i-node block");
+ pr_warn("unable to read i-node block");
goto fail;
out_bad_span:
- printk("symlink spans iso9660 blocks\n");
+ pr_warn("symlink spans iso9660 blocks\n");
fail:
brelse(bh);
error:
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 0b9a1e44e833..5698dae5d92d 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -94,11 +94,12 @@ static int jffs2_zlib_compress(unsigned char *data_in,
while (def_strm.total_out < *dstlen - STREAM_END_SPACE && def_strm.total_in < *sourcelen) {
def_strm.avail_out = *dstlen - (def_strm.total_out + STREAM_END_SPACE);
- def_strm.avail_in = min((unsigned)(*sourcelen-def_strm.total_in), def_strm.avail_out);
- jffs2_dbg(1, "calling deflate with avail_in %d, avail_out %d\n",
+ def_strm.avail_in = min_t(unsigned long,
+ (*sourcelen-def_strm.total_in), def_strm.avail_out);
+ jffs2_dbg(1, "calling deflate with avail_in %ld, avail_out %ld\n",
def_strm.avail_in, def_strm.avail_out);
ret = zlib_deflate(&def_strm, Z_PARTIAL_FLUSH);
- jffs2_dbg(1, "deflate returned with avail_in %d, avail_out %d, total_in %ld, total_out %ld\n",
+ jffs2_dbg(1, "deflate returned with avail_in %ld, avail_out %ld, total_in %ld, total_out %ld\n",
def_strm.avail_in, def_strm.avail_out,
def_strm.total_in, def_strm.total_out);
if (ret != Z_OK) {
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 48140315f627..380d86e1ab45 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1019,11 +1019,11 @@ static int __logfs_is_valid_block(struct inode *inode, u64 bix, u64 ofs)
/**
* logfs_is_valid_block - check whether this block is still valid
*
- * @sb - superblock
- * @ofs - block physical offset
- * @ino - block inode number
- * @bix - block index
- * @level - block level
+ * @sb: superblock
+ * @ofs: block physical offset
+ * @ino: block inode number
+ * @bix: block index
+ * @gc_level: block level
*
* Returns 0 if the block is invalid, 1 if it is valid and 2 if it will
* become invalid once the journal is written.
@@ -2226,10 +2226,9 @@ void btree_write_block(struct logfs_block *block)
*
* @inode: parent inode (ifile or directory)
* @buf: object to write (inode or dentry)
- * @n: object size
- * @_pos: object number (file position in blocks/objects)
+ * @count: object size
+ * @bix: block index
* @flags: write flags
- * @lock: 0 if write lock is already taken, 1 otherwise
* @shadow_tree: shadow below this inode
*
* FIXME: All caller of this put a 200-300 byte variable on the stack,
diff --git a/fs/mpage.c b/fs/mpage.c
index 5f9ed622274f..003f6fe3cdb6 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -480,6 +480,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
struct buffer_head map_bh;
loff_t i_size = i_size_read(inode);
int ret = 0;
+ int wr = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
if (page_has_buffers(page)) {
struct buffer_head *head = page_buffers(page);
@@ -588,7 +589,7 @@ page_is_mapped:
* This page will go to BIO. Do we need to send this BIO off first?
*/
if (bio && mpd->last_block_in_bio != blocks[0] - 1)
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
alloc_new:
if (bio == NULL) {
@@ -612,7 +613,7 @@ alloc_new:
*/
length = first_unmapped << blkbits;
if (bio_add_page(bio, page, length, 0) < length) {
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
goto alloc_new;
}
@@ -622,7 +623,7 @@ alloc_new:
set_page_writeback(page);
unlock_page(page);
if (boundary || (first_unmapped != blocks_per_page)) {
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
if (boundary_block) {
write_boundary_block(boundary_bdev,
boundary_block, 1 << blkbits);
@@ -634,7 +635,7 @@ alloc_new:
confused:
if (bio)
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
if (mpd->use_writepage) {
ret = mapping->a_ops->writepage(page, wbc);
@@ -690,8 +691,11 @@ mpage_writepages(struct address_space *mapping,
};
ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
- if (mpd.bio)
- mpage_bio_submit(WRITE, mpd.bio);
+ if (mpd.bio) {
+ int wr = (wbc->sync_mode == WB_SYNC_ALL ?
+ WRITE_SYNC : WRITE);
+ mpage_bio_submit(wr, mpd.bio);
+ }
}
blk_finish_plug(&plug);
return ret;
@@ -708,8 +712,11 @@ int mpage_writepage(struct page *page, get_block_t get_block,
.use_writepage = 0,
};
int ret = __mpage_writepage(page, wbc, &mpd);
- if (mpd.bio)
- mpage_bio_submit(WRITE, mpd.bio);
+ if (mpd.bio) {
+ int wr = (wbc->sync_mode == WB_SYNC_ALL ?
+ WRITE_SYNC : WRITE);
+ mpage_bio_submit(wr, mpd.bio);
+ }
return ret;
}
EXPORT_SYMBOL(mpage_writepage);
diff --git a/fs/namespace.c b/fs/namespace.c
index 7886176232c1..0acabea58319 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -798,7 +798,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
list_splice(&head, n->list.prev);
if (shadows)
- hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash);
+ hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
else
hlist_add_head_rcu(&mnt->mnt_hash,
m_hash(&parent->mnt, mnt->mnt_mountpoint));
diff --git a/fs/nilfs2/Makefile b/fs/nilfs2/Makefile
index 85c98737a146..fc603e0431bb 100644
--- a/fs/nilfs2/Makefile
+++ b/fs/nilfs2/Makefile
@@ -2,4 +2,4 @@ obj-$(CONFIG_NILFS2_FS) += nilfs2.o
nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \
btnode.o bmap.o btree.o direct.o dat.o recovery.o \
the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \
- ifile.o alloc.o gcinode.o ioctl.o
+ ifile.o alloc.o gcinode.o ioctl.o sysfs.o
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 9bc72dec3fa6..0696161bf59d 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -320,6 +320,14 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *);
int nilfs_init_gcinode(struct inode *inode);
void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs);
+/* sysfs.c */
+int __init nilfs_sysfs_init(void);
+void nilfs_sysfs_exit(void);
+int nilfs_sysfs_create_device_group(struct super_block *);
+void nilfs_sysfs_delete_device_group(struct the_nilfs *);
+int nilfs_sysfs_create_snapshot_group(struct nilfs_root *);
+void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *);
+
/*
* Inodes and files operations
*/
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index ac914994dfed..058470602cd1 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1014,7 +1014,7 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
struct dentry *dentry;
int ret;
- if (cno < 0 || cno > nilfs->ns_cno)
+ if (cno > nilfs->ns_cno)
return false;
if (cno >= nilfs_last_cno(nilfs))
@@ -1452,13 +1452,19 @@ static int __init init_nilfs_fs(void)
if (err)
goto fail;
- err = register_filesystem(&nilfs_fs_type);
+ err = nilfs_sysfs_init();
if (err)
goto free_cachep;
+ err = register_filesystem(&nilfs_fs_type);
+ if (err)
+ goto deinit_sysfs_entry;
+
printk(KERN_INFO "NILFS version 2 loaded\n");
return 0;
+deinit_sysfs_entry:
+ nilfs_sysfs_exit();
free_cachep:
nilfs_destroy_cachep();
fail:
@@ -1468,6 +1474,7 @@ fail:
static void __exit exit_nilfs_fs(void)
{
nilfs_destroy_cachep();
+ nilfs_sysfs_exit();
unregister_filesystem(&nilfs_fs_type);
}
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
new file mode 100644
index 000000000000..bbb0dcc35905
--- /dev/null
+++ b/fs/nilfs2/sysfs.c
@@ -0,0 +1,1137 @@
+/*
+ * sysfs.c - sysfs support implementation.
+ *
+ * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation.
+ * Copyright (C) 2014 HGST, Inc., a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com>
+ */
+
+#include <linux/kobject.h>
+
+#include "nilfs.h"
+#include "mdt.h"
+#include "sufile.h"
+#include "cpfile.h"
+#include "sysfs.h"
+
+/* /sys/fs/<nilfs>/ */
+static struct kset *nilfs_kset;
+
+#define NILFS_SHOW_TIME(time_t_val, buf) ({ \
+ struct tm res; \
+ int count = 0; \
+ time_to_tm(time_t_val, 0, &res); \
+ res.tm_year += 1900; \
+ res.tm_mon += 1; \
+ count = scnprintf(buf, PAGE_SIZE, \
+ "%ld-%.2d-%.2d %.2d:%.2d:%.2d\n", \
+ res.tm_year, res.tm_mon, res.tm_mday, \
+ res.tm_hour, res.tm_min, res.tm_sec);\
+ count; \
+})
+
+#define NILFS_DEV_INT_GROUP_OPS(name, parent_name) \
+static ssize_t nilfs_##name##_attr_show(struct kobject *kobj, \
+ struct attribute *attr, char *buf) \
+{ \
+ struct the_nilfs *nilfs = container_of(kobj->parent, \
+ struct the_nilfs, \
+ ns_##parent_name##_kobj); \
+ struct nilfs_##name##_attr *a = container_of(attr, \
+ struct nilfs_##name##_attr, \
+ attr); \
+ return a->show ? a->show(a, nilfs, buf) : 0; \
+} \
+static ssize_t nilfs_##name##_attr_store(struct kobject *kobj, \
+ struct attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ struct the_nilfs *nilfs = container_of(kobj->parent, \
+ struct the_nilfs, \
+ ns_##parent_name##_kobj); \
+ struct nilfs_##name##_attr *a = container_of(attr, \
+ struct nilfs_##name##_attr, \
+ attr); \
+ return a->store ? a->store(a, nilfs, buf, len) : 0; \
+} \
+static const struct sysfs_ops nilfs_##name##_attr_ops = { \
+ .show = nilfs_##name##_attr_show, \
+ .store = nilfs_##name##_attr_store, \
+};
+
+#define NILFS_DEV_INT_GROUP_TYPE(name, parent_name) \
+static void nilfs_##name##_attr_release(struct kobject *kobj) \
+{ \
+ struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \
+ struct the_nilfs *nilfs = container_of(kobj->parent, \
+ struct the_nilfs, \
+ ns_##parent_name##_kobj); \
+ subgroups = nilfs->ns_##parent_name##_subgroups; \
+ complete(&subgroups->sg_##name##_kobj_unregister); \
+} \
+static struct kobj_type nilfs_##name##_ktype = { \
+ .default_attrs = nilfs_##name##_attrs, \
+ .sysfs_ops = &nilfs_##name##_attr_ops, \
+ .release = nilfs_##name##_attr_release, \
+};
+
+#define NILFS_DEV_INT_GROUP_FNS(name, parent_name) \
+static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \
+{ \
+ struct kobject *parent; \
+ struct kobject *kobj; \
+ struct completion *kobj_unregister; \
+ struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \
+ int err; \
+ subgroups = nilfs->ns_##parent_name##_subgroups; \
+ kobj = &subgroups->sg_##name##_kobj; \
+ kobj_unregister = &subgroups->sg_##name##_kobj_unregister; \
+ parent = &nilfs->ns_##parent_name##_kobj; \
+ kobj->kset = nilfs_kset; \
+ init_completion(kobj_unregister); \
+ err = kobject_init_and_add(kobj, &nilfs_##name##_ktype, parent, \
+ #name); \
+ if (err) \
+ return err; \
+ return 0; \
+} \
+static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \
+{ \
+ kobject_del(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \
+}
+
+/************************************************************************
+ * NILFS snapshot attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_snapshot_inodes_count_show(struct nilfs_snapshot_attr *attr,
+ struct nilfs_root *root, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)atomic64_read(&root->inodes_count));
+}
+
+static ssize_t
+nilfs_snapshot_blocks_count_show(struct nilfs_snapshot_attr *attr,
+ struct nilfs_root *root, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)atomic64_read(&root->blocks_count));
+}
+
+static const char snapshot_readme_str[] =
+ "The group contains details about mounted snapshot.\n\n"
+ "(1) inodes_count\n\tshow number of inodes for snapshot.\n\n"
+ "(2) blocks_count\n\tshow number of blocks for snapshot.\n\n";
+
+static ssize_t
+nilfs_snapshot_README_show(struct nilfs_snapshot_attr *attr,
+ struct nilfs_root *root, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, snapshot_readme_str);
+}
+
+NILFS_SNAPSHOT_RO_ATTR(inodes_count);
+NILFS_SNAPSHOT_RO_ATTR(blocks_count);
+NILFS_SNAPSHOT_RO_ATTR(README);
+
+static struct attribute *nilfs_snapshot_attrs[] = {
+ NILFS_SNAPSHOT_ATTR_LIST(inodes_count),
+ NILFS_SNAPSHOT_ATTR_LIST(blocks_count),
+ NILFS_SNAPSHOT_ATTR_LIST(README),
+ NULL,
+};
+
+static ssize_t nilfs_snapshot_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct nilfs_root *root =
+ container_of(kobj, struct nilfs_root, snapshot_kobj);
+ struct nilfs_snapshot_attr *a =
+ container_of(attr, struct nilfs_snapshot_attr, attr);
+
+ return a->show ? a->show(a, root, buf) : 0;
+}
+
+static ssize_t nilfs_snapshot_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct nilfs_root *root =
+ container_of(kobj, struct nilfs_root, snapshot_kobj);
+ struct nilfs_snapshot_attr *a =
+ container_of(attr, struct nilfs_snapshot_attr, attr);
+
+ return a->store ? a->store(a, root, buf, len) : 0;
+}
+
+static void nilfs_snapshot_attr_release(struct kobject *kobj)
+{
+ struct nilfs_root *root = container_of(kobj, struct nilfs_root,
+ snapshot_kobj);
+ complete(&root->snapshot_kobj_unregister);
+}
+
+static const struct sysfs_ops nilfs_snapshot_attr_ops = {
+ .show = nilfs_snapshot_attr_show,
+ .store = nilfs_snapshot_attr_store,
+};
+
+static struct kobj_type nilfs_snapshot_ktype = {
+ .default_attrs = nilfs_snapshot_attrs,
+ .sysfs_ops = &nilfs_snapshot_attr_ops,
+ .release = nilfs_snapshot_attr_release,
+};
+
+int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root)
+{
+ struct the_nilfs *nilfs;
+ struct kobject *parent;
+ int err;
+
+ nilfs = root->nilfs;
+ parent = &nilfs->ns_dev_subgroups->sg_mounted_snapshots_kobj;
+ root->snapshot_kobj.kset = nilfs_kset;
+ init_completion(&root->snapshot_kobj_unregister);
+
+ if (root->cno == NILFS_CPTREE_CURRENT_CNO) {
+ err = kobject_init_and_add(&root->snapshot_kobj,
+ &nilfs_snapshot_ktype,
+ &nilfs->ns_dev_kobj,
+ "current_checkpoint");
+ } else {
+ err = kobject_init_and_add(&root->snapshot_kobj,
+ &nilfs_snapshot_ktype,
+ parent,
+ "%llu", root->cno);
+ }
+
+ if (err)
+ return err;
+
+ return 0;
+}
+
+void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root)
+{
+ kobject_del(&root->snapshot_kobj);
+}
+
+/************************************************************************
+ * NILFS mounted snapshots attrs *
+ ************************************************************************/
+
+static const char mounted_snapshots_readme_str[] =
+ "The mounted_snapshots group contains group for\n"
+ "every mounted snapshot.\n";
+
+static ssize_t
+nilfs_mounted_snapshots_README_show(struct nilfs_mounted_snapshots_attr *attr,
+ struct the_nilfs *nilfs, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, mounted_snapshots_readme_str);
+}
+
+NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(README);
+
+static struct attribute *nilfs_mounted_snapshots_attrs[] = {
+ NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(mounted_snapshots, dev);
+NILFS_DEV_INT_GROUP_TYPE(mounted_snapshots, dev);
+NILFS_DEV_INT_GROUP_FNS(mounted_snapshots, dev);
+
+/************************************************************************
+ * NILFS checkpoints attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_checkpoints_checkpoints_number_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 ncheckpoints;
+ struct nilfs_cpstat cpstat;
+ int err;
+
+ down_read(&nilfs->ns_segctor_sem);
+ err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat);
+ up_read(&nilfs->ns_segctor_sem);
+ if (err < 0) {
+ printk(KERN_ERR "NILFS: unable to get checkpoint stat: err=%d\n",
+ err);
+ return err;
+ }
+
+ ncheckpoints = cpstat.cs_ncps;
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", ncheckpoints);
+}
+
+static ssize_t
+nilfs_checkpoints_snapshots_number_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 nsnapshots;
+ struct nilfs_cpstat cpstat;
+ int err;
+
+ down_read(&nilfs->ns_segctor_sem);
+ err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat);
+ up_read(&nilfs->ns_segctor_sem);
+ if (err < 0) {
+ printk(KERN_ERR "NILFS: unable to get checkpoint stat: err=%d\n",
+ err);
+ return err;
+ }
+
+ nsnapshots = cpstat.cs_nsss;
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", nsnapshots);
+}
+
+static ssize_t
+nilfs_checkpoints_last_seg_checkpoint_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 last_cno;
+
+ spin_lock(&nilfs->ns_last_segment_lock);
+ last_cno = nilfs->ns_last_cno;
+ spin_unlock(&nilfs->ns_last_segment_lock);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", last_cno);
+}
+
+static ssize_t
+nilfs_checkpoints_next_checkpoint_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 cno;
+
+ down_read(&nilfs->ns_sem);
+ cno = nilfs->ns_cno;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", cno);
+}
+
+static const char checkpoints_readme_str[] =
+ "The checkpoints group contains attributes that describe\n"
+ "details about volume's checkpoints.\n\n"
+ "(1) checkpoints_number\n\tshow number of checkpoints on volume.\n\n"
+ "(2) snapshots_number\n\tshow number of snapshots on volume.\n\n"
+ "(3) last_seg_checkpoint\n"
+ "\tshow checkpoint number of the latest segment.\n\n"
+ "(4) next_checkpoint\n\tshow next checkpoint number.\n\n";
+
+static ssize_t
+nilfs_checkpoints_README_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, checkpoints_readme_str);
+}
+
+NILFS_CHECKPOINTS_RO_ATTR(checkpoints_number);
+NILFS_CHECKPOINTS_RO_ATTR(snapshots_number);
+NILFS_CHECKPOINTS_RO_ATTR(last_seg_checkpoint);
+NILFS_CHECKPOINTS_RO_ATTR(next_checkpoint);
+NILFS_CHECKPOINTS_RO_ATTR(README);
+
+static struct attribute *nilfs_checkpoints_attrs[] = {
+ NILFS_CHECKPOINTS_ATTR_LIST(checkpoints_number),
+ NILFS_CHECKPOINTS_ATTR_LIST(snapshots_number),
+ NILFS_CHECKPOINTS_ATTR_LIST(last_seg_checkpoint),
+ NILFS_CHECKPOINTS_ATTR_LIST(next_checkpoint),
+ NILFS_CHECKPOINTS_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(checkpoints, dev);
+NILFS_DEV_INT_GROUP_TYPE(checkpoints, dev);
+NILFS_DEV_INT_GROUP_FNS(checkpoints, dev);
+
+/************************************************************************
+ * NILFS segments attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_segments_segments_number_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%lu\n", nilfs->ns_nsegments);
+}
+
+static ssize_t
+nilfs_segments_blocks_per_segment_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%lu\n", nilfs->ns_blocks_per_segment);
+}
+
+static ssize_t
+nilfs_segments_clean_segments_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ unsigned long ncleansegs;
+
+ down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+ ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
+ up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%lu\n", ncleansegs);
+}
+
+static ssize_t
+nilfs_segments_dirty_segments_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_sustat sustat;
+ int err;
+
+ down_read(&nilfs->ns_segctor_sem);
+ err = nilfs_sufile_get_stat(nilfs->ns_sufile, &sustat);
+ up_read(&nilfs->ns_segctor_sem);
+ if (err < 0) {
+ printk(KERN_ERR "NILFS: unable to get segment stat: err=%d\n",
+ err);
+ return err;
+ }
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", sustat.ss_ndirtysegs);
+}
+
+static const char segments_readme_str[] =
+ "The segments group contains attributes that describe\n"
+ "details about volume's segments.\n\n"
+ "(1) segments_number\n\tshow number of segments on volume.\n\n"
+ "(2) blocks_per_segment\n\tshow number of blocks in segment.\n\n"
+ "(3) clean_segments\n\tshow count of clean segments.\n\n"
+ "(4) dirty_segments\n\tshow count of dirty segments.\n\n";
+
+static ssize_t
+nilfs_segments_README_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, segments_readme_str);
+}
+
+NILFS_SEGMENTS_RO_ATTR(segments_number);
+NILFS_SEGMENTS_RO_ATTR(blocks_per_segment);
+NILFS_SEGMENTS_RO_ATTR(clean_segments);
+NILFS_SEGMENTS_RO_ATTR(dirty_segments);
+NILFS_SEGMENTS_RO_ATTR(README);
+
+static struct attribute *nilfs_segments_attrs[] = {
+ NILFS_SEGMENTS_ATTR_LIST(segments_number),
+ NILFS_SEGMENTS_ATTR_LIST(blocks_per_segment),
+ NILFS_SEGMENTS_ATTR_LIST(clean_segments),
+ NILFS_SEGMENTS_ATTR_LIST(dirty_segments),
+ NILFS_SEGMENTS_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(segments, dev);
+NILFS_DEV_INT_GROUP_TYPE(segments, dev);
+NILFS_DEV_INT_GROUP_FNS(segments, dev);
+
+/************************************************************************
+ * NILFS segctor attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_segctor_last_pseg_block_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ sector_t last_pseg;
+
+ spin_lock(&nilfs->ns_last_segment_lock);
+ last_pseg = nilfs->ns_last_pseg;
+ spin_unlock(&nilfs->ns_last_segment_lock);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)last_pseg);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_sequence_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ u64 last_seq;
+
+ spin_lock(&nilfs->ns_last_segment_lock);
+ last_seq = nilfs->ns_last_seq;
+ spin_unlock(&nilfs->ns_last_segment_lock);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", last_seq);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_checkpoint_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 last_cno;
+
+ spin_lock(&nilfs->ns_last_segment_lock);
+ last_cno = nilfs->ns_last_cno;
+ spin_unlock(&nilfs->ns_last_segment_lock);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", last_cno);
+}
+
+static ssize_t
+nilfs_segctor_current_seg_sequence_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ u64 seg_seq;
+
+ down_read(&nilfs->ns_sem);
+ seg_seq = nilfs->ns_seg_seq;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", seg_seq);
+}
+
+static ssize_t
+nilfs_segctor_current_last_full_seg_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 segnum;
+
+ down_read(&nilfs->ns_sem);
+ segnum = nilfs->ns_segnum;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", segnum);
+}
+
+static ssize_t
+nilfs_segctor_next_full_seg_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 nextnum;
+
+ down_read(&nilfs->ns_sem);
+ nextnum = nilfs->ns_nextnum;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", nextnum);
+}
+
+static ssize_t
+nilfs_segctor_next_pseg_offset_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ unsigned long pseg_offset;
+
+ down_read(&nilfs->ns_sem);
+ pseg_offset = nilfs->ns_pseg_offset;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%lu\n", pseg_offset);
+}
+
+static ssize_t
+nilfs_segctor_next_checkpoint_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 cno;
+
+ down_read(&nilfs->ns_sem);
+ cno = nilfs->ns_cno;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", cno);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_write_time_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t ctime;
+
+ down_read(&nilfs->ns_sem);
+ ctime = nilfs->ns_ctime;
+ up_read(&nilfs->ns_sem);
+
+ return NILFS_SHOW_TIME(ctime, buf);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_write_time_secs_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t ctime;
+
+ down_read(&nilfs->ns_sem);
+ ctime = nilfs->ns_ctime;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)ctime);
+}
+
+static ssize_t
+nilfs_segctor_last_nongc_write_time_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t nongc_ctime;
+
+ down_read(&nilfs->ns_sem);
+ nongc_ctime = nilfs->ns_nongc_ctime;
+ up_read(&nilfs->ns_sem);
+
+ return NILFS_SHOW_TIME(nongc_ctime, buf);
+}
+
+static ssize_t
+nilfs_segctor_last_nongc_write_time_secs_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t nongc_ctime;
+
+ down_read(&nilfs->ns_sem);
+ nongc_ctime = nilfs->ns_nongc_ctime;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)nongc_ctime);
+}
+
+static ssize_t
+nilfs_segctor_dirty_data_blocks_count_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ u32 ndirtyblks;
+
+ down_read(&nilfs->ns_sem);
+ ndirtyblks = atomic_read(&nilfs->ns_ndirtyblks);
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", ndirtyblks);
+}
+
+static const char segctor_readme_str[] =
+ "The segctor group contains attributes that describe\n"
+ "segctor thread activity details.\n\n"
+ "(1) last_pseg_block\n"
+ "\tshow start block number of the latest segment.\n\n"
+ "(2) last_seg_sequence\n"
+ "\tshow sequence value of the latest segment.\n\n"
+ "(3) last_seg_checkpoint\n"
+ "\tshow checkpoint number of the latest segment.\n\n"
+ "(4) current_seg_sequence\n\tshow segment sequence counter.\n\n"
+ "(5) current_last_full_seg\n"
+ "\tshow index number of the latest full segment.\n\n"
+ "(6) next_full_seg\n"
+ "\tshow index number of the full segment index to be used next.\n\n"
+ "(7) next_pseg_offset\n"
+ "\tshow offset of next partial segment in the current full segment.\n\n"
+ "(8) next_checkpoint\n\tshow next checkpoint number.\n\n"
+ "(9) last_seg_write_time\n"
+ "\tshow write time of the last segment in human-readable format.\n\n"
+ "(10) last_seg_write_time_secs\n"
+ "\tshow write time of the last segment in seconds.\n\n"
+ "(11) last_nongc_write_time\n"
+ "\tshow write time of the last segment not for cleaner operation "
+ "in human-readable format.\n\n"
+ "(12) last_nongc_write_time_secs\n"
+ "\tshow write time of the last segment not for cleaner operation "
+ "in seconds.\n\n"
+ "(13) dirty_data_blocks_count\n"
+ "\tshow number of dirty data blocks.\n\n";
+
+static ssize_t
+nilfs_segctor_README_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%s", segctor_readme_str);
+}
+
+NILFS_SEGCTOR_RO_ATTR(last_pseg_block);
+NILFS_SEGCTOR_RO_ATTR(last_seg_sequence);
+NILFS_SEGCTOR_RO_ATTR(last_seg_checkpoint);
+NILFS_SEGCTOR_RO_ATTR(current_seg_sequence);
+NILFS_SEGCTOR_RO_ATTR(current_last_full_seg);
+NILFS_SEGCTOR_RO_ATTR(next_full_seg);
+NILFS_SEGCTOR_RO_ATTR(next_pseg_offset);
+NILFS_SEGCTOR_RO_ATTR(next_checkpoint);
+NILFS_SEGCTOR_RO_ATTR(last_seg_write_time);
+NILFS_SEGCTOR_RO_ATTR(last_seg_write_time_secs);
+NILFS_SEGCTOR_RO_ATTR(last_nongc_write_time);
+NILFS_SEGCTOR_RO_ATTR(last_nongc_write_time_secs);
+NILFS_SEGCTOR_RO_ATTR(dirty_data_blocks_count);
+NILFS_SEGCTOR_RO_ATTR(README);
+
+static struct attribute *nilfs_segctor_attrs[] = {
+ NILFS_SEGCTOR_ATTR_LIST(last_pseg_block),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_sequence),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_checkpoint),
+ NILFS_SEGCTOR_ATTR_LIST(current_seg_sequence),
+ NILFS_SEGCTOR_ATTR_LIST(current_last_full_seg),
+ NILFS_SEGCTOR_ATTR_LIST(next_full_seg),
+ NILFS_SEGCTOR_ATTR_LIST(next_pseg_offset),
+ NILFS_SEGCTOR_ATTR_LIST(next_checkpoint),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_write_time),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_write_time_secs),
+ NILFS_SEGCTOR_ATTR_LIST(last_nongc_write_time),
+ NILFS_SEGCTOR_ATTR_LIST(last_nongc_write_time_secs),
+ NILFS_SEGCTOR_ATTR_LIST(dirty_data_blocks_count),
+ NILFS_SEGCTOR_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(segctor, dev);
+NILFS_DEV_INT_GROUP_TYPE(segctor, dev);
+NILFS_DEV_INT_GROUP_FNS(segctor, dev);
+
+/************************************************************************
+ * NILFS superblock attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_superblock_sb_write_time_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t sbwtime;
+
+ down_read(&nilfs->ns_sem);
+ sbwtime = nilfs->ns_sbwtime;
+ up_read(&nilfs->ns_sem);
+
+ return NILFS_SHOW_TIME(sbwtime, buf);
+}
+
+static ssize_t
+nilfs_superblock_sb_write_time_secs_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t sbwtime;
+
+ down_read(&nilfs->ns_sem);
+ sbwtime = nilfs->ns_sbwtime;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)sbwtime);
+}
+
+static ssize_t
+nilfs_superblock_sb_write_count_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ unsigned sbwcount;
+
+ down_read(&nilfs->ns_sem);
+ sbwcount = nilfs->ns_sbwcount;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", sbwcount);
+}
+
+static ssize_t
+nilfs_superblock_sb_update_frequency_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ unsigned sb_update_freq;
+
+ down_read(&nilfs->ns_sem);
+ sb_update_freq = nilfs->ns_sb_update_freq;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", sb_update_freq);
+}
+
+static ssize_t
+nilfs_superblock_sb_update_frequency_store(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ const char *buf, size_t count)
+{
+ unsigned val;
+ int err;
+
+ err = kstrtouint(skip_spaces(buf), 0, &val);
+ if (err) {
+ printk(KERN_ERR "NILFS: unable to convert string: err=%d\n",
+ err);
+ return err;
+ }
+
+ if (val < NILFS_SB_FREQ) {
+ val = NILFS_SB_FREQ;
+		printk(KERN_WARNING "NILFS: superblock update frequency cannot be less than 10 seconds\n");
+ }
+
+ down_write(&nilfs->ns_sem);
+ nilfs->ns_sb_update_freq = val;
+ up_write(&nilfs->ns_sem);
+
+ return count;
+}
+
+static const char sb_readme_str[] =
+	"The superblock group contains attributes that describe\n"
+	"the superblock's details.\n\n"
+	"(1) sb_write_time\n\tshow previous write time of the superblock "
+	"in human-readable format.\n\n"
+	"(2) sb_write_time_secs\n\tshow previous write time of the superblock "
+	"in seconds.\n\n"
+	"(3) sb_write_count\n\tshow write count of the superblock.\n\n"
+	"(4) sb_update_frequency\n"
+	"\tshow/set interval of periodic superblock updates (in seconds).\n\n"
+	"\tYou can set the preferred superblock update frequency with the command:\n\n"
+ "\t'echo <val> > /sys/fs/<nilfs>/<dev>/superblock/sb_update_frequency'\n";
+
+static ssize_t
+nilfs_superblock_README_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%s", sb_readme_str);
+}
+
+NILFS_SUPERBLOCK_RO_ATTR(sb_write_time);
+NILFS_SUPERBLOCK_RO_ATTR(sb_write_time_secs);
+NILFS_SUPERBLOCK_RO_ATTR(sb_write_count);
+NILFS_SUPERBLOCK_RW_ATTR(sb_update_frequency);
+NILFS_SUPERBLOCK_RO_ATTR(README);
+
+static struct attribute *nilfs_superblock_attrs[] = {
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_write_time),
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_write_time_secs),
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_write_count),
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_update_frequency),
+ NILFS_SUPERBLOCK_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(superblock, dev);
+NILFS_DEV_INT_GROUP_TYPE(superblock, dev);
+NILFS_DEV_INT_GROUP_FNS(superblock, dev);
+
+/************************************************************************
+ * NILFS device attrs *
+ ************************************************************************/
+
+static
+ssize_t nilfs_dev_revision_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_super_block **sbp = nilfs->ns_sbp;
+ u32 major = le32_to_cpu(sbp[0]->s_rev_level);
+ u16 minor = le16_to_cpu(sbp[0]->s_minor_rev_level);
+
+ return snprintf(buf, PAGE_SIZE, "%d.%d\n", major, minor);
+}
+
+static
+ssize_t nilfs_dev_blocksize_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", nilfs->ns_blocksize);
+}
+
+static
+ssize_t nilfs_dev_device_size_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_super_block **sbp = nilfs->ns_sbp;
+ u64 dev_size = le64_to_cpu(sbp[0]->s_dev_size);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", dev_size);
+}
+
+static
+ssize_t nilfs_dev_free_blocks_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ sector_t free_blocks = 0;
+
+ nilfs_count_free_blocks(nilfs, &free_blocks);
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)free_blocks);
+}
+
+static
+ssize_t nilfs_dev_uuid_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_super_block **sbp = nilfs->ns_sbp;
+
+ return snprintf(buf, PAGE_SIZE, "%pUb\n", sbp[0]->s_uuid);
+}
+
+static
+ssize_t nilfs_dev_volume_name_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_super_block **sbp = nilfs->ns_sbp;
+
+ return scnprintf(buf, sizeof(sbp[0]->s_volume_name), "%s\n",
+ sbp[0]->s_volume_name);
+}
+
+static const char dev_readme_str[] =
+	"The <device> group contains attributes that describe the file system\n"
+	"partition's details.\n\n"
+ "(1) revision\n\tshow NILFS file system revision.\n\n"
+ "(2) blocksize\n\tshow volume block size in bytes.\n\n"
+ "(3) device_size\n\tshow volume size in bytes.\n\n"
+ "(4) free_blocks\n\tshow count of free blocks on volume.\n\n"
+ "(5) uuid\n\tshow volume's UUID.\n\n"
+ "(6) volume_name\n\tshow volume's name.\n\n";
+
+static ssize_t nilfs_dev_README_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%s", dev_readme_str);
+}
+
+NILFS_DEV_RO_ATTR(revision);
+NILFS_DEV_RO_ATTR(blocksize);
+NILFS_DEV_RO_ATTR(device_size);
+NILFS_DEV_RO_ATTR(free_blocks);
+NILFS_DEV_RO_ATTR(uuid);
+NILFS_DEV_RO_ATTR(volume_name);
+NILFS_DEV_RO_ATTR(README);
+
+static struct attribute *nilfs_dev_attrs[] = {
+ NILFS_DEV_ATTR_LIST(revision),
+ NILFS_DEV_ATTR_LIST(blocksize),
+ NILFS_DEV_ATTR_LIST(device_size),
+ NILFS_DEV_ATTR_LIST(free_blocks),
+ NILFS_DEV_ATTR_LIST(uuid),
+ NILFS_DEV_ATTR_LIST(volume_name),
+ NILFS_DEV_ATTR_LIST(README),
+ NULL,
+};
+
+static ssize_t nilfs_dev_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
+ ns_dev_kobj);
+ struct nilfs_dev_attr *a = container_of(attr, struct nilfs_dev_attr,
+ attr);
+
+ return a->show ? a->show(a, nilfs, buf) : 0;
+}
+
+static ssize_t nilfs_dev_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
+ ns_dev_kobj);
+ struct nilfs_dev_attr *a = container_of(attr, struct nilfs_dev_attr,
+ attr);
+
+ return a->store ? a->store(a, nilfs, buf, len) : 0;
+}
+
+static void nilfs_dev_attr_release(struct kobject *kobj)
+{
+ struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
+ ns_dev_kobj);
+ complete(&nilfs->ns_dev_kobj_unregister);
+}
+
+static const struct sysfs_ops nilfs_dev_attr_ops = {
+ .show = nilfs_dev_attr_show,
+ .store = nilfs_dev_attr_store,
+};
+
+static struct kobj_type nilfs_dev_ktype = {
+ .default_attrs = nilfs_dev_attrs,
+ .sysfs_ops = &nilfs_dev_attr_ops,
+ .release = nilfs_dev_attr_release,
+};
+
+int nilfs_sysfs_create_device_group(struct super_block *sb)
+{
+ struct the_nilfs *nilfs = sb->s_fs_info;
+ size_t devgrp_size = sizeof(struct nilfs_sysfs_dev_subgroups);
+ int err;
+
+ nilfs->ns_dev_subgroups = kzalloc(devgrp_size, GFP_KERNEL);
+ if (unlikely(!nilfs->ns_dev_subgroups)) {
+ err = -ENOMEM;
+ printk(KERN_ERR "NILFS: unable to allocate memory for device group\n");
+ goto failed_create_device_group;
+ }
+
+ nilfs->ns_dev_kobj.kset = nilfs_kset;
+ init_completion(&nilfs->ns_dev_kobj_unregister);
+ err = kobject_init_and_add(&nilfs->ns_dev_kobj, &nilfs_dev_ktype, NULL,
+ "%s", sb->s_id);
+ if (err)
+ goto free_dev_subgroups;
+
+ err = nilfs_sysfs_create_mounted_snapshots_group(nilfs);
+ if (err)
+ goto cleanup_dev_kobject;
+
+ err = nilfs_sysfs_create_checkpoints_group(nilfs);
+ if (err)
+ goto delete_mounted_snapshots_group;
+
+ err = nilfs_sysfs_create_segments_group(nilfs);
+ if (err)
+ goto delete_checkpoints_group;
+
+ err = nilfs_sysfs_create_superblock_group(nilfs);
+ if (err)
+ goto delete_segments_group;
+
+ err = nilfs_sysfs_create_segctor_group(nilfs);
+ if (err)
+ goto delete_superblock_group;
+
+ return 0;
+
+delete_superblock_group:
+ nilfs_sysfs_delete_superblock_group(nilfs);
+
+delete_segments_group:
+ nilfs_sysfs_delete_segments_group(nilfs);
+
+delete_checkpoints_group:
+ nilfs_sysfs_delete_checkpoints_group(nilfs);
+
+delete_mounted_snapshots_group:
+ nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
+
+cleanup_dev_kobject:
+ kobject_del(&nilfs->ns_dev_kobj);
+
+free_dev_subgroups:
+ kfree(nilfs->ns_dev_subgroups);
+
+failed_create_device_group:
+ return err;
+}
+
+void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs)
+{
+ nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
+ nilfs_sysfs_delete_checkpoints_group(nilfs);
+ nilfs_sysfs_delete_segments_group(nilfs);
+ nilfs_sysfs_delete_superblock_group(nilfs);
+ nilfs_sysfs_delete_segctor_group(nilfs);
+ kobject_del(&nilfs->ns_dev_kobj);
+ kfree(nilfs->ns_dev_subgroups);
+}
+
+/************************************************************************
+ * NILFS feature attrs *
+ ************************************************************************/
+
+static ssize_t nilfs_feature_revision_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d.%d\n",
+ NILFS_CURRENT_REV, NILFS_MINOR_REV);
+}
+
+static const char features_readme_str[] =
+ "The features group contains attributes that describe NILFS file\n"
+ "system driver features.\n\n"
+ "(1) revision\n\tshow current revision of NILFS file system driver.\n";
+
+static ssize_t nilfs_feature_README_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%s", features_readme_str);
+}
+
+NILFS_FEATURE_RO_ATTR(revision);
+NILFS_FEATURE_RO_ATTR(README);
+
+static struct attribute *nilfs_feature_attrs[] = {
+ NILFS_FEATURE_ATTR_LIST(revision),
+ NILFS_FEATURE_ATTR_LIST(README),
+ NULL,
+};
+
+static const struct attribute_group nilfs_feature_attr_group = {
+ .name = "features",
+ .attrs = nilfs_feature_attrs,
+};
+
+int __init nilfs_sysfs_init(void)
+{
+ int err;
+
+ nilfs_kset = kset_create_and_add(NILFS_ROOT_GROUP_NAME, NULL, fs_kobj);
+ if (!nilfs_kset) {
+ err = -ENOMEM;
+ printk(KERN_ERR "NILFS: unable to create sysfs entry: err %d\n",
+ err);
+ goto failed_sysfs_init;
+ }
+
+ err = sysfs_create_group(&nilfs_kset->kobj, &nilfs_feature_attr_group);
+ if (unlikely(err)) {
+ printk(KERN_ERR "NILFS: unable to create feature group: err %d\n",
+ err);
+ goto cleanup_sysfs_init;
+ }
+
+ return 0;
+
+cleanup_sysfs_init:
+ kset_unregister(nilfs_kset);
+
+failed_sysfs_init:
+ return err;
+}
+
+void nilfs_sysfs_exit(void)
+{
+ sysfs_remove_group(&nilfs_kset->kobj, &nilfs_feature_attr_group);
+ kset_unregister(nilfs_kset);
+}
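
For orientation, a rough sketch of the sysfs hierarchy the registration code above creates. The root name comes from NILFS_ROOT_GROUP_NAME, the per-device directory from sb->s_id, and the subgroup names from sysfs.h below; this layout is inferred from the patch, and the exact entries depend on the mounted device:

	/sys/fs/nilfs2/
	    features/              revision, README
	    <device>/              revision, blocksize, device_size, free_blocks,
	                           uuid, volume_name, README
	        superblock/
	        segctor/
	        segments/
	        checkpoints/
	        mounted_snapshots/
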
diff --git a/fs/nilfs2/sysfs.h b/fs/nilfs2/sysfs.h
new file mode 100644
index 000000000000..677e3a1a8370
--- /dev/null
+++ b/fs/nilfs2/sysfs.h
@@ -0,0 +1,176 @@
+/*
+ * sysfs.h - sysfs support declarations.
+ *
+ * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation.
+ * Copyright (C) 2014 HGST, Inc., a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com>
+ */
+
+#ifndef _NILFS_SYSFS_H
+#define _NILFS_SYSFS_H
+
+#include <linux/sysfs.h>
+
+#define NILFS_ROOT_GROUP_NAME "nilfs2"
+
+/*
+ * struct nilfs_sysfs_dev_subgroups - device subgroup kernel objects
+ * @sg_superblock_kobj: /sys/fs/<nilfs>/<device>/superblock
+ * @sg_superblock_kobj_unregister: completion state
+ * @sg_segctor_kobj: /sys/fs/<nilfs>/<device>/segctor
+ * @sg_segctor_kobj_unregister: completion state
+ * @sg_mounted_snapshots_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots
+ * @sg_mounted_snapshots_kobj_unregister: completion state
+ * @sg_checkpoints_kobj: /sys/fs/<nilfs>/<device>/checkpoints
+ * @sg_checkpoints_kobj_unregister: completion state
+ * @sg_segments_kobj: /sys/fs/<nilfs>/<device>/segments
+ * @sg_segments_kobj_unregister: completion state
+ */
+struct nilfs_sysfs_dev_subgroups {
+ /* /sys/fs/<nilfs>/<device>/superblock */
+ struct kobject sg_superblock_kobj;
+ struct completion sg_superblock_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/segctor */
+ struct kobject sg_segctor_kobj;
+ struct completion sg_segctor_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/mounted_snapshots */
+ struct kobject sg_mounted_snapshots_kobj;
+ struct completion sg_mounted_snapshots_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/checkpoints */
+ struct kobject sg_checkpoints_kobj;
+ struct completion sg_checkpoints_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/segments */
+ struct kobject sg_segments_kobj;
+ struct completion sg_segments_kobj_unregister;
+};
+
+#define NILFS_COMMON_ATTR_STRUCT(name) \
+struct nilfs_##name##_attr { \
+ struct attribute attr; \
+ ssize_t (*show)(struct kobject *, struct attribute *, \
+ char *); \
+ ssize_t (*store)(struct kobject *, struct attribute *, \
+ const char *, size_t); \
+};
+
+NILFS_COMMON_ATTR_STRUCT(feature);
+
+#define NILFS_DEV_ATTR_STRUCT(name) \
+struct nilfs_##name##_attr { \
+ struct attribute attr; \
+ ssize_t (*show)(struct nilfs_##name##_attr *, struct the_nilfs *, \
+ char *); \
+ ssize_t (*store)(struct nilfs_##name##_attr *, struct the_nilfs *, \
+ const char *, size_t); \
+};
+
+NILFS_DEV_ATTR_STRUCT(dev);
+NILFS_DEV_ATTR_STRUCT(segments);
+NILFS_DEV_ATTR_STRUCT(mounted_snapshots);
+NILFS_DEV_ATTR_STRUCT(checkpoints);
+NILFS_DEV_ATTR_STRUCT(superblock);
+NILFS_DEV_ATTR_STRUCT(segctor);
+
+#define NILFS_CP_ATTR_STRUCT(name) \
+struct nilfs_##name##_attr { \
+ struct attribute attr; \
+ ssize_t (*show)(struct nilfs_##name##_attr *, struct nilfs_root *, \
+ char *); \
+ ssize_t (*store)(struct nilfs_##name##_attr *, struct nilfs_root *, \
+ const char *, size_t); \
+};
+
+NILFS_CP_ATTR_STRUCT(snapshot);
+
+#define NILFS_ATTR(type, name, mode, show, store) \
+ static struct nilfs_##type##_attr nilfs_##type##_attr_##name = \
+ __ATTR(name, mode, show, store)
+
+#define NILFS_INFO_ATTR(type, name) \
+ NILFS_ATTR(type, name, 0444, NULL, NULL)
+#define NILFS_RO_ATTR(type, name) \
+ NILFS_ATTR(type, name, 0444, nilfs_##type##_##name##_show, NULL)
+#define NILFS_RW_ATTR(type, name) \
+ NILFS_ATTR(type, name, 0644, \
+ nilfs_##type##_##name##_show, \
+ nilfs_##type##_##name##_store)
+
+#define NILFS_FEATURE_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(feature, name)
+#define NILFS_FEATURE_RO_ATTR(name) \
+ NILFS_RO_ATTR(feature, name)
+#define NILFS_FEATURE_RW_ATTR(name) \
+ NILFS_RW_ATTR(feature, name)
+
+#define NILFS_DEV_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(dev, name)
+#define NILFS_DEV_RO_ATTR(name) \
+ NILFS_RO_ATTR(dev, name)
+#define NILFS_DEV_RW_ATTR(name) \
+ NILFS_RW_ATTR(dev, name)
+
+#define NILFS_SEGMENTS_RO_ATTR(name) \
+ NILFS_RO_ATTR(segments, name)
+#define NILFS_SEGMENTS_RW_ATTR(name) \
+	NILFS_RW_ATTR(segments, name)
+
+#define NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(name) \
+ NILFS_RO_ATTR(mounted_snapshots, name)
+
+#define NILFS_CHECKPOINTS_RO_ATTR(name) \
+ NILFS_RO_ATTR(checkpoints, name)
+#define NILFS_CHECKPOINTS_RW_ATTR(name) \
+ NILFS_RW_ATTR(checkpoints, name)
+
+#define NILFS_SNAPSHOT_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(snapshot, name)
+#define NILFS_SNAPSHOT_RO_ATTR(name) \
+ NILFS_RO_ATTR(snapshot, name)
+#define NILFS_SNAPSHOT_RW_ATTR(name) \
+ NILFS_RW_ATTR(snapshot, name)
+
+#define NILFS_SUPERBLOCK_RO_ATTR(name) \
+ NILFS_RO_ATTR(superblock, name)
+#define NILFS_SUPERBLOCK_RW_ATTR(name) \
+ NILFS_RW_ATTR(superblock, name)
+
+#define NILFS_SEGCTOR_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(segctor, name)
+#define NILFS_SEGCTOR_RO_ATTR(name) \
+ NILFS_RO_ATTR(segctor, name)
+#define NILFS_SEGCTOR_RW_ATTR(name) \
+ NILFS_RW_ATTR(segctor, name)
+
+#define NILFS_FEATURE_ATTR_LIST(name) \
+ (&nilfs_feature_attr_##name.attr)
+#define NILFS_DEV_ATTR_LIST(name) \
+ (&nilfs_dev_attr_##name.attr)
+#define NILFS_SEGMENTS_ATTR_LIST(name) \
+ (&nilfs_segments_attr_##name.attr)
+#define NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(name) \
+ (&nilfs_mounted_snapshots_attr_##name.attr)
+#define NILFS_CHECKPOINTS_ATTR_LIST(name) \
+ (&nilfs_checkpoints_attr_##name.attr)
+#define NILFS_SNAPSHOT_ATTR_LIST(name) \
+ (&nilfs_snapshot_attr_##name.attr)
+#define NILFS_SUPERBLOCK_ATTR_LIST(name) \
+ (&nilfs_superblock_attr_##name.attr)
+#define NILFS_SEGCTOR_ATTR_LIST(name) \
+ (&nilfs_segctor_attr_##name.attr)
+
+#endif /* _NILFS_SYSFS_H */
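
To make the attribute tables in sysfs.c above easier to follow, here is a hand-expanded sketch of what these macros produce for a single attribute. It is illustrative only and not part of the patch; every name follows directly from the macro definitions above.

	/* NILFS_SUPERBLOCK_RW_ATTR(sb_update_frequency) expands, via
	 * NILFS_RW_ATTR(superblock, ...) and NILFS_ATTR(...), roughly to:
	 */
	static struct nilfs_superblock_attr nilfs_superblock_attr_sb_update_frequency =
		__ATTR(sb_update_frequency, 0644,
		       nilfs_superblock_sb_update_frequency_show,
		       nilfs_superblock_sb_update_frequency_store);

	/* ...and NILFS_SUPERBLOCK_ATTR_LIST(sb_update_frequency) yields
	 * &nilfs_superblock_attr_sb_update_frequency.attr, the pointer stored
	 * in the nilfs_superblock_attrs[] table in sysfs.c.
	 */
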
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 8ba8229ba076..9da25fe9ea61 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -85,6 +85,7 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
nilfs->ns_cptree = RB_ROOT;
spin_lock_init(&nilfs->ns_cptree_lock);
init_rwsem(&nilfs->ns_segctor_sem);
+ nilfs->ns_sb_update_freq = NILFS_SB_FREQ;
return nilfs;
}
@@ -97,6 +98,7 @@ void destroy_nilfs(struct the_nilfs *nilfs)
{
might_sleep();
if (nilfs_init(nilfs)) {
+ nilfs_sysfs_delete_device_group(nilfs);
brelse(nilfs->ns_sbh[0]);
brelse(nilfs->ns_sbh[1]);
}
@@ -640,6 +642,10 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
if (err)
goto failed_sbh;
+ err = nilfs_sysfs_create_device_group(sb);
+ if (err)
+ goto failed_sbh;
+
set_nilfs_init(nilfs);
err = 0;
out:
@@ -740,12 +746,13 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
{
struct rb_node **p, *parent;
struct nilfs_root *root, *new;
+ int err;
root = nilfs_lookup_root(nilfs, cno);
if (root)
return root;
- new = kmalloc(sizeof(*root), GFP_KERNEL);
+ new = kzalloc(sizeof(*root), GFP_KERNEL);
if (!new)
return NULL;
@@ -782,6 +789,12 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
spin_unlock(&nilfs->ns_cptree_lock);
+ err = nilfs_sysfs_create_snapshot_group(new);
+ if (err) {
+ kfree(new);
+ new = NULL;
+ }
+
return new;
}
@@ -790,6 +803,8 @@ void nilfs_put_root(struct nilfs_root *root)
if (atomic_dec_and_test(&root->count)) {
struct the_nilfs *nilfs = root->nilfs;
+ nilfs_sysfs_delete_snapshot_group(root);
+
spin_lock(&nilfs->ns_cptree_lock);
rb_erase(&root->rb_node, &nilfs->ns_cptree);
spin_unlock(&nilfs->ns_cptree_lock);
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index de8cc53b4a5c..d01ead1bea9a 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -33,6 +33,7 @@
#include <linux/slab.h>
struct nilfs_sc_info;
+struct nilfs_sysfs_dev_subgroups;
/* the_nilfs struct */
enum {
@@ -54,6 +55,7 @@ enum {
* @ns_sbwcount: write count of super block
* @ns_sbsize: size of valid data in super block
* @ns_mount_state: file system state
+ * @ns_sb_update_freq: interval of periodic superblock updates (in seconds)
* @ns_seg_seq: segment sequence counter
* @ns_segnum: index number of the latest full segment.
* @ns_nextnum: index number of the full segment index to be used next
@@ -95,6 +97,9 @@ enum {
* @ns_inode_size: size of on-disk inode
* @ns_first_ino: first not-special inode number
* @ns_crc_seed: seed value of CRC32 calculation
+ * @ns_dev_kobj: /sys/fs/<nilfs>/<device>
+ * @ns_dev_kobj_unregister: completion state
+ * @ns_dev_subgroups: <device> subgroups pointer
*/
struct the_nilfs {
unsigned long ns_flags;
@@ -114,6 +119,7 @@ struct the_nilfs {
unsigned ns_sbwcount;
unsigned ns_sbsize;
unsigned ns_mount_state;
+ unsigned ns_sb_update_freq;
/*
* Following fields are dedicated to a writable FS-instance.
@@ -188,6 +194,11 @@ struct the_nilfs {
int ns_inode_size;
int ns_first_ino;
u32 ns_crc_seed;
+
+ /* /sys/fs/<nilfs>/<device> */
+ struct kobject ns_dev_kobj;
+ struct completion ns_dev_kobj_unregister;
+ struct nilfs_sysfs_dev_subgroups *ns_dev_subgroups;
};
#define THE_NILFS_FNS(bit, name) \
@@ -232,6 +243,8 @@ THE_NILFS_FNS(SB_DIRTY, sb_dirty)
* @ifile: inode file
* @inodes_count: number of inodes
* @blocks_count: number of blocks
+ * @snapshot_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot>
+ * @snapshot_kobj_unregister: completion state for kernel object
*/
struct nilfs_root {
__u64 cno;
@@ -243,6 +256,10 @@ struct nilfs_root {
atomic64_t inodes_count;
atomic64_t blocks_count;
+
+ /* /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot> */
+ struct kobject snapshot_kobj;
+ struct completion snapshot_kobj_unregister;
};
/* Special checkpoint number */
@@ -254,7 +271,8 @@ struct nilfs_root {
static inline int nilfs_sb_need_update(struct the_nilfs *nilfs)
{
u64 t = get_seconds();
- return t < nilfs->ns_sbwtime || t > nilfs->ns_sbwtime + NILFS_SB_FREQ;
+ return t < nilfs->ns_sbwtime ||
+ t > nilfs->ns_sbwtime + nilfs->ns_sb_update_freq;
}
static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs)
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 74825be65b7b..9ce062218de9 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -232,7 +232,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
BUG_ON(last == NULL);
/* mark should be the last entry. last is the current last entry */
- hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list);
+ hlist_add_behind_rcu(&mark->i.i_list, &last->i.i_list);
out:
fsnotify_recalc_inode_mask_locked(inode);
spin_unlock(&inode->i_lock);
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 68ca5a8704b5..ac851e8376b1 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -191,7 +191,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
BUG_ON(last == NULL);
/* mark should be the last entry. last is the current last entry */
- hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list);
+ hlist_add_behind_rcu(&mark->m.m_list, &last->m.m_list);
out:
fsnotify_recalc_vfsmount_mask_locked(mnt);
spin_unlock(&mnt->mnt_root->d_lock);
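
The two fsnotify hunks above follow the tree-wide rename of hlist_add_after_rcu() to hlist_add_behind_rcu(). Judging from the call sites, the rename also reverses the argument order, so the node being inserted now comes first and the node it goes behind second; a sketch of the assumed prototypes:

	/* old helper (removed): existing node first, new node second */
	void hlist_add_after_rcu(struct hlist_node *prev, struct hlist_node *n);

	/* new helper (used above): new node first, node it goes behind second */
	void hlist_add_behind_rcu(struct hlist_node *n, struct hlist_node *prev);
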
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 5c9e2c81cb11..f5ec1ce7a532 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -74,8 +74,6 @@ static int ntfs_file_open(struct inode *vi, struct file *filp)
* ntfs_attr_extend_initialized - extend the initialized size of an attribute
* @ni: ntfs inode of the attribute to extend
* @new_init_size: requested new initialized size in bytes
- * @cached_page: store any allocated but unused page here
- * @lru_pvec: lru-buffering pagevec of the caller
*
* Extend the initialized size of an attribute described by the ntfs inode @ni
* to @new_init_size bytes. This involves zeroing any non-sparse space between
@@ -395,7 +393,6 @@ static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
* @nr_pages: number of page cache pages to obtain
* @pages: array of pages in which to return the obtained page cache pages
* @cached_page: allocated but as yet unused page
- * @lru_pvec: lru-buffering pagevec of caller
*
* Obtain @nr_pages locked page cache pages from the mapping @mapping and
* starting at index @index.
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9d8fcf2f3b94..fcae9ef1a328 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4961,6 +4961,15 @@ leftright:
el = path_leaf_el(path);
split_index = ocfs2_search_extent_list(el, cpos);
+ if (split_index == -1) {
+ ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+ "Owner %llu has an extent at cpos %u "
+ "which can no longer be found.\n",
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+ cpos);
+ ret = -EROFS;
+ goto out;
+ }
goto leftright;
}
out:
@@ -5135,7 +5144,7 @@ int ocfs2_change_extent_flag(handle_t *handle,
el = path_leaf_el(left_path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(sb,
"Owner %llu has an extent at cpos %u which can no "
"longer be found.\n",
@@ -5491,7 +5500,7 @@ int ocfs2_remove_extent(handle_t *handle,
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
"Owner %llu has an extent at cpos %u which can no "
"longer be found.\n",
@@ -5557,7 +5566,7 @@ int ocfs2_remove_extent(handle_t *handle,
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
"Owner %llu: split at cpos %u lost record.",
(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
@@ -5653,7 +5662,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 cpos, u32 phys_cpos, u32 len, int flags,
struct ocfs2_cached_dealloc_ctxt *dealloc,
- u64 refcount_loc)
+ u64 refcount_loc, bool refcount_tree_locked)
{
int ret, credits = 0, extra_blocks = 0;
u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
@@ -5667,11 +5676,13 @@ int ocfs2_remove_btree_range(struct inode *inode,
BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
OCFS2_HAS_REFCOUNT_FL));
- ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
- &ref_tree, NULL);
- if (ret) {
- mlog_errno(ret);
- goto bail;
+ if (!refcount_tree_locked) {
+ ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
+ &ref_tree, NULL);
+ if (ret) {
+ mlog_errno(ret);
+ goto bail;
+ }
}
ret = ocfs2_prepare_refcount_change_for_del(inode,
@@ -7012,6 +7023,7 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
u64 refcount_loc = le64_to_cpu(di->i_refcount_loc);
struct ocfs2_extent_tree et;
struct ocfs2_cached_dealloc_ctxt dealloc;
+ struct ocfs2_refcount_tree *ref_tree = NULL;
ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
ocfs2_init_dealloc_ctxt(&dealloc);
@@ -7121,9 +7133,18 @@ start:
phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
+ if ((flags & OCFS2_EXT_REFCOUNTED) && trunc_len && !ref_tree) {
+ status = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
+ &ref_tree, NULL);
+ if (status) {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
+
status = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
phys_cpos, trunc_len, flags, &dealloc,
- refcount_loc);
+ refcount_loc, true);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -7138,6 +7159,8 @@ start:
goto start;
bail:
+ if (ref_tree)
+ ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
ocfs2_schedule_truncate_log_flush(osb, 1);
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index ca381c584127..fb09b97db162 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -142,7 +142,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 cpos, u32 phys_cpos, u32 len, int flags,
struct ocfs2_cached_dealloc_ctxt *dealloc,
- u64 refcount_loc);
+ u64 refcount_loc, bool refcount_tree_locked);
int ocfs2_num_free_extents(struct ocfs2_super *osb,
struct ocfs2_extent_tree *et);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 4a231a166cf8..2e636217a360 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -894,7 +894,7 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
}
}
-static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc)
{
int i;
@@ -915,7 +915,11 @@ static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
page_cache_release(wc->w_target_page);
}
ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
+}
+static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+{
+ ocfs2_unlock_pages(wc);
brelse(wc->w_di_bh);
kfree(wc);
}
@@ -1817,16 +1821,6 @@ try_again:
if (ret)
goto out_commit;
}
- /*
- * We don't want this to fail in ocfs2_write_end(), so do it
- * here.
- */
- ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret) {
- mlog_errno(ret);
- goto out_quota;
- }
/*
* Fill our page array first. That way we've grabbed enough so
@@ -1977,7 +1971,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
- int i;
+ int i, ret;
unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1);
struct inode *inode = mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -2027,6 +2021,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
}
}
+ ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ copied = ret;
+ mlog_errno(ret);
+ goto out;
+ }
+
out_write_size:
pos += copied;
if (pos > i_size_read(inode)) {
@@ -2041,11 +2043,20 @@ out_write_size:
ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_journal_dirty(handle, wc->w_di_bh);
+out:
+	/* Unlock pages before dealloc, since dealloc needs to acquire the
+	 * j_trans_barrier lock; otherwise we can deadlock: the journal commit
+	 * thread holds that lock and will ask for the page lock when flushing
+	 * the data. Keep the call here to preserve the unlock order.
+ */
+ ocfs2_unlock_pages(wc);
+
ocfs2_commit_trans(osb, handle);
ocfs2_run_deallocs(osb, &wc->w_dealloc);
- ocfs2_free_write_ctxt(wc);
+ brelse(wc->w_di_bh);
+ kfree(wc);
return copied;
}
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index 1ec141e758d7..62e8ec619b4c 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -160,9 +160,18 @@ static void o2quo_make_decision(struct work_struct *work)
}
out:
- spin_unlock(&qs->qs_lock);
- if (fence)
+ if (fence) {
+ spin_unlock(&qs->qs_lock);
o2quo_fence_self();
+ } else {
+ mlog(ML_NOTICE, "not fencing this node, heartbeating: %d, "
+ "connected: %d, lowest: %d (%sreachable)\n",
+ qs->qs_heartbeating, qs->qs_connected, lowest_hb,
+ lowest_reachable ? "" : "un");
+ spin_unlock(&qs->qs_lock);
+
+ }
+
}
static void o2quo_set_hold(struct o2quo_state *qs, u8 node)
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 681691bc233a..ea34952f9496 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1480,6 +1480,14 @@ static int o2net_set_nodelay(struct socket *sock)
return ret;
}
+static int o2net_set_usertimeout(struct socket *sock)
+{
+ int user_timeout = O2NET_TCP_USER_TIMEOUT;
+
+ return kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
+ (char *)&user_timeout, sizeof(user_timeout));
+}
+
static void o2net_initialize_handshake(void)
{
o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32(
@@ -1536,16 +1544,20 @@ static void o2net_idle_timer(unsigned long data)
#endif
printk(KERN_NOTICE "o2net: Connection to " SC_NODEF_FMT " has been "
- "idle for %lu.%lu secs, shutting it down.\n", SC_NODEF_ARGS(sc),
- msecs / 1000, msecs % 1000);
+ "idle for %lu.%lu secs.\n",
+ SC_NODEF_ARGS(sc), msecs / 1000, msecs % 1000);
- /*
- * Initialize the nn_timeout so that the next connection attempt
- * will continue in o2net_start_connect.
+	/* An idle timeout happened; don't shut down the connection, but
+	 * make a fencing decision. The connection may still recover before
+	 * the decision is made.
*/
atomic_set(&nn->nn_timeout, 1);
+ o2quo_conn_err(o2net_num_from_nn(nn));
+ queue_delayed_work(o2net_wq, &nn->nn_still_up,
+ msecs_to_jiffies(O2NET_QUORUM_DELAY_MS));
+
+ o2net_sc_reset_idle_timer(sc);
- o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
}
static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
@@ -1560,6 +1572,15 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
{
+ struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
+
+	/* Clear the fence decision, since the connection recovered from the timeout */
+ if (atomic_read(&nn->nn_timeout)) {
+ o2quo_conn_up(o2net_num_from_nn(nn));
+ cancel_delayed_work(&nn->nn_still_up);
+ atomic_set(&nn->nn_timeout, 0);
+ }
+
/* Only push out an existing timer */
if (timer_pending(&sc->sc_idle_timeout))
o2net_sc_reset_idle_timer(sc);
@@ -1650,6 +1671,12 @@ static void o2net_start_connect(struct work_struct *work)
goto out;
}
+ ret = o2net_set_usertimeout(sock);
+ if (ret) {
+ mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
+ goto out;
+ }
+
o2net_register_callbacks(sc->sc_sock->sk, sc);
spin_lock(&nn->nn_lock);
@@ -1831,6 +1858,12 @@ static int o2net_accept_one(struct socket *sock, int *more)
goto out;
}
+ ret = o2net_set_usertimeout(new_sock);
+ if (ret) {
+ mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
+ goto out;
+ }
+
slen = sizeof(sin);
ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin,
&slen, 1);
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h
index 5bada2a69b50..c571e849fda4 100644
--- a/fs/ocfs2/cluster/tcp.h
+++ b/fs/ocfs2/cluster/tcp.h
@@ -63,6 +63,7 @@ typedef void (o2net_post_msg_handler_func)(int status, void *data,
#define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 2000
#define O2NET_IDLE_TIMEOUT_MS_DEFAULT 30000
+#define O2NET_TCP_USER_TIMEOUT 0x7fffffff
/* TODO: figure this out.... */
static inline int o2net_link_down(int err, struct socket *sock)
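
O2NET_TCP_USER_TIMEOUT above feeds the new o2net_set_usertimeout() helper in tcp.c. TCP_USER_TIMEOUT bounds, in milliseconds, how long transmitted data may remain unacknowledged before TCP aborts the connection; picking 0x7fffffff effectively leaves connection teardown to o2net's own idle timer and quorum logic rather than to the TCP stack. A userspace analogue of the in-kernel kernel_setsockopt() call, for illustration only (assumes the libc headers expose TCP_USER_TIMEOUT):

	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <sys/socket.h>

	static int set_user_timeout(int fd)
	{
		int timeout_ms = 0x7fffffff;	/* mirrors O2NET_TCP_USER_TIMEOUT */

		return setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
				  &timeout_ms, sizeof(timeout_ms));
	}
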
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 0717662b4aef..e737a4ce7d7a 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -4477,7 +4477,7 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno);
ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0,
- &dealloc, 0);
+ &dealloc, 0, false);
if (ret) {
mlog_errno(ret);
goto out;
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 39efc5057a36..3fcf205ee900 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1923,12 +1923,11 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
goto bail;
}
- if (total_backoff >
- msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) {
+ if (total_backoff > DLM_JOIN_TIMEOUT_MSECS) {
status = -ERESTARTSYS;
mlog(ML_NOTICE, "Timed out joining dlm domain "
"%s after %u msecs\n", dlm->name,
- jiffies_to_msecs(total_backoff));
+ total_backoff);
goto bail;
}
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 82abf0cc9a12..a875436461fa 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -498,16 +498,6 @@ static void dlm_lockres_release(struct kref *kref)
mlog(0, "destroying lockres %.*s\n", res->lockname.len,
res->lockname.name);
- spin_lock(&dlm->track_lock);
- if (!list_empty(&res->tracking))
- list_del_init(&res->tracking);
- else {
- mlog(ML_ERROR, "Resource %.*s not on the Tracking list\n",
- res->lockname.len, res->lockname.name);
- dlm_print_one_lock_resource(res);
- }
- spin_unlock(&dlm->track_lock);
-
atomic_dec(&dlm->res_cur_count);
if (!hlist_unhashed(&res->hash_node) ||
@@ -694,14 +684,6 @@ void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
res->inflight_assert_workers);
}
-static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
- struct dlm_lock_resource *res)
-{
- spin_lock(&res->spinlock);
- __dlm_lockres_grab_inflight_worker(dlm, res);
- spin_unlock(&res->spinlock);
-}
-
static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
@@ -1635,6 +1617,7 @@ send_response:
}
mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
dlm->node_num, res->lockname.len, res->lockname.name);
+ spin_lock(&res->spinlock);
ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx,
DLM_ASSERT_MASTER_MLE_CLEANUP);
if (ret < 0) {
@@ -1642,7 +1625,8 @@ send_response:
response = DLM_MASTER_RESP_ERROR;
dlm_lockres_put(res);
} else
- dlm_lockres_grab_inflight_worker(dlm, res);
+ __dlm_lockres_grab_inflight_worker(dlm, res);
+ spin_unlock(&res->spinlock);
} else {
if (res)
dlm_lockres_put(res);
@@ -2405,6 +2389,10 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
if (res->state & DLM_LOCK_RES_MIGRATING)
return 0;
+	/* delay migration when the lockres is in RECOVERING state */
+ if (res->state & DLM_LOCK_RES_RECOVERING)
+ return 0;
+
if (res->owner != dlm->node_num)
return 0;
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 69aac6f088ad..cd75e186cfc5 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -211,6 +211,16 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,
__dlm_unhash_lockres(dlm, res);
+ spin_lock(&dlm->track_lock);
+ if (!list_empty(&res->tracking))
+ list_del_init(&res->tracking);
+ else {
+ mlog(ML_ERROR, "Resource %.*s not on the Tracking list\n",
+ res->lockname.len, res->lockname.name);
+ dlm_print_one_lock_resource(res);
+ }
+ spin_unlock(&dlm->track_lock);
+
/* lockres is not in the hash now. drop the flag and wake up
* any processes waiting in dlm_get_lock_resource. */
if (!master) {
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 2930e231f3f9..319c858e70bf 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1804,7 +1804,7 @@ static int ocfs2_remove_inode_range(struct inode *inode,
ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
phys_cpos, trunc_len, flags,
- &dealloc, refcount_loc);
+ &dealloc, refcount_loc, false);
if (ret < 0) {
mlog_errno(ret);
goto out;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 437de7f768c6..21708cda4b8a 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1192,17 +1192,9 @@ void ocfs2_evict_inode(struct inode *inode)
int ocfs2_drop_inode(struct inode *inode)
{
struct ocfs2_inode_info *oi = OCFS2_I(inode);
- int res;
-
trace_ocfs2_drop_inode((unsigned long long)oi->ip_blkno,
inode->i_nlink, oi->ip_flags);
-
- if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
- res = 1;
- else
- res = generic_drop_inode(inode);
-
- return res;
+ return 1;
}
/*
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 6f66b3751ace..53e6c40ed4c6 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -35,9 +35,8 @@
copy_to_user((typeof(a) __user *)b, &(a), sizeof(a))
/*
- * This call is void because we are already reporting an error that may
- * be -EFAULT. The error will be returned from the ioctl(2) call. It's
- * just a best-effort to tell userspace that this request caused the error.
+ * This is just a best-effort attempt to tell userspace that this request
+ * caused the error.
*/
static inline void o2info_set_request_error(struct ocfs2_info_request *kreq,
struct ocfs2_info_request __user *req)
@@ -146,136 +145,105 @@ bail:
static int ocfs2_info_handle_blocksize(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_blocksize oib;
if (o2info_from_user(oib, req))
- goto bail;
+ return -EFAULT;
oib.ib_blocksize = inode->i_sb->s_blocksize;
o2info_set_request_filled(&oib.ib_req);
if (o2info_to_user(oib, req))
- goto bail;
-
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oib.ib_req, req);
+ return -EFAULT;
- return status;
+ return 0;
}
static int ocfs2_info_handle_clustersize(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_clustersize oic;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oic, req))
- goto bail;
+ return -EFAULT;
oic.ic_clustersize = osb->s_clustersize;
o2info_set_request_filled(&oic.ic_req);
if (o2info_to_user(oic, req))
- goto bail;
-
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oic.ic_req, req);
+ return -EFAULT;
- return status;
+ return 0;
}
static int ocfs2_info_handle_maxslots(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_maxslots oim;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oim, req))
- goto bail;
+ return -EFAULT;
oim.im_max_slots = osb->max_slots;
o2info_set_request_filled(&oim.im_req);
if (o2info_to_user(oim, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oim.im_req, req);
-
- return status;
+ return 0;
}
static int ocfs2_info_handle_label(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_label oil;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oil, req))
- goto bail;
+ return -EFAULT;
memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN);
o2info_set_request_filled(&oil.il_req);
if (o2info_to_user(oil, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oil.il_req, req);
-
- return status;
+ return 0;
}
static int ocfs2_info_handle_uuid(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_uuid oiu;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oiu, req))
- goto bail;
+ return -EFAULT;
memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1);
o2info_set_request_filled(&oiu.iu_req);
if (o2info_to_user(oiu, req))
- goto bail;
-
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oiu.iu_req, req);
+ return -EFAULT;
- return status;
+ return 0;
}
static int ocfs2_info_handle_fs_features(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_fs_features oif;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oif, req))
- goto bail;
+ return -EFAULT;
oif.if_compat_features = osb->s_feature_compat;
oif.if_incompat_features = osb->s_feature_incompat;
@@ -284,39 +252,28 @@ static int ocfs2_info_handle_fs_features(struct inode *inode,
o2info_set_request_filled(&oif.if_req);
if (o2info_to_user(oif, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oif.if_req, req);
-
- return status;
+ return 0;
}
static int ocfs2_info_handle_journal_size(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_journal_size oij;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oij, req))
- goto bail;
+ return -EFAULT;
oij.ij_journal_size = i_size_read(osb->journal->j_inode);
o2info_set_request_filled(&oij.ij_req);
if (o2info_to_user(oij, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oij.ij_req, req);
-
- return status;
+ return 0;
}
static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
@@ -373,7 +330,7 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
u32 i;
u64 blkno = -1;
char namebuf[40];
- int status = -EFAULT, type = INODE_ALLOC_SYSTEM_INODE;
+ int status, type = INODE_ALLOC_SYSTEM_INODE;
struct ocfs2_info_freeinode *oifi = NULL;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct inode *inode_alloc = NULL;
@@ -385,8 +342,10 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
goto out_err;
}
- if (o2info_from_user(*oifi, req))
- goto bail;
+ if (o2info_from_user(*oifi, req)) {
+ status = -EFAULT;
+ goto out_free;
+ }
oifi->ifi_slotnum = osb->max_slots;
@@ -424,14 +383,16 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
o2info_set_request_filled(&oifi->ifi_req);
- if (o2info_to_user(*oifi, req))
- goto bail;
+ if (o2info_to_user(*oifi, req)) {
+ status = -EFAULT;
+ goto out_free;
+ }
status = 0;
bail:
if (status)
o2info_set_request_error(&oifi->ifi_req, req);
-
+out_free:
kfree(oifi);
out_err:
return status;
@@ -658,7 +619,7 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
{
u64 blkno = -1;
char namebuf[40];
- int status = -EFAULT, type = GLOBAL_BITMAP_SYSTEM_INODE;
+ int status, type = GLOBAL_BITMAP_SYSTEM_INODE;
struct ocfs2_info_freefrag *oiff;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -671,8 +632,10 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
goto out_err;
}
- if (o2info_from_user(*oiff, req))
- goto bail;
+ if (o2info_from_user(*oiff, req)) {
+ status = -EFAULT;
+ goto out_free;
+ }
/*
* chunksize from userspace should be power of 2.
*/
@@ -711,14 +674,14 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
if (o2info_to_user(*oiff, req)) {
status = -EFAULT;
- goto bail;
+ goto out_free;
}
status = 0;
bail:
if (status)
o2info_set_request_error(&oiff->iff_req, req);
-
+out_free:
kfree(oiff);
out_err:
return status;
@@ -727,23 +690,17 @@ out_err:
static int ocfs2_info_handle_unknown(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_request oir;
if (o2info_from_user(oir, req))
- goto bail;
+ return -EFAULT;
o2info_clear_request_filled(&oir);
if (o2info_to_user(oir, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oir, req);
-
- return status;
+ return 0;
}
/*
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 599eb4c4c8be..6219aaadeb08 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -98,7 +98,7 @@ static int __ocfs2_move_extent(handle_t *handle,
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(inode->i_sb,
"Inode %llu has an extent at cpos %u which can no "
"longer be found.\n",
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 636aab69ead5..d81f6e2a97f5 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3109,7 +3109,7 @@ static int ocfs2_clear_ext_refcount(handle_t *handle,
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(sb,
"Inode %llu has an extent at cpos %u which can no "
"longer be found.\n",
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 1424c151cccc..a88b2a4fcc85 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -382,7 +382,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);
- si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks,
+ si->si_bh = kcalloc(si->si_blocks, sizeof(struct buffer_head *),
GFP_KERNEL);
if (!si->si_bh) {
status = -ENOMEM;
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index ec58c7659183..ba8819702c56 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -321,7 +321,7 @@ static int omfs_get_imap(struct super_block *sb)
goto out;
sbi->s_imap_size = array_size;
- sbi->s_imap = kzalloc(array_size * sizeof(unsigned long *), GFP_KERNEL);
+ sbi->s_imap = kcalloc(array_size, sizeof(unsigned long *), GFP_KERNEL);
if (!sbi->s_imap)
goto nomem;
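
The slot_map.c and omfs hunks above replace open-coded kzalloc(n * size, ...) array allocations with kcalloc(n, size, ...). Both zero the memory; the point of the change is that kcalloc() checks the n * size multiplication for overflow and returns NULL instead of handing back a short buffer. A minimal sketch of the pattern (illustrative only; assumes <linux/slab.h>):

	#include <linux/slab.h>

	static unsigned long **alloc_imap(size_t array_size)
	{
		/* kzalloc(array_size * sizeof(unsigned long *), GFP_KERNEL) could
		 * wrap silently for a huge array_size; kcalloc() refuses and
		 * returns NULL on overflow.
		 */
		return kcalloc(array_size, sizeof(unsigned long *), GFP_KERNEL);
	}
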
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 0131156ce7c9..baf852b648ad 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -105,7 +105,7 @@
*/
struct pid_entry {
- char *name;
+ const char *name;
int len;
umode_t mode;
const struct inode_operations *iop;
@@ -130,10 +130,6 @@ struct pid_entry {
{ .proc_get_link = get_link } )
#define REG(NAME, MODE, fops) \
NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
-#define INF(NAME, MODE, read) \
- NOD(NAME, (S_IFREG|(MODE)), \
- NULL, &proc_info_file_operations, \
- { .proc_read = read } )
#define ONE(NAME, MODE, show) \
NOD(NAME, (S_IFREG|(MODE)), \
NULL, &proc_single_file_operations, \
@@ -200,27 +196,32 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
return result;
}
-static int proc_pid_cmdline(struct task_struct *task, char *buffer)
+static int proc_pid_cmdline(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
- return get_cmdline(task, buffer, PAGE_SIZE);
+ /*
+ * Rely on struct seq_operations::show() being called once
+ * per internal buffer allocation. See single_open(), traverse().
+ */
+ BUG_ON(m->size < PAGE_SIZE);
+ m->count += get_cmdline(task, m->buf, PAGE_SIZE);
+ return 0;
}
-static int proc_pid_auxv(struct task_struct *task, char *buffer)
+static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ);
- int res = PTR_ERR(mm);
if (mm && !IS_ERR(mm)) {
unsigned int nwords = 0;
do {
nwords += 2;
} while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
- res = nwords * sizeof(mm->saved_auxv[0]);
- if (res > PAGE_SIZE)
- res = PAGE_SIZE;
- memcpy(buffer, mm->saved_auxv, res);
+ seq_write(m, mm->saved_auxv, nwords * sizeof(mm->saved_auxv[0]));
mmput(mm);
- }
- return res;
+ return 0;
+ } else
+ return PTR_ERR(mm);
}
@@ -229,7 +230,8 @@ static int proc_pid_auxv(struct task_struct *task, char *buffer)
* Provides a wchan file via kallsyms in a proper one-value-per-file format.
* Returns the resolved symbol. If that fails, simply return the address.
*/
-static int proc_pid_wchan(struct task_struct *task, char *buffer)
+static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
unsigned long wchan;
char symname[KSYM_NAME_LEN];
@@ -240,9 +242,9 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
if (!ptrace_may_access(task, PTRACE_MODE_READ))
return 0;
else
- return sprintf(buffer, "%lu", wchan);
+ return seq_printf(m, "%lu", wchan);
else
- return sprintf(buffer, "%s", symname);
+ return seq_printf(m, "%s", symname);
}
#endif /* CONFIG_KALLSYMS */
@@ -304,9 +306,10 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
/*
* Provides /proc/PID/schedstat
*/
-static int proc_pid_schedstat(struct task_struct *task, char *buffer)
+static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
- return sprintf(buffer, "%llu %llu %lu\n",
+ return seq_printf(m, "%llu %llu %lu\n",
(unsigned long long)task->se.sum_exec_runtime,
(unsigned long long)task->sched_info.run_delay,
task->sched_info.pcount);
@@ -404,7 +407,8 @@ static const struct file_operations proc_cpuset_operations = {
};
#endif
-static int proc_oom_score(struct task_struct *task, char *buffer)
+static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
unsigned long totalpages = totalram_pages + total_swap_pages;
unsigned long points = 0;
@@ -414,12 +418,12 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
points = oom_badness(task, NULL, NULL, totalpages) *
1000 / totalpages;
read_unlock(&tasklist_lock);
- return sprintf(buffer, "%lu\n", points);
+ return seq_printf(m, "%lu\n", points);
}
struct limit_names {
- char *name;
- char *unit;
+ const char *name;
+ const char *unit;
};
static const struct limit_names lnames[RLIM_NLIMITS] = {
@@ -442,12 +446,11 @@ static const struct limit_names lnames[RLIM_NLIMITS] = {
};
/* Display limits for a process */
-static int proc_pid_limits(struct task_struct *task, char *buffer)
+static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
unsigned int i;
- int count = 0;
unsigned long flags;
- char *bufptr = buffer;
struct rlimit rlim[RLIM_NLIMITS];
@@ -459,35 +462,34 @@ static int proc_pid_limits(struct task_struct *task, char *buffer)
/*
* print the file header
*/
- count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
+ seq_printf(m, "%-25s %-20s %-20s %-10s\n",
"Limit", "Soft Limit", "Hard Limit", "Units");
for (i = 0; i < RLIM_NLIMITS; i++) {
if (rlim[i].rlim_cur == RLIM_INFINITY)
- count += sprintf(&bufptr[count], "%-25s %-20s ",
+ seq_printf(m, "%-25s %-20s ",
lnames[i].name, "unlimited");
else
- count += sprintf(&bufptr[count], "%-25s %-20lu ",
+ seq_printf(m, "%-25s %-20lu ",
lnames[i].name, rlim[i].rlim_cur);
if (rlim[i].rlim_max == RLIM_INFINITY)
- count += sprintf(&bufptr[count], "%-20s ", "unlimited");
+ seq_printf(m, "%-20s ", "unlimited");
else
- count += sprintf(&bufptr[count], "%-20lu ",
- rlim[i].rlim_max);
+ seq_printf(m, "%-20lu ", rlim[i].rlim_max);
if (lnames[i].unit)
- count += sprintf(&bufptr[count], "%-10s\n",
- lnames[i].unit);
+ seq_printf(m, "%-10s\n", lnames[i].unit);
else
- count += sprintf(&bufptr[count], "\n");
+ seq_putc(m, '\n');
}
- return count;
+ return 0;
}
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
-static int proc_pid_syscall(struct task_struct *task, char *buffer)
+static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
long nr;
unsigned long args[6], sp, pc;
@@ -496,11 +498,11 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer)
return res;
if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
- res = sprintf(buffer, "running\n");
+ seq_puts(m, "running\n");
else if (nr < 0)
- res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
+ seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
else
- res = sprintf(buffer,
+ seq_printf(m,
"%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
nr,
args[0], args[1], args[2], args[3], args[4], args[5],
@@ -598,43 +600,6 @@ static const struct inode_operations proc_def_inode_operations = {
.setattr = proc_setattr,
};
-#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */
-
-static ssize_t proc_info_read(struct file * file, char __user * buf,
- size_t count, loff_t *ppos)
-{
- struct inode * inode = file_inode(file);
- unsigned long page;
- ssize_t length;
- struct task_struct *task = get_proc_task(inode);
-
- length = -ESRCH;
- if (!task)
- goto out_no_task;
-
- if (count > PROC_BLOCK_SIZE)
- count = PROC_BLOCK_SIZE;
-
- length = -ENOMEM;
- if (!(page = __get_free_page(GFP_TEMPORARY)))
- goto out;
-
- length = PROC_I(inode)->op.proc_read(task, (char*)page);
-
- if (length >= 0)
- length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
- free_page(page);
-out:
- put_task_struct(task);
-out_no_task:
- return length;
-}
-
-static const struct file_operations proc_info_file_operations = {
- .read = proc_info_read,
- .llseek = generic_file_llseek,
-};
-
static int proc_single_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
@@ -2056,7 +2021,7 @@ static int show_timer(struct seq_file *m, void *v)
struct k_itimer *timer;
struct timers_private *tp = m->private;
int notify;
- static char *nstr[] = {
+ static const char * const nstr[] = {
[SIGEV_SIGNAL] = "signal",
[SIGEV_NONE] = "none",
[SIGEV_THREAD] = "thread",
@@ -2392,7 +2357,7 @@ static const struct file_operations proc_coredump_filter_operations = {
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
-static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
+static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole)
{
struct task_io_accounting acct = task->ioac;
unsigned long flags;
@@ -2416,7 +2381,7 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
unlock_task_sighand(task, &flags);
}
- result = sprintf(buffer,
+ result = seq_printf(m,
"rchar: %llu\n"
"wchar: %llu\n"
"syscr: %llu\n"
@@ -2436,20 +2401,22 @@ out_unlock:
return result;
}
-static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
+static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
- return do_io_accounting(task, buffer, 0);
+ return do_io_accounting(task, m, 0);
}
-static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
+static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
{
- return do_io_accounting(task, buffer, 1);
+ return do_io_accounting(task, m, 1);
}
#endif /* CONFIG_TASK_IO_ACCOUNTING */
#ifdef CONFIG_USER_NS
static int proc_id_map_open(struct inode *inode, struct file *file,
- struct seq_operations *seq_ops)
+ const struct seq_operations *seq_ops)
{
struct user_namespace *ns = NULL;
struct task_struct *task;
@@ -2557,10 +2524,10 @@ static const struct pid_entry tgid_base_stuff[] = {
DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
#endif
REG("environ", S_IRUSR, proc_environ_operations),
- INF("auxv", S_IRUSR, proc_pid_auxv),
+ ONE("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
ONE("personality", S_IRUSR, proc_pid_personality),
- INF("limits", S_IRUGO, proc_pid_limits),
+ ONE("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
@@ -2569,9 +2536,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#endif
REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
- INF("syscall", S_IRUSR, proc_pid_syscall),
+ ONE("syscall", S_IRUSR, proc_pid_syscall),
#endif
- INF("cmdline", S_IRUGO, proc_pid_cmdline),
+ ONE("cmdline", S_IRUGO, proc_pid_cmdline),
ONE("stat", S_IRUGO, proc_tgid_stat),
ONE("statm", S_IRUGO, proc_pid_statm),
REG("maps", S_IRUGO, proc_pid_maps_operations),
@@ -2594,13 +2561,13 @@ static const struct pid_entry tgid_base_stuff[] = {
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
#endif
#ifdef CONFIG_KALLSYMS
- INF("wchan", S_IRUGO, proc_pid_wchan),
+ ONE("wchan", S_IRUGO, proc_pid_wchan),
#endif
#ifdef CONFIG_STACKTRACE
ONE("stack", S_IRUSR, proc_pid_stack),
#endif
#ifdef CONFIG_SCHEDSTATS
- INF("schedstat", S_IRUGO, proc_pid_schedstat),
+ ONE("schedstat", S_IRUGO, proc_pid_schedstat),
#endif
#ifdef CONFIG_LATENCYTOP
REG("latency", S_IRUGO, proc_lstats_operations),
@@ -2611,7 +2578,7 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_CGROUPS
REG("cgroup", S_IRUGO, proc_cgroup_operations),
#endif
- INF("oom_score", S_IRUGO, proc_oom_score),
+ ONE("oom_score", S_IRUGO, proc_oom_score),
REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
@@ -2625,10 +2592,10 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
- INF("io", S_IRUSR, proc_tgid_io_accounting),
+ ONE("io", S_IRUSR, proc_tgid_io_accounting),
#endif
#ifdef CONFIG_HARDWALL
- INF("hardwall", S_IRUGO, proc_pid_hardwall),
+ ONE("hardwall", S_IRUGO, proc_pid_hardwall),
#endif
#ifdef CONFIG_USER_NS
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
@@ -2780,12 +2747,12 @@ out:
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
{
- int result = 0;
+ int result = -ENOENT;
struct task_struct *task;
unsigned tgid;
struct pid_namespace *ns;
- tgid = name_to_int(dentry);
+ tgid = name_to_int(&dentry->d_name);
if (tgid == ~0U)
goto out;
@@ -2904,18 +2871,18 @@ static const struct pid_entry tid_base_stuff[] = {
DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
#endif
REG("environ", S_IRUSR, proc_environ_operations),
- INF("auxv", S_IRUSR, proc_pid_auxv),
+ ONE("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
ONE("personality", S_IRUSR, proc_pid_personality),
- INF("limits", S_IRUGO, proc_pid_limits),
+ ONE("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
- INF("syscall", S_IRUSR, proc_pid_syscall),
+ ONE("syscall", S_IRUSR, proc_pid_syscall),
#endif
- INF("cmdline", S_IRUGO, proc_pid_cmdline),
+ ONE("cmdline", S_IRUGO, proc_pid_cmdline),
ONE("stat", S_IRUGO, proc_tid_stat),
ONE("statm", S_IRUGO, proc_pid_statm),
REG("maps", S_IRUGO, proc_tid_maps_operations),
@@ -2940,13 +2907,13 @@ static const struct pid_entry tid_base_stuff[] = {
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
#endif
#ifdef CONFIG_KALLSYMS
- INF("wchan", S_IRUGO, proc_pid_wchan),
+ ONE("wchan", S_IRUGO, proc_pid_wchan),
#endif
#ifdef CONFIG_STACKTRACE
ONE("stack", S_IRUSR, proc_pid_stack),
#endif
#ifdef CONFIG_SCHEDSTATS
- INF("schedstat", S_IRUGO, proc_pid_schedstat),
+ ONE("schedstat", S_IRUGO, proc_pid_schedstat),
#endif
#ifdef CONFIG_LATENCYTOP
REG("latency", S_IRUGO, proc_lstats_operations),
@@ -2957,7 +2924,7 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_CGROUPS
REG("cgroup", S_IRUGO, proc_cgroup_operations),
#endif
- INF("oom_score", S_IRUGO, proc_oom_score),
+ ONE("oom_score", S_IRUGO, proc_oom_score),
REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
@@ -2968,10 +2935,10 @@ static const struct pid_entry tid_base_stuff[] = {
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
- INF("io", S_IRUSR, proc_tid_io_accounting),
+ ONE("io", S_IRUSR, proc_tid_io_accounting),
#endif
#ifdef CONFIG_HARDWALL
- INF("hardwall", S_IRUGO, proc_pid_hardwall),
+ ONE("hardwall", S_IRUGO, proc_pid_hardwall),
#endif
#ifdef CONFIG_USER_NS
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
@@ -3041,7 +3008,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
if (!leader)
goto out_no_task;
- tid = name_to_int(dentry);
+ tid = name_to_int(&dentry->d_name);
if (tid == ~0U)
goto out;
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 0788d093f5d8..955bb55fab8c 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -206,7 +206,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir,
{
struct task_struct *task = get_proc_task(dir);
int result = -ENOENT;
- unsigned fd = name_to_int(dentry);
+ unsigned fd = name_to_int(&dentry->d_name);
if (!task)
goto out_no_task;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index b7f268eb5f45..317b72641ebf 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -27,7 +27,7 @@
#include "internal.h"
-DEFINE_SPINLOCK(proc_subdir_lock);
+static DEFINE_SPINLOCK(proc_subdir_lock);
static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de)
{
@@ -330,28 +330,28 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
nlink_t nlink)
{
struct proc_dir_entry *ent = NULL;
- const char *fn = name;
- unsigned int len;
-
- /* make sure name is valid */
- if (!name || !strlen(name))
- goto out;
+ const char *fn;
+ struct qstr qstr;
if (xlate_proc_name(name, parent, &fn) != 0)
goto out;
+ qstr.name = fn;
+ qstr.len = strlen(fn);
+ if (qstr.len == 0 || qstr.len >= 256) {
+ WARN(1, "name len %u\n", qstr.len);
+ return NULL;
+ }
+ if (*parent == &proc_root && name_to_int(&qstr) != ~0U) {
+ WARN(1, "create '/proc/%s' by hand\n", qstr.name);
+ return NULL;
+ }
- /* At this point there must not be any '/' characters beyond *fn */
- if (strchr(fn, '/'))
- goto out;
-
- len = strlen(fn);
-
- ent = kzalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
+ ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL);
if (!ent)
goto out;
- memcpy(ent->name, fn, len + 1);
- ent->namelen = len;
+ memcpy(ent->name, fn, qstr.len + 1);
+ ent->namelen = qstr.len;
ent->mode = mode;
ent->nlink = nlink;
atomic_set(&ent->count, 1);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index ee04619173b2..7da13e49128a 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -52,7 +52,6 @@ struct proc_dir_entry {
union proc_op {
int (*proc_get_link)(struct dentry *, struct path *);
- int (*proc_read)(struct task_struct *task, char *page);
int (*proc_show)(struct seq_file *m,
struct pid_namespace *ns, struct pid *pid,
struct task_struct *task);
@@ -112,10 +111,10 @@ static inline int task_dumpable(struct task_struct *task)
return 0;
}
-static inline unsigned name_to_int(struct dentry *dentry)
+static inline unsigned name_to_int(const struct qstr *qstr)
{
- const char *name = dentry->d_name.name;
- int len = dentry->d_name.len;
+ const char *name = qstr->name;
+ int len = qstr->len;
unsigned n = 0;
if (len > 1 && *name == '0')
@@ -178,8 +177,6 @@ extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, i
/*
* generic.c
*/
-extern spinlock_t proc_subdir_lock;
-
extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *,
struct dentry *);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 39e6ef32f0bd..6df8d0722c97 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -172,7 +172,7 @@ get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
- end = ALIGN(end, PAGE_SIZE);
+ end = PAGE_ALIGN(end);
/* overlap check (because we have to align page */
list_for_each_entry(tmp, head, list) {
if (tmp->type != KCORE_VMEMMAP)
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 7445af0b1aa3..aa1eee06420f 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -168,7 +168,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
K(global_page_state(NR_WRITEBACK)),
K(global_page_state(NR_ANON_PAGES)),
K(global_page_state(NR_FILE_MAPPED)),
- K(global_page_state(NR_SHMEM)),
+ K(i.sharedram),
K(global_page_state(NR_SLAB_RECLAIMABLE) +
global_page_state(NR_SLAB_UNRECLAIMABLE)),
K(global_page_state(NR_SLAB_RECLAIMABLE)),
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 71290463a1d3..f92d5dd578a4 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -632,7 +632,7 @@ out:
return ret;
}
-static int scan(struct ctl_table_header *head, ctl_table *table,
+static int scan(struct ctl_table_header *head, struct ctl_table *table,
unsigned long *pos, struct file *file,
struct dir_context *ctx)
{
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index cb761f010300..15f327bed8c6 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -18,7 +18,7 @@
/*
* The /proc/tty directory inodes...
*/
-static struct proc_dir_entry *proc_tty_ldisc, *proc_tty_driver;
+static struct proc_dir_entry *proc_tty_driver;
/*
* This is the handler for /proc/tty/drivers
@@ -176,7 +176,7 @@ void __init proc_tty_init(void)
{
if (!proc_mkdir("tty", NULL))
return;
- proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL);
+ proc_mkdir("tty/ldisc", NULL); /* Preserved: it's userspace visible */
/*
* /proc/tty/driver/serial reveals the exact character counts for
* serial links which is just too easy to abuse for inferring
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 92c12c243ce3..6296c7626963 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -202,10 +202,10 @@ static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags)
{
- if (!proc_lookup(dir, dentry, flags))
+ if (!proc_pid_lookup(dir, dentry, flags))
return NULL;
- return proc_pid_lookup(dir, dentry, flags);
+ return proc_lookup(dir, dentry, flags);
}
static int proc_root_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 382aa890e228..a90d6d354199 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -328,6 +328,82 @@ static inline char *alloc_elfnotes_buf(size_t notes_sz)
* virtually contiguous user-space in ELF layout.
*/
#ifdef CONFIG_MMU
+/*
+ * remap_oldmem_pfn_checked - do remap_oldmem_pfn_range replacing all pages
+ * reported as not being ram with the zero page.
+ *
+ * @vma: vm_area_struct describing requested mapping
+ * @from: start remapping from
+ * @pfn: page frame number to start remapping to
+ * @size: remapping size
+ * @prot: protection bits
+ *
+ * Returns zero on success, -EAGAIN on failure.
+ */
+static int remap_oldmem_pfn_checked(struct vm_area_struct *vma,
+ unsigned long from, unsigned long pfn,
+ unsigned long size, pgprot_t prot)
+{
+ unsigned long map_size;
+ unsigned long pos_start, pos_end, pos;
+ unsigned long zeropage_pfn = my_zero_pfn(0);
+ size_t len = 0;
+
+ pos_start = pfn;
+ pos_end = pfn + (size >> PAGE_SHIFT);
+
+ for (pos = pos_start; pos < pos_end; ++pos) {
+ if (!pfn_is_ram(pos)) {
+ /*
+ * We hit a page which is not ram. Remap the continuous
+ * region between pos_start and pos-1 and replace
+ * the non-ram page at pos with the zero page.
+ */
+ if (pos > pos_start) {
+ /* Remap continuous region */
+ map_size = (pos - pos_start) << PAGE_SHIFT;
+ if (remap_oldmem_pfn_range(vma, from + len,
+ pos_start, map_size,
+ prot))
+ goto fail;
+ len += map_size;
+ }
+ /* Remap the zero page */
+ if (remap_oldmem_pfn_range(vma, from + len,
+ zeropage_pfn,
+ PAGE_SIZE, prot))
+ goto fail;
+ len += PAGE_SIZE;
+ pos_start = pos + 1;
+ }
+ }
+ if (pos > pos_start) {
+ /* Remap the rest */
+ map_size = (pos - pos_start) << PAGE_SHIFT;
+ if (remap_oldmem_pfn_range(vma, from + len, pos_start,
+ map_size, prot))
+ goto fail;
+ }
+ return 0;
+fail:
+ do_munmap(vma->vm_mm, from, len);
+ return -EAGAIN;
+}
+
+static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
+ unsigned long from, unsigned long pfn,
+ unsigned long size, pgprot_t prot)
+{
+ /*
+ * Check if oldmem_pfn_is_ram was registered to avoid
+ * looping over all pages without a reason.
+ */
+ if (oldmem_pfn_is_ram)
+ return remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
+ else
+ return remap_oldmem_pfn_range(vma, from, pfn, size, prot);
+}
+
static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
{
size_t size = vma->vm_end - vma->vm_start;
@@ -387,9 +463,9 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
tsz = min_t(size_t, m->offset + m->size - start, size);
paddr = m->paddr + start - m->offset;
- if (remap_oldmem_pfn_range(vma, vma->vm_start + len,
- paddr >> PAGE_SHIFT, tsz,
- vma->vm_page_prot))
+ if (vmcore_remap_oldmem_pfn(vma, vma->vm_start + len,
+ paddr >> PAGE_SHIFT, tsz,
+ vma->vm_page_prot))
goto fail;
size -= tsz;
start += tsz;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 34a1e5aa848c..9d7b9a83699e 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -394,7 +394,7 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size)
prot = pgprot_noncached(PAGE_KERNEL);
- pages = kmalloc(sizeof(struct page *) * page_count, GFP_KERNEL);
+ pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
if (!pages) {
pr_err("%s: Failed to allocate array for %u pages\n",
__func__, page_count);
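This kmalloc_array() conversion (and the kcalloc()/kmalloc_array() changes in fs/ramfs and fs/squashfs further down) replaces open-coded `n * size` multiplications, which can silently overflow for large counts; the helpers return NULL in that case instead. Illustrative contrast only, variable names taken from the hunk above:

	/* old: overflow in the multiplication goes unnoticed */
	pages = kmalloc(sizeof(struct page *) * page_count, GFP_KERNEL);

	/* new: returns NULL if page_count * sizeof(struct page *) would overflow */
	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);

	/* kcalloc() performs the same check and additionally zero-initialises */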
diff --git a/fs/qnx6/Makefile b/fs/qnx6/Makefile
index 9dd06199afc9..5e6bae6fae50 100644
--- a/fs/qnx6/Makefile
+++ b/fs/qnx6/Makefile
@@ -5,3 +5,4 @@
obj-$(CONFIG_QNX6FS_FS) += qnx6.o
qnx6-objs := inode.o dir.o namei.o super_mmi.o
+ccflags-$(CONFIG_QNX6FS_DEBUG) += -DDEBUG
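Adding -DDEBUG when CONFIG_QNX6FS_DEBUG is set preserves the behaviour of the removed QNX6DEBUG() macro: with DEBUG defined, the pr_debug() calls introduced below emit real output; without it they compile away (or become dynamically controllable under CONFIG_DYNAMIC_DEBUG). Roughly:

	#if defined(DEBUG)
	#define pr_debug(fmt, ...) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
	#else
	/* no-op, or dynamic_pr_debug() when CONFIG_DYNAMIC_DEBUG is enabled */
	#endif

The same Makefile pattern is applied to fs/ufs later in this diff.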
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
index 15b7d92ed60d..8d64bb5366bf 100644
--- a/fs/qnx6/dir.c
+++ b/fs/qnx6/dir.c
@@ -77,21 +77,20 @@ static int qnx6_dir_longfilename(struct inode *inode,
if (de->de_size != 0xff) {
/* error - long filename entries always have size 0xff
in direntry */
- printk(KERN_ERR "qnx6: invalid direntry size (%i).\n",
- de->de_size);
+ pr_err("invalid direntry size (%i).\n", de->de_size);
return 0;
}
lf = qnx6_longname(s, de, &page);
if (IS_ERR(lf)) {
- printk(KERN_ERR "qnx6:Error reading longname\n");
+ pr_err("Error reading longname\n");
return 0;
}
lf_size = fs16_to_cpu(sbi, lf->lf_size);
if (lf_size > QNX6_LONG_NAME_MAX) {
- QNX6DEBUG((KERN_INFO "file %s\n", lf->lf_fname));
- printk(KERN_ERR "qnx6:Filename too long (%i)\n", lf_size);
+ pr_debug("file %s\n", lf->lf_fname);
+ pr_err("Filename too long (%i)\n", lf_size);
qnx6_put_page(page);
return 0;
}
@@ -100,10 +99,10 @@ static int qnx6_dir_longfilename(struct inode *inode,
mmi 3g filesystem does not have that checksum */
if (!test_opt(s, MMI_FS) && fs32_to_cpu(sbi, de->de_checksum) !=
qnx6_lfile_checksum(lf->lf_fname, lf_size))
- printk(KERN_INFO "qnx6: long filename checksum error.\n");
+ pr_info("long filename checksum error.\n");
- QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s inode:%u\n",
- lf_size, lf->lf_fname, de_inode));
+ pr_debug("qnx6_readdir:%.*s inode:%u\n",
+ lf_size, lf->lf_fname, de_inode);
if (!dir_emit(ctx, lf->lf_fname, lf_size, de_inode, DT_UNKNOWN)) {
qnx6_put_page(page);
return 0;
@@ -136,7 +135,7 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx)
int i = start;
if (IS_ERR(page)) {
- printk(KERN_ERR "qnx6_readdir: read failed\n");
+ pr_err("%s(): read failed\n", __func__);
ctx->pos = (n + 1) << PAGE_CACHE_SHIFT;
return PTR_ERR(page);
}
@@ -159,9 +158,9 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx)
break;
}
} else {
- QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s"
- " inode:%u\n", size, de->de_fname,
- no_inode));
+ pr_debug("%s():%.*s inode:%u\n",
+ __func__, size, de->de_fname,
+ no_inode);
if (!dir_emit(ctx, de->de_fname, size,
no_inode, DT_UNKNOWN)) {
done = true;
@@ -259,8 +258,7 @@ unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
if (ino)
goto found;
} else
- printk(KERN_ERR "qnx6: undefined "
- "filename size in inode.\n");
+ pr_err("undefined filename size in inode.\n");
}
qnx6_put_page(page);
}
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 65cdaab3ed49..44e73923670d 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -73,8 +73,8 @@ static int qnx6_get_block(struct inode *inode, sector_t iblock,
{
unsigned phys;
- QNX6DEBUG((KERN_INFO "qnx6: qnx6_get_block inode=[%ld] iblock=[%ld]\n",
- inode->i_ino, (unsigned long)iblock));
+ pr_debug("qnx6_get_block inode=[%ld] iblock=[%ld]\n",
+ inode->i_ino, (unsigned long)iblock);
phys = qnx6_block_map(inode, iblock);
if (phys) {
@@ -87,7 +87,7 @@ static int qnx6_get_block(struct inode *inode, sector_t iblock,
static int qnx6_check_blockptr(__fs32 ptr)
{
if (ptr == ~(__fs32)0) {
- printk(KERN_ERR "qnx6: hit unused blockpointer.\n");
+ pr_err("hit unused blockpointer.\n");
return 0;
}
return 1;
@@ -127,8 +127,7 @@ static unsigned qnx6_block_map(struct inode *inode, unsigned no)
levelptr = no >> bitdelta;
if (levelptr > QNX6_NO_DIRECT_POINTERS - 1) {
- printk(KERN_ERR "qnx6:Requested file block number (%u) too big.",
- no);
+ pr_err("Requested file block number (%u) too big.", no);
return 0;
}
@@ -137,8 +136,7 @@ static unsigned qnx6_block_map(struct inode *inode, unsigned no)
for (i = 0; i < depth; i++) {
bh = sb_bread(s, block);
if (!bh) {
- printk(KERN_ERR "qnx6:Error reading block (%u)\n",
- block);
+ pr_err("Error reading block (%u)\n", block);
return 0;
}
bitdelta -= ptrbits;
@@ -207,26 +205,16 @@ void qnx6_superblock_debug(struct qnx6_super_block *sb, struct super_block *s)
{
struct qnx6_sb_info *sbi = QNX6_SB(s);
- QNX6DEBUG((KERN_INFO "magic: %08x\n",
- fs32_to_cpu(sbi, sb->sb_magic)));
- QNX6DEBUG((KERN_INFO "checksum: %08x\n",
- fs32_to_cpu(sbi, sb->sb_checksum)));
- QNX6DEBUG((KERN_INFO "serial: %llx\n",
- fs64_to_cpu(sbi, sb->sb_serial)));
- QNX6DEBUG((KERN_INFO "flags: %08x\n",
- fs32_to_cpu(sbi, sb->sb_flags)));
- QNX6DEBUG((KERN_INFO "blocksize: %08x\n",
- fs32_to_cpu(sbi, sb->sb_blocksize)));
- QNX6DEBUG((KERN_INFO "num_inodes: %08x\n",
- fs32_to_cpu(sbi, sb->sb_num_inodes)));
- QNX6DEBUG((KERN_INFO "free_inodes: %08x\n",
- fs32_to_cpu(sbi, sb->sb_free_inodes)));
- QNX6DEBUG((KERN_INFO "num_blocks: %08x\n",
- fs32_to_cpu(sbi, sb->sb_num_blocks)));
- QNX6DEBUG((KERN_INFO "free_blocks: %08x\n",
- fs32_to_cpu(sbi, sb->sb_free_blocks)));
- QNX6DEBUG((KERN_INFO "inode_levels: %02x\n",
- sb->Inode.levels));
+ pr_debug("magic: %08x\n", fs32_to_cpu(sbi, sb->sb_magic));
+ pr_debug("checksum: %08x\n", fs32_to_cpu(sbi, sb->sb_checksum));
+ pr_debug("serial: %llx\n", fs64_to_cpu(sbi, sb->sb_serial));
+ pr_debug("flags: %08x\n", fs32_to_cpu(sbi, sb->sb_flags));
+ pr_debug("blocksize: %08x\n", fs32_to_cpu(sbi, sb->sb_blocksize));
+ pr_debug("num_inodes: %08x\n", fs32_to_cpu(sbi, sb->sb_num_inodes));
+ pr_debug("free_inodes: %08x\n", fs32_to_cpu(sbi, sb->sb_free_inodes));
+ pr_debug("num_blocks: %08x\n", fs32_to_cpu(sbi, sb->sb_num_blocks));
+ pr_debug("free_blocks: %08x\n", fs32_to_cpu(sbi, sb->sb_free_blocks));
+ pr_debug("inode_levels: %02x\n", sb->Inode.levels);
}
#endif
@@ -277,7 +265,7 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
start with the first superblock */
bh = sb_bread(s, offset);
if (!bh) {
- printk(KERN_ERR "qnx6: unable to read the first superblock\n");
+ pr_err("unable to read the first superblock\n");
return NULL;
}
sb = (struct qnx6_super_block *)bh->b_data;
@@ -285,20 +273,16 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
sbi->s_bytesex = BYTESEX_BE;
if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) {
/* we got a big endian fs */
- QNX6DEBUG((KERN_INFO "qnx6: fs got different"
- " endianness.\n"));
+ pr_debug("fs got different endianness.\n");
return bh;
} else
sbi->s_bytesex = BYTESEX_LE;
if (!silent) {
if (offset == 0) {
- printk(KERN_ERR "qnx6: wrong signature (magic)"
- " in superblock #1.\n");
+ pr_err("wrong signature (magic) in superblock #1.\n");
} else {
- printk(KERN_INFO "qnx6: wrong signature (magic)"
- " at position (0x%lx) - will try"
- " alternative position (0x0000).\n",
- offset * s->s_blocksize);
+ pr_info("wrong signature (magic) at position (0x%lx) - will try alternative position (0x0000).\n",
+ offset * s->s_blocksize);
}
}
brelse(bh);
@@ -329,13 +313,13 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
/* Superblock always is 512 Byte long */
if (!sb_set_blocksize(s, QNX6_SUPERBLOCK_SIZE)) {
- printk(KERN_ERR "qnx6: unable to set blocksize\n");
+ pr_err("unable to set blocksize\n");
goto outnobh;
}
/* parse the mount-options */
if (!qnx6_parse_options((char *) data, s)) {
- printk(KERN_ERR "qnx6: invalid mount options.\n");
+ pr_err("invalid mount options.\n");
goto outnobh;
}
if (test_opt(s, MMI_FS)) {
@@ -355,7 +339,7 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
/* try again without bootblock offset */
bh1 = qnx6_check_first_superblock(s, 0, silent);
if (!bh1) {
- printk(KERN_ERR "qnx6: unable to read the first superblock\n");
+ pr_err("unable to read the first superblock\n");
goto outnobh;
}
/* seems that no bootblock at partition start */
@@ -370,13 +354,13 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
/* checksum check - start at byte 8 and end at byte 512 */
if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
- printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
+ pr_err("superblock #1 checksum error\n");
goto out;
}
/* set new blocksize */
if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
- printk(KERN_ERR "qnx6: unable to set blocksize\n");
+ pr_err("unable to set blocksize\n");
goto out;
}
/* blocksize invalidates bh - pull it back in */
@@ -398,21 +382,20 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
/* next the second superblock */
bh2 = sb_bread(s, offset);
if (!bh2) {
- printk(KERN_ERR "qnx6: unable to read the second superblock\n");
+ pr_err("unable to read the second superblock\n");
goto out;
}
sb2 = (struct qnx6_super_block *)bh2->b_data;
if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
if (!silent)
- printk(KERN_ERR "qnx6: wrong signature (magic)"
- " in superblock #2.\n");
+ pr_err("wrong signature (magic) in superblock #2.\n");
goto out;
}
/* checksum check - start at byte 8 and end at byte 512 */
if (fs32_to_cpu(sbi, sb2->sb_checksum) !=
crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
- printk(KERN_ERR "qnx6: superblock #2 checksum error\n");
+ pr_err("superblock #2 checksum error\n");
goto out;
}
@@ -422,25 +405,24 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
sbi->sb_buf = bh1;
sbi->sb = (struct qnx6_super_block *)bh1->b_data;
brelse(bh2);
- printk(KERN_INFO "qnx6: superblock #1 active\n");
+ pr_info("superblock #1 active\n");
} else {
/* superblock #2 active */
sbi->sb_buf = bh2;
sbi->sb = (struct qnx6_super_block *)bh2->b_data;
brelse(bh1);
- printk(KERN_INFO "qnx6: superblock #2 active\n");
+ pr_info("superblock #2 active\n");
}
mmi_success:
/* sanity check - limit maximum indirect pointer levels */
if (sb1->Inode.levels > QNX6_PTR_MAX_LEVELS) {
- printk(KERN_ERR "qnx6: too many inode levels (max %i, sb %i)\n",
- QNX6_PTR_MAX_LEVELS, sb1->Inode.levels);
+ pr_err("too many inode levels (max %i, sb %i)\n",
+ QNX6_PTR_MAX_LEVELS, sb1->Inode.levels);
goto out;
}
if (sb1->Longfile.levels > QNX6_PTR_MAX_LEVELS) {
- printk(KERN_ERR "qnx6: too many longfilename levels"
- " (max %i, sb %i)\n",
- QNX6_PTR_MAX_LEVELS, sb1->Longfile.levels);
+ pr_err("too many longfilename levels (max %i, sb %i)\n",
+ QNX6_PTR_MAX_LEVELS, sb1->Longfile.levels);
goto out;
}
s->s_op = &qnx6_sops;
@@ -460,7 +442,7 @@ mmi_success:
/* prefetch root inode */
root = qnx6_iget(s, QNX6_ROOT_INO);
if (IS_ERR(root)) {
- printk(KERN_ERR "qnx6: get inode failed\n");
+ pr_err("get inode failed\n");
ret = PTR_ERR(root);
goto out2;
}
@@ -474,7 +456,7 @@ mmi_success:
errmsg = qnx6_checkroot(s);
if (errmsg != NULL) {
if (!silent)
- printk(KERN_ERR "qnx6: %s\n", errmsg);
+ pr_err("%s\n", errmsg);
goto out3;
}
return 0;
@@ -555,8 +537,7 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
inode->i_mode = 0;
if (ino == 0) {
- printk(KERN_ERR "qnx6: bad inode number on dev %s: %u is "
- "out of range\n",
+ pr_err("bad inode number on dev %s: %u is out of range\n",
sb->s_id, ino);
iget_failed(inode);
return ERR_PTR(-EIO);
@@ -566,8 +547,8 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
mapping = sbi->inodes->i_mapping;
page = read_mapping_page(mapping, n, NULL);
if (IS_ERR(page)) {
- printk(KERN_ERR "qnx6: major problem: unable to read inode from "
- "dev %s\n", sb->s_id);
+ pr_err("major problem: unable to read inode from dev %s\n",
+ sb->s_id);
iget_failed(inode);
return ERR_CAST(page);
}
@@ -689,7 +670,7 @@ static int __init init_qnx6_fs(void)
return err;
}
- printk(KERN_INFO "QNX6 filesystem 1.0.0 registered.\n");
+ pr_info("QNX6 filesystem 1.0.0 registered.\n");
return 0;
}
diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c
index 0561326a94f5..6c1a323137dd 100644
--- a/fs/qnx6/namei.c
+++ b/fs/qnx6/namei.c
@@ -29,12 +29,12 @@ struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
foundinode = qnx6_iget(dir->i_sb, ino);
qnx6_put_page(page);
if (IS_ERR(foundinode)) {
- QNX6DEBUG((KERN_ERR "qnx6: lookup->iget -> "
- " error %ld\n", PTR_ERR(foundinode)));
+ pr_debug("lookup->iget -> error %ld\n",
+ PTR_ERR(foundinode));
return ERR_CAST(foundinode);
}
} else {
- QNX6DEBUG((KERN_INFO "qnx6_lookup: not found %s\n", name));
+ pr_debug("%s(): not found %s\n", __func__, name);
return NULL;
}
d_add(dentry, foundinode);
diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h
index b00fcc960d37..d3fb2b698800 100644
--- a/fs/qnx6/qnx6.h
+++ b/fs/qnx6/qnx6.h
@@ -10,6 +10,12 @@
*
*/
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/fs.h>
#include <linux/pagemap.h>
@@ -19,12 +25,6 @@ typedef __u64 __bitwise __fs64;
#include <linux/qnx6_fs.h>
-#ifdef CONFIG_QNX6FS_DEBUG
-#define QNX6DEBUG(X) printk X
-#else
-#define QNX6DEBUG(X) (void) 0
-#endif
-
struct qnx6_sb_info {
struct buffer_head *sb_buf; /* superblock buffer */
struct qnx6_super_block *sb; /* our superblock */
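The pr_fmt definition added above is why the explicit "qnx6: " prefixes can be dropped from the individual messages in this series: pr_err()/pr_info()/pr_debug() prepend pr_fmt(fmt) to every format string. Roughly, for this module:

	pr_err("invalid mount options.\n");
	/* expands to something like */
	printk(KERN_ERR KBUILD_MODNAME ": " "invalid mount options.\n");
	/* i.e. "qnx6: invalid mount options." in the log */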
diff --git a/fs/qnx6/super_mmi.c b/fs/qnx6/super_mmi.c
index 29c32cba62d6..62aaf3e3126a 100644
--- a/fs/qnx6/super_mmi.c
+++ b/fs/qnx6/super_mmi.c
@@ -44,15 +44,14 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
start with the first superblock */
bh1 = sb_bread(s, 0);
if (!bh1) {
- printk(KERN_ERR "qnx6: Unable to read first mmi superblock\n");
+ pr_err("Unable to read first mmi superblock\n");
return NULL;
}
sb1 = (struct qnx6_mmi_super_block *)bh1->b_data;
sbi = QNX6_SB(s);
if (fs32_to_cpu(sbi, sb1->sb_magic) != QNX6_SUPER_MAGIC) {
if (!silent) {
- printk(KERN_ERR "qnx6: wrong signature (magic) in"
- " superblock #1.\n");
+ pr_err("wrong signature (magic) in superblock #1.\n");
goto out;
}
}
@@ -60,7 +59,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
/* checksum check - start at byte 8 and end at byte 512 */
if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
- printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
+ pr_err("superblock #1 checksum error\n");
goto out;
}
@@ -70,7 +69,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
/* set new blocksize */
if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
- printk(KERN_ERR "qnx6: unable to set blocksize\n");
+ pr_err("unable to set blocksize\n");
goto out;
}
/* blocksize invalidates bh - pull it back in */
@@ -83,27 +82,26 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
/* read second superblock */
bh2 = sb_bread(s, offset);
if (!bh2) {
- printk(KERN_ERR "qnx6: unable to read the second superblock\n");
+ pr_err("unable to read the second superblock\n");
goto out;
}
sb2 = (struct qnx6_mmi_super_block *)bh2->b_data;
if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
if (!silent)
- printk(KERN_ERR "qnx6: wrong signature (magic) in"
- " superblock #2.\n");
+ pr_err("wrong signature (magic) in superblock #2.\n");
goto out;
}
/* checksum check - start at byte 8 and end at byte 512 */
if (fs32_to_cpu(sbi, sb2->sb_checksum)
!= crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
- printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
+ pr_err("superblock #1 checksum error\n");
goto out;
}
qsb = kmalloc(sizeof(*qsb), GFP_KERNEL);
if (!qsb) {
- printk(KERN_ERR "qnx6: unable to allocate memory.\n");
+ pr_err("unable to allocate memory.\n");
goto out;
}
@@ -119,7 +117,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
sbi->sb_buf = bh1;
sbi->sb = (struct qnx6_super_block *)bh1->b_data;
brelse(bh2);
- printk(KERN_INFO "qnx6: superblock #1 active\n");
+ pr_info("superblock #1 active\n");
} else {
/* superblock #2 active */
qnx6_mmi_copy_sb(qsb, sb2);
@@ -131,7 +129,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
sbi->sb_buf = bh2;
sbi->sb = (struct qnx6_super_block *)bh2->b_data;
brelse(bh1);
- printk(KERN_INFO "qnx6: superblock #2 active\n");
+ pr_info("superblock #2 active\n");
}
kfree(qsb);
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index dda012ad4208..bbafbde3471a 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -222,7 +222,7 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
/* gang-find the pages */
ret = -ENOMEM;
- pages = kzalloc(lpages * sizeof(struct page *), GFP_KERNEL);
+ pages = kcalloc(lpages, sizeof(struct page *), GFP_KERNEL);
if (!pages)
goto out_free;
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index d9f5a60dd59b..0a7dc941aaf4 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -9,7 +9,7 @@
#include <linux/stat.h>
#include <linux/buffer_head.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
extern const struct reiserfs_key MIN_KEY;
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 4d5e5297793f..9c02d96d3a42 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -10,7 +10,7 @@
* and using buffers obtained after all above.
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/time.h>
#include "reiserfs.h"
#include <linux/buffer_head.h>
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index db9e80ba53a0..751dd3f4346b 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -6,7 +6,7 @@
#include "reiserfs.h"
#include "acl.h"
#include "xattr.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/writeback.h>
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index 73231b1ebdbe..b751eea32e20 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c
@@ -2,7 +2,7 @@
* Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/string.h>
#include <linux/time.h>
#include "reiserfs.h"
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 63b2b0ec49e6..a7eec9888f10 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -11,7 +11,7 @@
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 501ed6811a2b..6ec8a30a0911 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -7,7 +7,7 @@
#include <linux/mount.h>
#include "reiserfs.h"
#include <linux/time.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/pagemap.h>
#include <linux/compat.h>
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index cfaee912ee09..aca73dd73906 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -54,7 +54,7 @@ static void sd_print_item(struct item_head *ih, char *item)
} else {
struct stat_data *sd = (struct stat_data *)item;
- printk("\t0%-6o | %6Lu | %2u | %d | %s\n", sd_v2_mode(sd),
+ printk("\t0%-6o | %6llu | %2u | %d | %s\n", sd_v2_mode(sd),
(unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd),
sd_v2_rdev(sd), print_time(sd_v2_mtime(sd)));
}
@@ -408,7 +408,7 @@ static void direntry_print_item(struct item_head *ih, char *item)
namebuf[namelen + 2] = 0;
}
- printk("%d: %-15s%-15d%-15d%-15Ld%-15Ld(%s)\n",
+ printk("%d: %-15s%-15d%-15d%-15lld%-15lld(%s)\n",
i, namebuf,
deh_dir_id(deh), deh_objectid(deh),
GET_HASH_VALUE(deh_offset(deh)),
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 3a74d15eb814..249594a821e0 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -2,7 +2,7 @@
* Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/string.h>
#include <linux/time.h>
#include "reiserfs.h"
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index c9b47e91baf8..ae1dc841db3a 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -17,7 +17,7 @@ static char off_buf[80];
static char *reiserfs_cpu_offset(struct cpu_key *key)
{
if (cpu_key_k_type(key) == TYPE_DIRENTRY)
- sprintf(off_buf, "%Lu(%Lu)",
+ sprintf(off_buf, "%llu(%llu)",
(unsigned long long)
GET_HASH_VALUE(cpu_key_k_offset(key)),
(unsigned long long)
@@ -34,7 +34,7 @@ static char *le_offset(struct reiserfs_key *key)
version = le_key_version(key);
if (le_key_k_type(version, key) == TYPE_DIRENTRY)
- sprintf(off_buf, "%Lu(%Lu)",
+ sprintf(off_buf, "%llu(%llu)",
(unsigned long long)
GET_HASH_VALUE(le_key_k_offset(version, key)),
(unsigned long long)
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 02b0b7d0f7d5..621b9f381fe1 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -11,7 +11,7 @@
#include <linux/module.h>
#include <linux/time.h>
#include <linux/seq_file.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "reiserfs.h"
#include <linux/init.h>
#include <linux/proc_fs.h>
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index dd44468edc2b..24cbe013240f 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -2006,7 +2006,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
&s_search_path) == POSITION_FOUND);
RFALSE(file_size > ROUND_UP(new_file_size),
- "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
+ "PAP-5680: truncate did not finish: new_file_size %lld, current %lld, oid %d",
new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
update_and_out:
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index a392cef6acc6..709ea92d716f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -15,7 +15,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/time.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "reiserfs.h"
#include "acl.h"
#include "xattr.h"
@@ -331,7 +331,7 @@ static int finish_unfinished(struct super_block *s)
* not completed truncate found. New size was
* committed together with "save" link
*/
- reiserfs_info(s, "Truncating %k to %Ld ..",
+ reiserfs_info(s, "Truncating %k to %lld ..",
INODE_PKEY(inode), inode->i_size);
/* don't update modification time */
@@ -1577,7 +1577,7 @@ static int read_super_block(struct super_block *s, int offset)
rs = (struct reiserfs_super_block *)bh->b_data;
if (sb_blocksize(rs) != s->s_blocksize) {
reiserfs_warning(s, "sh-2011", "can't find a reiserfs "
- "filesystem on (dev %s, block %Lu, size %lu)",
+ "filesystem on (dev %s, block %llu, size %lu)",
s->s_id,
(unsigned long long)bh->b_blocknr,
s->s_blocksize);
@@ -2441,8 +2441,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
struct buffer_head tmp_bh, *bh;
if (!current->journal_info) {
- printk(KERN_WARNING "reiserfs: Quota write (off=%Lu, len=%Lu)"
- " cancelled because transaction is not started.\n",
+ printk(KERN_WARNING "reiserfs: Quota write (off=%llu, len=%llu) cancelled because transaction is not started.\n",
(unsigned long long)off, (unsigned long long)len);
return -EIO;
}
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index ca416d099e7d..7c36898af402 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -45,7 +45,7 @@
#include <linux/xattr.h>
#include "xattr.h"
#include "acl.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <net/checksum.h>
#include <linux/stat.h>
#include <linux/quotaops.h>
@@ -84,6 +84,7 @@ static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
static int xattr_unlink(struct inode *dir, struct dentry *dentry)
{
int error;
+
BUG_ON(!mutex_is_locked(&dir->i_mutex));
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
@@ -98,6 +99,7 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
+
BUG_ON(!mutex_is_locked(&dir->i_mutex));
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
@@ -117,6 +119,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
{
struct dentry *privroot = REISERFS_SB(sb)->priv_root;
struct dentry *xaroot;
+
if (!privroot->d_inode)
return ERR_PTR(-ENODATA);
@@ -127,6 +130,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
xaroot = ERR_PTR(-ENODATA);
else if (!xaroot->d_inode) {
int err = -ENODATA;
+
if (xattr_may_create(flags))
err = xattr_mkdir(privroot->d_inode, xaroot, 0700);
if (err) {
@@ -157,6 +161,7 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
if (!IS_ERR(xadir) && !xadir->d_inode) {
int err = -ENODATA;
+
if (xattr_may_create(flags))
err = xattr_mkdir(xaroot->d_inode, xadir, 0700);
if (err) {
@@ -188,6 +193,7 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset,
{
struct reiserfs_dentry_buf *dbuf = buf;
struct dentry *dentry;
+
WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex));
if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
@@ -218,6 +224,7 @@ static void
cleanup_dentry_buf(struct reiserfs_dentry_buf *buf)
{
int i;
+
for (i = 0; i < buf->count; i++)
if (buf->dentries[i])
dput(buf->dentries[i]);
@@ -283,11 +290,13 @@ static int reiserfs_for_each_xattr(struct inode *inode,
int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 +
4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
struct reiserfs_transaction_handle th;
+
reiserfs_write_lock(inode->i_sb);
err = journal_begin(&th, inode->i_sb, blocks);
reiserfs_write_unlock(inode->i_sb);
if (!err) {
int jerror;
+
mutex_lock_nested(&dir->d_parent->d_inode->i_mutex,
I_MUTEX_XATTR);
err = action(dir, data);
@@ -340,6 +349,7 @@ static int chown_one_xattr(struct dentry *dentry, void *data)
int reiserfs_delete_xattrs(struct inode *inode)
{
int err = reiserfs_for_each_xattr(inode, delete_one_xattr, NULL);
+
if (err)
reiserfs_warning(inode->i_sb, "jdm-20004",
"Couldn't delete all xattrs (%d)\n", err);
@@ -350,6 +360,7 @@ int reiserfs_delete_xattrs(struct inode *inode)
int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
{
int err = reiserfs_for_each_xattr(inode, chown_one_xattr, attrs);
+
if (err)
reiserfs_warning(inode->i_sb, "jdm-20007",
"Couldn't chown all xattrs (%d)\n", err);
@@ -439,6 +450,7 @@ int reiserfs_commit_write(struct file *f, struct page *page,
static void update_ctime(struct inode *inode)
{
struct timespec now = current_fs_time(inode->i_sb);
+
if (inode_unhashed(inode) || !inode->i_nlink ||
timespec_equal(&inode->i_ctime, &now))
return;
@@ -514,6 +526,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
size_t chunk;
size_t skip = 0;
size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1));
+
if (buffer_size - buffer_pos > PAGE_CACHE_SIZE)
chunk = PAGE_CACHE_SIZE;
else
@@ -530,6 +543,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
if (file_pos == 0) {
struct reiserfs_xattr_header *rxh;
+
skip = file_pos = sizeof(struct reiserfs_xattr_header);
if (chunk + skip > PAGE_CACHE_SIZE)
chunk = PAGE_CACHE_SIZE - skip;
@@ -659,6 +673,7 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
size_t chunk;
char *data;
size_t skip = 0;
+
if (isize - file_pos > PAGE_CACHE_SIZE)
chunk = PAGE_CACHE_SIZE;
else
@@ -792,6 +807,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
int reiserfs_removexattr(struct dentry *dentry, const char *name)
{
const struct xattr_handler *handler;
+
handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
@@ -813,9 +829,11 @@ static int listxattr_filler(void *buf, const char *name, int namelen,
{
struct listxattr_buf *b = (struct listxattr_buf *)buf;
size_t size;
+
if (name[0] != '.' ||
(namelen != 1 && (name[1] != '.' || namelen != 2))) {
const struct xattr_handler *handler;
+
handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
name);
if (!handler) /* Unsupported xattr name */
@@ -885,6 +903,7 @@ static int create_privroot(struct dentry *dentry)
{
int err;
struct inode *inode = dentry->d_parent->d_inode;
+
WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
err = xattr_mkdir(inode, dentry, 0700);
@@ -1015,6 +1034,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
mutex_lock(&privroot->d_inode->i_mutex);
if (!REISERFS_SB(s)->xattr_root) {
struct dentry *dentry;
+
dentry = lookup_one_len(XAROOT_NAME, privroot,
strlen(XAROOT_NAME));
if (!IS_ERR(dentry))
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 44503e293790..4b34b9dc03dd 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -9,7 +9,7 @@
#include <linux/posix_acl_xattr.h>
#include "xattr.h"
#include "acl.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
static int __reiserfs_set_acl(struct reiserfs_transaction_handle *th,
struct inode *inode, int type,
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 800a3cef6f62..e7f8939a4cb5 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -6,7 +6,7 @@
#include <linux/slab.h>
#include "xattr.h"
#include <linux/security.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
static int
security_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index a0035719f66b..5eeb0c48ba46 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -5,7 +5,7 @@
#include <linux/pagemap.h>
#include <linux/xattr.h>
#include "xattr.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
static int
trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 8667491ae7c3..e50eab046471 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -4,7 +4,7 @@
#include <linux/pagemap.h>
#include <linux/xattr.h>
#include "xattr.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
static int
user_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index ef90e8bca95a..e98dd88197d5 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -56,6 +56,8 @@
* 2 of the Licence, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/string.h>
#include <linux/fs.h>
@@ -380,7 +382,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
eio:
ret = -EIO;
error:
- printk(KERN_ERR "ROMFS: read error for inode 0x%lx\n", pos);
+ pr_err("read error for inode 0x%lx\n", pos);
return ERR_PTR(ret);
}
@@ -390,6 +392,7 @@ error:
static struct inode *romfs_alloc_inode(struct super_block *sb)
{
struct romfs_inode_info *inode;
+
inode = kmem_cache_alloc(romfs_inode_cachep, GFP_KERNEL);
return inode ? &inode->vfs_inode : NULL;
}
@@ -400,6 +403,7 @@ static struct inode *romfs_alloc_inode(struct super_block *sb)
static void romfs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
+
kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
}
@@ -507,15 +511,13 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
if (rsb->word0 != ROMSB_WORD0 || rsb->word1 != ROMSB_WORD1 ||
img_size < ROMFH_SIZE) {
if (!silent)
- printk(KERN_WARNING "VFS:"
- " Can't find a romfs filesystem on dev %s.\n",
+ pr_warn("VFS: Can't find a romfs filesystem on dev %s.\n",
sb->s_id);
goto error_rsb_inval;
}
if (romfs_checksum(rsb, min_t(size_t, img_size, 512))) {
- printk(KERN_ERR "ROMFS: bad initial checksum on dev %s.\n",
- sb->s_id);
+ pr_err("bad initial checksum on dev %s.\n", sb->s_id);
goto error_rsb_inval;
}
@@ -523,8 +525,8 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
len = strnlen(rsb->name, ROMFS_MAXFN);
if (!silent)
- printk(KERN_NOTICE "ROMFS: Mounting image '%*.*s' through %s\n",
- (unsigned) len, (unsigned) len, rsb->name, storage);
+ pr_notice("Mounting image '%*.*s' through %s\n",
+ (unsigned) len, (unsigned) len, rsb->name, storage);
kfree(rsb);
rsb = NULL;
@@ -614,7 +616,7 @@ static int __init init_romfs_fs(void)
{
int ret;
- printk(KERN_INFO "ROMFS MTD (C) 2007 Red Hat, Inc.\n");
+ pr_info("ROMFS MTD (C) 2007 Red Hat, Inc.\n");
romfs_inode_cachep =
kmem_cache_create("romfs_i",
@@ -623,13 +625,12 @@ static int __init init_romfs_fs(void)
romfs_i_init_once);
if (!romfs_inode_cachep) {
- printk(KERN_ERR
- "ROMFS error: Failed to initialise inode cache\n");
+ pr_err("Failed to initialise inode cache\n");
return -ENOMEM;
}
ret = register_filesystem(&romfs_fs_type);
if (ret) {
- printk(KERN_ERR "ROMFS error: Failed to register filesystem\n");
+ pr_err("Failed to register filesystem\n");
goto error_register;
}
return 0;
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index 62a0de6632e1..43e7a7eddac0 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -44,7 +44,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
pages = end_index - start_index + 1;
- page = kmalloc(sizeof(void *) * pages, GFP_KERNEL);
+ page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL);
if (page == NULL)
return res;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 031c8d67fd51..5056babe00df 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -27,6 +27,8 @@
* the filesystem.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/slab.h>
@@ -448,8 +450,7 @@ static int __init init_squashfs_fs(void)
return err;
}
- printk(KERN_INFO "squashfs: version 4.0 (2009/01/31) "
- "Phillip Lougher\n");
+ pr_info("version 4.0 (2009/01/31) Phillip Lougher\n");
return 0;
}
diff --git a/fs/ufs/Makefile b/fs/ufs/Makefile
index dd39980437fc..4d0e02b022b3 100644
--- a/fs/ufs/Makefile
+++ b/fs/ufs/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_UFS_FS) += ufs.o
ufs-objs := balloc.o cylinder.o dir.o file.o ialloc.o inode.o \
namei.o super.o symlink.o truncate.o util.o
+ccflags-$(CONFIG_UFS_DEBUG) += -DDEBUG
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 61e8a9b021dd..7c580c97990e 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -158,16 +158,16 @@ out:
/**
* ufs_inode_getfrag() - allocate new fragment(s)
- * @inode - pointer to inode
- * @fragment - number of `fragment' which hold pointer
+ * @inode: pointer to inode
+ * @fragment: number of `fragment' which hold pointer
* to new allocated fragment(s)
- * @new_fragment - number of new allocated fragment(s)
- * @required - how many fragment(s) we require
- * @err - we set it if something wrong
- * @phys - pointer to where we save physical number of new allocated fragments,
+ * @new_fragment: number of new allocated fragment(s)
+ * @required: how many fragment(s) we require
+ * @err: we set it if something wrong
+ * @phys: pointer to where we save physical number of new allocated fragments,
* NULL if we allocate not data(indirect blocks for example).
- * @new - we set it if we allocate new block
- * @locked_page - for ufs_new_fragments()
+ * @new: we set it if we allocate new block
+ * @locked_page: for ufs_new_fragments()
*/
static struct buffer_head *
ufs_inode_getfrag(struct inode *inode, u64 fragment,
@@ -315,16 +315,16 @@ repeat2:
/**
* ufs_inode_getblock() - allocate new block
- * @inode - pointer to inode
- * @bh - pointer to block which hold "pointer" to new allocated block
- * @fragment - number of `fragment' which hold pointer
+ * @inode: pointer to inode
+ * @bh: pointer to block which hold "pointer" to new allocated block
+ * @fragment: number of `fragment' which hold pointer
* to new allocated block
- * @new_fragment - number of new allocated fragment
+ * @new_fragment: number of new allocated fragment
* (block will hold this fragment and also uspi->s_fpb-1)
- * @err - see ufs_inode_getfrag()
- * @phys - see ufs_inode_getfrag()
- * @new - see ufs_inode_getfrag()
- * @locked_page - see ufs_inode_getfrag()
+ * @err: see ufs_inode_getfrag()
+ * @phys: see ufs_inode_getfrag()
+ * @new: see ufs_inode_getfrag()
+ * @locked_page: see ufs_inode_getfrag()
*/
static struct buffer_head *
ufs_inode_getblock(struct inode *inode, struct buffer_head *bh,
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index b879f1ba3439..da73801301d5 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -65,7 +65,6 @@
* Evgeniy Dushistov <dushistov@mail.ru>, 2007
*/
-
#include <linux/exportfs.h>
#include <linux/module.h>
#include <linux/bitops.h>
@@ -172,73 +171,73 @@ static void ufs_print_super_stuff(struct super_block *sb,
{
u32 magic = fs32_to_cpu(sb, usb3->fs_magic);
- printk("ufs_print_super_stuff\n");
- printk(" magic: 0x%x\n", magic);
+ pr_debug("ufs_print_super_stuff\n");
+ pr_debug(" magic: 0x%x\n", magic);
if (fs32_to_cpu(sb, usb3->fs_magic) == UFS2_MAGIC) {
- printk(" fs_size: %llu\n", (unsigned long long)
- fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size));
- printk(" fs_dsize: %llu\n", (unsigned long long)
- fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize));
- printk(" bsize: %u\n",
- fs32_to_cpu(sb, usb1->fs_bsize));
- printk(" fsize: %u\n",
- fs32_to_cpu(sb, usb1->fs_fsize));
- printk(" fs_volname: %s\n", usb2->fs_un.fs_u2.fs_volname);
- printk(" fs_sblockloc: %llu\n", (unsigned long long)
- fs64_to_cpu(sb, usb2->fs_un.fs_u2.fs_sblockloc));
- printk(" cs_ndir(No of dirs): %llu\n", (unsigned long long)
- fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir));
- printk(" cs_nbfree(No of free blocks): %llu\n",
- (unsigned long long)
- fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree));
- printk(KERN_INFO" cs_nifree(Num of free inodes): %llu\n",
- (unsigned long long)
- fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nifree));
- printk(KERN_INFO" cs_nffree(Num of free frags): %llu\n",
- (unsigned long long)
- fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nffree));
- printk(KERN_INFO" fs_maxsymlinklen: %u\n",
- fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen));
+ pr_debug(" fs_size: %llu\n", (unsigned long long)
+ fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size));
+ pr_debug(" fs_dsize: %llu\n", (unsigned long long)
+ fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize));
+ pr_debug(" bsize: %u\n",
+ fs32_to_cpu(sb, usb1->fs_bsize));
+ pr_debug(" fsize: %u\n",
+ fs32_to_cpu(sb, usb1->fs_fsize));
+ pr_debug(" fs_volname: %s\n", usb2->fs_un.fs_u2.fs_volname);
+ pr_debug(" fs_sblockloc: %llu\n", (unsigned long long)
+ fs64_to_cpu(sb, usb2->fs_un.fs_u2.fs_sblockloc));
+ pr_debug(" cs_ndir(No of dirs): %llu\n", (unsigned long long)
+ fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir));
+ pr_debug(" cs_nbfree(No of free blocks): %llu\n",
+ (unsigned long long)
+ fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree));
+ pr_info(" cs_nifree(Num of free inodes): %llu\n",
+ (unsigned long long)
+ fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nifree));
+ pr_info(" cs_nffree(Num of free frags): %llu\n",
+ (unsigned long long)
+ fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nffree));
+ pr_info(" fs_maxsymlinklen: %u\n",
+ fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen));
} else {
- printk(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno));
- printk(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno));
- printk(" iblkno: %u\n", fs32_to_cpu(sb, usb1->fs_iblkno));
- printk(" dblkno: %u\n", fs32_to_cpu(sb, usb1->fs_dblkno));
- printk(" cgoffset: %u\n",
- fs32_to_cpu(sb, usb1->fs_cgoffset));
- printk(" ~cgmask: 0x%x\n",
- ~fs32_to_cpu(sb, usb1->fs_cgmask));
- printk(" size: %u\n", fs32_to_cpu(sb, usb1->fs_size));
- printk(" dsize: %u\n", fs32_to_cpu(sb, usb1->fs_dsize));
- printk(" ncg: %u\n", fs32_to_cpu(sb, usb1->fs_ncg));
- printk(" bsize: %u\n", fs32_to_cpu(sb, usb1->fs_bsize));
- printk(" fsize: %u\n", fs32_to_cpu(sb, usb1->fs_fsize));
- printk(" frag: %u\n", fs32_to_cpu(sb, usb1->fs_frag));
- printk(" fragshift: %u\n",
- fs32_to_cpu(sb, usb1->fs_fragshift));
- printk(" ~fmask: %u\n", ~fs32_to_cpu(sb, usb1->fs_fmask));
- printk(" fshift: %u\n", fs32_to_cpu(sb, usb1->fs_fshift));
- printk(" sbsize: %u\n", fs32_to_cpu(sb, usb1->fs_sbsize));
- printk(" spc: %u\n", fs32_to_cpu(sb, usb1->fs_spc));
- printk(" cpg: %u\n", fs32_to_cpu(sb, usb1->fs_cpg));
- printk(" ipg: %u\n", fs32_to_cpu(sb, usb1->fs_ipg));
- printk(" fpg: %u\n", fs32_to_cpu(sb, usb1->fs_fpg));
- printk(" csaddr: %u\n", fs32_to_cpu(sb, usb1->fs_csaddr));
- printk(" cssize: %u\n", fs32_to_cpu(sb, usb1->fs_cssize));
- printk(" cgsize: %u\n", fs32_to_cpu(sb, usb1->fs_cgsize));
- printk(" fstodb: %u\n",
- fs32_to_cpu(sb, usb1->fs_fsbtodb));
- printk(" nrpos: %u\n", fs32_to_cpu(sb, usb3->fs_nrpos));
- printk(" ndir %u\n",
- fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir));
- printk(" nifree %u\n",
- fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree));
- printk(" nbfree %u\n",
- fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree));
- printk(" nffree %u\n",
- fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree));
+ pr_debug(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno));
+ pr_debug(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno));
+ pr_debug(" iblkno: %u\n", fs32_to_cpu(sb, usb1->fs_iblkno));
+ pr_debug(" dblkno: %u\n", fs32_to_cpu(sb, usb1->fs_dblkno));
+ pr_debug(" cgoffset: %u\n",
+ fs32_to_cpu(sb, usb1->fs_cgoffset));
+ pr_debug(" ~cgmask: 0x%x\n",
+ ~fs32_to_cpu(sb, usb1->fs_cgmask));
+ pr_debug(" size: %u\n", fs32_to_cpu(sb, usb1->fs_size));
+ pr_debug(" dsize: %u\n", fs32_to_cpu(sb, usb1->fs_dsize));
+ pr_debug(" ncg: %u\n", fs32_to_cpu(sb, usb1->fs_ncg));
+ pr_debug(" bsize: %u\n", fs32_to_cpu(sb, usb1->fs_bsize));
+ pr_debug(" fsize: %u\n", fs32_to_cpu(sb, usb1->fs_fsize));
+ pr_debug(" frag: %u\n", fs32_to_cpu(sb, usb1->fs_frag));
+ pr_debug(" fragshift: %u\n",
+ fs32_to_cpu(sb, usb1->fs_fragshift));
+ pr_debug(" ~fmask: %u\n", ~fs32_to_cpu(sb, usb1->fs_fmask));
+ pr_debug(" fshift: %u\n", fs32_to_cpu(sb, usb1->fs_fshift));
+ pr_debug(" sbsize: %u\n", fs32_to_cpu(sb, usb1->fs_sbsize));
+ pr_debug(" spc: %u\n", fs32_to_cpu(sb, usb1->fs_spc));
+ pr_debug(" cpg: %u\n", fs32_to_cpu(sb, usb1->fs_cpg));
+ pr_debug(" ipg: %u\n", fs32_to_cpu(sb, usb1->fs_ipg));
+ pr_debug(" fpg: %u\n", fs32_to_cpu(sb, usb1->fs_fpg));
+ pr_debug(" csaddr: %u\n", fs32_to_cpu(sb, usb1->fs_csaddr));
+ pr_debug(" cssize: %u\n", fs32_to_cpu(sb, usb1->fs_cssize));
+ pr_debug(" cgsize: %u\n", fs32_to_cpu(sb, usb1->fs_cgsize));
+ pr_debug(" fstodb: %u\n",
+ fs32_to_cpu(sb, usb1->fs_fsbtodb));
+ pr_debug(" nrpos: %u\n", fs32_to_cpu(sb, usb3->fs_nrpos));
+ pr_debug(" ndir %u\n",
+ fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir));
+ pr_debug(" nifree %u\n",
+ fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree));
+ pr_debug(" nbfree %u\n",
+ fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree));
+ pr_debug(" nffree %u\n",
+ fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree));
}
- printk("\n");
+ pr_debug("\n");
}
/*
@@ -247,38 +246,38 @@ static void ufs_print_super_stuff(struct super_block *sb,
static void ufs_print_cylinder_stuff(struct super_block *sb,
struct ufs_cylinder_group *cg)
{
- printk("\nufs_print_cylinder_stuff\n");
- printk("size of ucg: %zu\n", sizeof(struct ufs_cylinder_group));
- printk(" magic: %x\n", fs32_to_cpu(sb, cg->cg_magic));
- printk(" time: %u\n", fs32_to_cpu(sb, cg->cg_time));
- printk(" cgx: %u\n", fs32_to_cpu(sb, cg->cg_cgx));
- printk(" ncyl: %u\n", fs16_to_cpu(sb, cg->cg_ncyl));
- printk(" niblk: %u\n", fs16_to_cpu(sb, cg->cg_niblk));
- printk(" ndblk: %u\n", fs32_to_cpu(sb, cg->cg_ndblk));
- printk(" cs_ndir: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_ndir));
- printk(" cs_nbfree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nbfree));
- printk(" cs_nifree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nifree));
- printk(" cs_nffree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nffree));
- printk(" rotor: %u\n", fs32_to_cpu(sb, cg->cg_rotor));
- printk(" frotor: %u\n", fs32_to_cpu(sb, cg->cg_frotor));
- printk(" irotor: %u\n", fs32_to_cpu(sb, cg->cg_irotor));
- printk(" frsum: %u, %u, %u, %u, %u, %u, %u, %u\n",
+ pr_debug("\nufs_print_cylinder_stuff\n");
+ pr_debug("size of ucg: %zu\n", sizeof(struct ufs_cylinder_group));
+ pr_debug(" magic: %x\n", fs32_to_cpu(sb, cg->cg_magic));
+ pr_debug(" time: %u\n", fs32_to_cpu(sb, cg->cg_time));
+ pr_debug(" cgx: %u\n", fs32_to_cpu(sb, cg->cg_cgx));
+ pr_debug(" ncyl: %u\n", fs16_to_cpu(sb, cg->cg_ncyl));
+ pr_debug(" niblk: %u\n", fs16_to_cpu(sb, cg->cg_niblk));
+ pr_debug(" ndblk: %u\n", fs32_to_cpu(sb, cg->cg_ndblk));
+ pr_debug(" cs_ndir: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_ndir));
+ pr_debug(" cs_nbfree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nbfree));
+ pr_debug(" cs_nifree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nifree));
+ pr_debug(" cs_nffree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nffree));
+ pr_debug(" rotor: %u\n", fs32_to_cpu(sb, cg->cg_rotor));
+ pr_debug(" frotor: %u\n", fs32_to_cpu(sb, cg->cg_frotor));
+ pr_debug(" irotor: %u\n", fs32_to_cpu(sb, cg->cg_irotor));
+ pr_debug(" frsum: %u, %u, %u, %u, %u, %u, %u, %u\n",
fs32_to_cpu(sb, cg->cg_frsum[0]), fs32_to_cpu(sb, cg->cg_frsum[1]),
fs32_to_cpu(sb, cg->cg_frsum[2]), fs32_to_cpu(sb, cg->cg_frsum[3]),
fs32_to_cpu(sb, cg->cg_frsum[4]), fs32_to_cpu(sb, cg->cg_frsum[5]),
fs32_to_cpu(sb, cg->cg_frsum[6]), fs32_to_cpu(sb, cg->cg_frsum[7]));
- printk(" btotoff: %u\n", fs32_to_cpu(sb, cg->cg_btotoff));
- printk(" boff: %u\n", fs32_to_cpu(sb, cg->cg_boff));
- printk(" iuseoff: %u\n", fs32_to_cpu(sb, cg->cg_iusedoff));
- printk(" freeoff: %u\n", fs32_to_cpu(sb, cg->cg_freeoff));
- printk(" nextfreeoff: %u\n", fs32_to_cpu(sb, cg->cg_nextfreeoff));
- printk(" clustersumoff %u\n",
- fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clustersumoff));
- printk(" clusteroff %u\n",
- fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clusteroff));
- printk(" nclusterblks %u\n",
- fs32_to_cpu(sb, cg->cg_u.cg_44.cg_nclusterblks));
- printk("\n");
+ pr_debug(" btotoff: %u\n", fs32_to_cpu(sb, cg->cg_btotoff));
+ pr_debug(" boff: %u\n", fs32_to_cpu(sb, cg->cg_boff));
+ pr_debug(" iuseoff: %u\n", fs32_to_cpu(sb, cg->cg_iusedoff));
+ pr_debug(" freeoff: %u\n", fs32_to_cpu(sb, cg->cg_freeoff));
+ pr_debug(" nextfreeoff: %u\n", fs32_to_cpu(sb, cg->cg_nextfreeoff));
+ pr_debug(" clustersumoff %u\n",
+ fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clustersumoff));
+ pr_debug(" clusteroff %u\n",
+ fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clusteroff));
+ pr_debug(" nclusterblks %u\n",
+ fs32_to_cpu(sb, cg->cg_u.cg_44.cg_nclusterblks));
+ pr_debug("\n");
}
#else
# define ufs_print_super_stuff(sb, usb1, usb2, usb3) /**/
@@ -287,13 +286,12 @@ static void ufs_print_cylinder_stuff(struct super_block *sb,
static const struct super_operations ufs_super_ops;
-static char error_buf[1024];
-
void ufs_error (struct super_block * sb, const char * function,
const char * fmt, ...)
{
struct ufs_sb_private_info * uspi;
struct ufs_super_block_first * usb1;
+ struct va_format vaf;
va_list args;
uspi = UFS_SB(sb)->s_uspi;
@@ -305,20 +303,21 @@ void ufs_error (struct super_block * sb, const char * function,
ufs_mark_sb_dirty(sb);
sb->s_flags |= MS_RDONLY;
}
- va_start (args, fmt);
- vsnprintf (error_buf, sizeof(error_buf), fmt, args);
- va_end (args);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
switch (UFS_SB(sb)->s_mount_opt & UFS_MOUNT_ONERROR) {
case UFS_MOUNT_ONERROR_PANIC:
- panic ("UFS-fs panic (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
+ panic("panic (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
case UFS_MOUNT_ONERROR_LOCK:
case UFS_MOUNT_ONERROR_UMOUNT:
case UFS_MOUNT_ONERROR_REPAIR:
- printk (KERN_CRIT "UFS-fs error (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
- }
+ pr_crit("error (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+ }
+ va_end(args);
}
void ufs_panic (struct super_block * sb, const char * function,
@@ -326,6 +325,7 @@ void ufs_panic (struct super_block * sb, const char * function,
{
struct ufs_sb_private_info * uspi;
struct ufs_super_block_first * usb1;
+ struct va_format vaf;
va_list args;
uspi = UFS_SB(sb)->s_uspi;
@@ -336,24 +336,27 @@ void ufs_panic (struct super_block * sb, const char * function,
ubh_mark_buffer_dirty(USPI_UBH(uspi));
ufs_mark_sb_dirty(sb);
}
- va_start (args, fmt);
- vsnprintf (error_buf, sizeof(error_buf), fmt, args);
- va_end (args);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
sb->s_flags |= MS_RDONLY;
- printk (KERN_CRIT "UFS-fs panic (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
+ pr_crit("panic (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+ va_end(args);
}
void ufs_warning (struct super_block * sb, const char * function,
const char * fmt, ...)
{
+ struct va_format vaf;
va_list args;
- va_start (args, fmt);
- vsnprintf (error_buf, sizeof(error_buf), fmt, args);
- va_end (args);
- printk (KERN_WARNING "UFS-fs warning (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ pr_warn("(device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+ va_end(args);
}
enum {
@@ -464,14 +467,12 @@ static int ufs_parse_options (char * options, unsigned * mount_options)
ufs_set_opt (*mount_options, ONERROR_UMOUNT);
break;
case Opt_onerror_repair:
- printk("UFS-fs: Unable to do repair on error, "
- "will lock lock instead\n");
+ pr_err("Unable to do repair on error, will lock lock instead\n");
ufs_clear_opt (*mount_options, ONERROR);
ufs_set_opt (*mount_options, ONERROR_REPAIR);
break;
default:
- printk("UFS-fs: Invalid option: \"%s\" "
- "or missing value\n", p);
+ pr_err("Invalid option: \"%s\" or missing value\n", p);
return 0;
}
}
@@ -788,8 +789,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
#ifndef CONFIG_UFS_FS_WRITE
if (!(sb->s_flags & MS_RDONLY)) {
- printk("ufs was compiled with read-only support, "
- "can't be mounted as read-write\n");
+ pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n");
return -EROFS;
}
#endif
@@ -812,12 +812,12 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_mount_opt = 0;
ufs_set_opt (sbi->s_mount_opt, ONERROR_LOCK);
if (!ufs_parse_options ((char *) data, &sbi->s_mount_opt)) {
- printk("wrong mount options\n");
+ pr_err("wrong mount options\n");
goto failed;
}
if (!(sbi->s_mount_opt & UFS_MOUNT_UFSTYPE)) {
if (!silent)
- printk("You didn't specify the type of your ufs filesystem\n\n"
+ pr_err("You didn't specify the type of your ufs filesystem\n\n"
"mount -t ufs -o ufstype="
"sun|sunx86|44bsd|ufs2|5xbsd|old|hp|nextstep|nextstep-cd|openstep ...\n\n"
">>>WARNING<<< Wrong ufstype may corrupt your filesystem, "
@@ -868,7 +868,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
break;
case UFS_MOUNT_UFSTYPE_SUNOS:
- UFSD(("ufstype=sunos\n"))
+ UFSD("ufstype=sunos\n");
uspi->s_fsize = block_size = 1024;
uspi->s_fmask = ~(1024 - 1);
uspi->s_fshift = 10;
@@ -900,7 +900,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=old is supported read-only\n");
+ pr_info("ufstype=old is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
@@ -916,7 +916,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=nextstep is supported read-only\n");
+ pr_info("ufstype=nextstep is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
@@ -932,7 +932,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=nextstep-cd is supported read-only\n");
+ pr_info("ufstype=nextstep-cd is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
@@ -948,7 +948,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=openstep is supported read-only\n");
+ pr_info("ufstype=openstep is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
@@ -963,19 +963,19 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=hp is supported read-only\n");
+ pr_info("ufstype=hp is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
default:
if (!silent)
- printk("unknown ufstype\n");
+ pr_err("unknown ufstype\n");
goto failed;
}
again:
if (!sb_set_blocksize(sb, block_size)) {
- printk(KERN_ERR "UFS: failed to set blocksize\n");
+ pr_err("failed to set blocksize\n");
goto failed;
}
@@ -1034,7 +1034,7 @@ again:
goto again;
}
if (!silent)
- printk("ufs_read_super: bad magic number\n");
+ pr_err("%s(): bad magic number\n", __func__);
goto failed;
magic_found:
@@ -1048,33 +1048,33 @@ magic_found:
uspi->s_fshift = fs32_to_cpu(sb, usb1->fs_fshift);
if (!is_power_of_2(uspi->s_fsize)) {
- printk(KERN_ERR "ufs_read_super: fragment size %u is not a power of 2\n",
- uspi->s_fsize);
- goto failed;
+ pr_err("%s(): fragment size %u is not a power of 2\n",
+ __func__, uspi->s_fsize);
+ goto failed;
}
if (uspi->s_fsize < 512) {
- printk(KERN_ERR "ufs_read_super: fragment size %u is too small\n",
- uspi->s_fsize);
+ pr_err("%s(): fragment size %u is too small\n",
+ __func__, uspi->s_fsize);
goto failed;
}
if (uspi->s_fsize > 4096) {
- printk(KERN_ERR "ufs_read_super: fragment size %u is too large\n",
- uspi->s_fsize);
+ pr_err("%s(): fragment size %u is too large\n",
+ __func__, uspi->s_fsize);
goto failed;
}
if (!is_power_of_2(uspi->s_bsize)) {
- printk(KERN_ERR "ufs_read_super: block size %u is not a power of 2\n",
- uspi->s_bsize);
+ pr_err("%s(): block size %u is not a power of 2\n",
+ __func__, uspi->s_bsize);
goto failed;
}
if (uspi->s_bsize < 4096) {
- printk(KERN_ERR "ufs_read_super: block size %u is too small\n",
- uspi->s_bsize);
+ pr_err("%s(): block size %u is too small\n",
+ __func__, uspi->s_bsize);
goto failed;
}
if (uspi->s_bsize / uspi->s_fsize > 8) {
- printk(KERN_ERR "ufs_read_super: too many fragments per block (%u)\n",
- uspi->s_bsize / uspi->s_fsize);
+ pr_err("%s(): too many fragments per block (%u)\n",
+ __func__, uspi->s_bsize / uspi->s_fsize);
goto failed;
}
if (uspi->s_fsize != block_size || uspi->s_sbsize != super_block_size) {
@@ -1113,20 +1113,21 @@ magic_found:
UFSD("fs is DEC OSF/1\n");
break;
case UFS_FSACTIVE:
- printk("ufs_read_super: fs is active\n");
+ pr_err("%s(): fs is active\n", __func__);
sb->s_flags |= MS_RDONLY;
break;
case UFS_FSBAD:
- printk("ufs_read_super: fs is bad\n");
+ pr_err("%s(): fs is bad\n", __func__);
sb->s_flags |= MS_RDONLY;
break;
default:
- printk("ufs_read_super: can't grok fs_clean 0x%x\n", usb1->fs_clean);
+ pr_err("%s(): can't grok fs_clean 0x%x\n",
+ __func__, usb1->fs_clean);
sb->s_flags |= MS_RDONLY;
break;
}
} else {
- printk("ufs_read_super: fs needs fsck\n");
+ pr_err("%s(): fs needs fsck\n", __func__);
sb->s_flags |= MS_RDONLY;
}
@@ -1299,7 +1300,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) {
new_mount_opt |= ufstype;
} else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
- printk("ufstype can't be changed during remount\n");
+ pr_err("ufstype can't be changed during remount\n");
unlock_ufs(sb);
return -EINVAL;
}
@@ -1328,8 +1329,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
* fs was mounted as ro, remounting rw
*/
#ifndef CONFIG_UFS_FS_WRITE
- printk("ufs was compiled with read-only support, "
- "can't be mounted as read-write\n");
+ pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n");
unlock_ufs(sb);
return -EINVAL;
#else
@@ -1338,12 +1338,12 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
ufstype != UFS_MOUNT_UFSTYPE_44BSD &&
ufstype != UFS_MOUNT_UFSTYPE_SUNx86 &&
ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
- printk("this ufstype is read-only supported\n");
+ pr_err("this ufstype is read-only supported\n");
unlock_ufs(sb);
return -EINVAL;
}
if (!ufs_read_cylinder_structures(sb)) {
- printk("failed during remounting\n");
+ pr_err("failed during remounting\n");
unlock_ufs(sb);
return -EPERM;
}
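
The ufs_error()/ufs_panic()/ufs_warning() hunks above drop the shared error_buf in favour of the printk %pV extension. A minimal sketch of that pattern, using a hypothetical helper name rather than the ufs functions themselves:

#include <linux/kernel.h>
#include <linux/printk.h>

static void my_fs_warn(const char *function, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	/* %pV expands the caller's format and arguments in one pass,
	 * so no fixed-size buffer (and no truncation) is involved */
	pr_warn("%s: %pV\n", function, &vaf);
	va_end(args);
}
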
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 343e6fc571e5..2a07396d5f9e 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -1,6 +1,12 @@
#ifndef _UFS_UFS_H
#define _UFS_UFS_H 1
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#define UFS_MAX_GROUP_LOADED 8
#define UFS_CGNO_EMPTY ((unsigned)-1)
@@ -71,9 +77,9 @@ struct ufs_inode_info {
*/
#ifdef CONFIG_UFS_DEBUG
# define UFSD(f, a...) { \
- printk ("UFSD (%s, %d): %s:", \
+ pr_debug("UFSD (%s, %d): %s:", \
__FILE__, __LINE__, __func__); \
- printk (f, ## a); \
+ pr_debug(f, ## a); \
}
#else
# define UFSD(f, a...) /**/
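
Because ufs.h now defines pr_fmt as KBUILD_MODNAME ": ", every pr_* call in the ufs objects is prefixed automatically; for example (assuming KBUILD_MODNAME expands to "ufs" for this module):

	pr_err("wrong mount options\n");	/* logged roughly as: "ufs: wrong mount options" */
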
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 7ad634501e48..e1c8d080c427 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -88,32 +88,32 @@
* lib/bitmap.c provides these functions:
*/
-extern int __bitmap_empty(const unsigned long *bitmap, int bits);
-extern int __bitmap_full(const unsigned long *bitmap, int bits);
+extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits);
+extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits);
extern int __bitmap_equal(const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits);
+ const unsigned long *bitmap2, unsigned int nbits);
extern void __bitmap_complement(unsigned long *dst, const unsigned long *src,
- int bits);
+ unsigned int nbits);
extern void __bitmap_shift_right(unsigned long *dst,
const unsigned long *src, int shift, int bits);
extern void __bitmap_shift_left(unsigned long *dst,
const unsigned long *src, int shift, int bits);
extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits);
+ const unsigned long *bitmap2, unsigned int nbits);
extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits);
+ const unsigned long *bitmap2, unsigned int nbits);
extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits);
+ const unsigned long *bitmap2, unsigned int nbits);
extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits);
+ const unsigned long *bitmap2, unsigned int nbits);
extern int __bitmap_intersects(const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits);
+ const unsigned long *bitmap2, unsigned int nbits);
extern int __bitmap_subset(const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits);
-extern int __bitmap_weight(const unsigned long *bitmap, int bits);
+ const unsigned long *bitmap2, unsigned int nbits);
+extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
-extern void bitmap_set(unsigned long *map, int i, int len);
-extern void bitmap_clear(unsigned long *map, int start, int nr);
+extern void bitmap_set(unsigned long *map, unsigned int start, int len);
+extern void bitmap_clear(unsigned long *map, unsigned int start, int len);
extern unsigned long bitmap_find_next_zero_area(unsigned long *map,
unsigned long size,
unsigned long start,
@@ -140,9 +140,9 @@ extern void bitmap_onto(unsigned long *dst, const unsigned long *orig,
const unsigned long *relmap, int bits);
extern void bitmap_fold(unsigned long *dst, const unsigned long *orig,
int sz, int bits);
-extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order);
-extern void bitmap_release_region(unsigned long *bitmap, int pos, int order);
-extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order);
+extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order);
+extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order);
+extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order);
extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits);
extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits);
@@ -188,15 +188,15 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
}
static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
- const unsigned long *src2, int nbits)
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
- return (*dst = *src1 & *src2) != 0;
+ return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
return __bitmap_and(dst, src1, src2, nbits);
}
static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
- const unsigned long *src2, int nbits)
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = *src1 | *src2;
@@ -205,7 +205,7 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
}
static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
- const unsigned long *src2, int nbits)
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = *src1 ^ *src2;
@@ -214,24 +214,24 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
}
static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1,
- const unsigned long *src2, int nbits)
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
- return (*dst = *src1 & ~(*src2)) != 0;
+ return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
return __bitmap_andnot(dst, src1, src2, nbits);
}
static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
- int nbits)
+ unsigned int nbits)
{
if (small_const_nbits(nbits))
- *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits);
+ *dst = ~(*src);
else
__bitmap_complement(dst, src, nbits);
}
static inline int bitmap_equal(const unsigned long *src1,
- const unsigned long *src2, int nbits)
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
@@ -240,7 +240,7 @@ static inline int bitmap_equal(const unsigned long *src1,
}
static inline int bitmap_intersects(const unsigned long *src1,
- const unsigned long *src2, int nbits)
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
@@ -249,7 +249,7 @@ static inline int bitmap_intersects(const unsigned long *src1,
}
static inline int bitmap_subset(const unsigned long *src1,
- const unsigned long *src2, int nbits)
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits));
@@ -257,7 +257,7 @@ static inline int bitmap_subset(const unsigned long *src1,
return __bitmap_subset(src1, src2, nbits);
}
-static inline int bitmap_empty(const unsigned long *src, int nbits)
+static inline int bitmap_empty(const unsigned long *src, unsigned nbits)
{
if (small_const_nbits(nbits))
return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
@@ -265,7 +265,7 @@ static inline int bitmap_empty(const unsigned long *src, int nbits)
return __bitmap_empty(src, nbits);
}
-static inline int bitmap_full(const unsigned long *src, int nbits)
+static inline int bitmap_full(const unsigned long *src, unsigned int nbits)
{
if (small_const_nbits(nbits))
return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
@@ -273,7 +273,7 @@ static inline int bitmap_full(const unsigned long *src, int nbits)
return __bitmap_full(src, nbits);
}
-static inline int bitmap_weight(const unsigned long *src, int nbits)
+static inline int bitmap_weight(const unsigned long *src, unsigned int nbits)
{
if (small_const_nbits(nbits))
return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
@@ -284,7 +284,7 @@ static inline void bitmap_shift_right(unsigned long *dst,
const unsigned long *src, int n, int nbits)
{
if (small_const_nbits(nbits))
- *dst = *src >> n;
+ *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> n;
else
__bitmap_shift_right(dst, src, n, nbits);
}
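
The bitmap_and()/bitmap_andnot() single-word paths now apply BITMAP_LAST_WORD_MASK(), so bits beyond nbits can neither reach dst nor make the return value non-zero. A small sketch of the effect, with illustrative names:

#include <linux/bitmap.h>
#include <linux/printk.h>

static void bitmap_and_demo(void)
{
	DECLARE_BITMAP(a, 4);
	DECLARE_BITMAP(b, 4);
	DECLARE_BITMAP(dst, 4);

	bitmap_fill(a, 4);	/* bits 0..3 set */
	bitmap_zero(b, 4);

	/* stale bits above bit 3 are masked off, so this reliably
	 * reports that none of the first 4 bits are common */
	if (!bitmap_and(dst, a, b, 4))
		pr_debug("no common bits\n");
}
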
diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h
index 0846e6b931ce..89f67c1c3160 100644
--- a/include/linux/byteorder/generic.h
+++ b/include/linux/byteorder/generic.h
@@ -2,7 +2,7 @@
#define _LINUX_BYTEORDER_GENERIC_H
/*
- * linux/byteorder_generic.h
+ * linux/byteorder/generic.h
* Generic Byte-reordering support
*
* The "... p" macros, like le64_to_cpup, can be used with pointers
diff --git a/include/linux/cma.h b/include/linux/cma.h
new file mode 100644
index 000000000000..371b93042520
--- /dev/null
+++ b/include/linux/cma.h
@@ -0,0 +1,27 @@
+#ifndef __CMA_H__
+#define __CMA_H__
+
+/*
+ * There is always at least the global CMA area and a few optional
+ * areas configured in the kernel .config.
+ */
+#ifdef CONFIG_CMA_AREAS
+#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS)
+
+#else
+#define MAX_CMA_AREAS (0)
+
+#endif
+
+struct cma;
+
+extern phys_addr_t cma_get_base(struct cma *cma);
+extern unsigned long cma_get_size(struct cma *cma);
+
+extern int __init cma_declare_contiguous(phys_addr_t size,
+ phys_addr_t base, phys_addr_t limit,
+ phys_addr_t alignment, unsigned int order_per_bit,
+ bool fixed, struct cma **res_cma);
+extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align);
+extern bool cma_release(struct cma *cma, struct page *pages, int count);
+#endif
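
A hedged sketch of how this interface is meant to be called, following the declarations above (variable and function names are illustrative, not part of the patch):

#include <linux/cma.h>
#include <linux/sizes.h>

static struct cma *my_cma;	/* hypothetical area handle */

static int __init my_cma_reserve(void)
{
	/* 16 MiB, any base, default limit and alignment,
	 * tracked at per-page granularity (order_per_bit = 0) */
	return cma_declare_contiguous(SZ_16M, 0, 0, 0, 0, false, &my_cma);
}

static struct page *my_cma_buffer(void)
{
	/* 16 pages, aligned to an order-4 boundary */
	return cma_alloc(my_cma, 16, 4);
}
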
diff --git a/include/linux/crc64_ecma.h b/include/linux/crc64_ecma.h
new file mode 100644
index 000000000000..bba7a4d692b3
--- /dev/null
+++ b/include/linux/crc64_ecma.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CRC64_ECMA_H_
+#define __CRC64_ECMA_H_
+
+#include <linux/types.h>
+
+
+#define CRC64_DEFAULT_INITVAL 0xFFFFFFFFFFFFFFFFULL
+
+
+/*
+ * crc64_ecma_seed - Initializes the CRC64 ECMA seed.
+ */
+u64 crc64_ecma_seed(void);
+
+/*
+ * crc64_ecma - Computes the 64 bit ECMA CRC.
+ *
+ * @pdata: pointer to the data to compute checksum for.
+ * @nbytes: number of bytes in data buffer.
+ * @seed: CRC seed.
+ */
+u64 crc64_ecma(u8 const *pdata, u32 nbytes, u64 seed);
+
+#endif /* __CRC64_ECMA_H_ */
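
A minimal usage sketch for the two helpers declared above (buffer and length are illustrative):

#include <linux/crc64_ecma.h>

static u64 checksum_block(const u8 *buf, u32 len)
{
	u64 seed = crc64_ecma_seed();

	return crc64_ecma(buf, len, seed);
}
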
diff --git a/include/linux/decompress/bunzip2.h b/include/linux/decompress/bunzip2.h
index 115272137a9c..4d683df898e6 100644
--- a/include/linux/decompress/bunzip2.h
+++ b/include/linux/decompress/bunzip2.h
@@ -1,10 +1,10 @@
#ifndef DECOMPRESS_BUNZIP2_H
#define DECOMPRESS_BUNZIP2_H
-int bunzip2(unsigned char *inbuf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+int bunzip2(unsigned char *inbuf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *pos,
+ long *pos,
void(*error)(char *x));
#endif
diff --git a/include/linux/decompress/generic.h b/include/linux/decompress/generic.h
index 0c7111a55a1a..1fcfd64b5076 100644
--- a/include/linux/decompress/generic.h
+++ b/include/linux/decompress/generic.h
@@ -1,11 +1,11 @@
#ifndef DECOMPRESS_GENERIC_H
#define DECOMPRESS_GENERIC_H
-typedef int (*decompress_fn) (unsigned char *inbuf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+typedef int (*decompress_fn) (unsigned char *inbuf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *outbuf,
- int *posp,
+ long *posp,
void(*error)(char *x));
/* inbuf - input buffer
@@ -33,7 +33,7 @@ typedef int (*decompress_fn) (unsigned char *inbuf, int len,
/* Utility routine to detect the decompression method */
-decompress_fn decompress_method(const unsigned char *inbuf, int len,
+decompress_fn decompress_method(const unsigned char *inbuf, long len,
const char **name);
#endif
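
With the decompressor entry points widened to long, a caller that feeds an in-memory image looks roughly like this (helper names are illustrative):

#include <linux/decompress/generic.h>
#include <linux/errno.h>
#include <linux/printk.h>

static void my_decomp_error(char *msg)
{
	pr_err("decompression failed: %s\n", msg);
}

static int my_unpack(unsigned char *img, long img_len, unsigned char *out)
{
	const char *name;
	decompress_fn decomp = decompress_method(img, img_len, &name);
	long pos = 0;

	if (!decomp)
		return -EINVAL;
	/* whole input and output are in memory, so no fill()/flush() */
	return decomp(img, img_len, NULL, NULL, out, &pos, my_decomp_error);
}
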
diff --git a/include/linux/decompress/inflate.h b/include/linux/decompress/inflate.h
index 1d0aedef9822..e4f411fdbd24 100644
--- a/include/linux/decompress/inflate.h
+++ b/include/linux/decompress/inflate.h
@@ -1,10 +1,10 @@
#ifndef LINUX_DECOMPRESS_INFLATE_H
#define LINUX_DECOMPRESS_INFLATE_H
-int gunzip(unsigned char *inbuf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+int gunzip(unsigned char *inbuf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *pos,
+ long *pos,
void(*error_fn)(char *x));
#endif
diff --git a/include/linux/decompress/unlz4.h b/include/linux/decompress/unlz4.h
index d5b68bf3ec92..3273c2f36496 100644
--- a/include/linux/decompress/unlz4.h
+++ b/include/linux/decompress/unlz4.h
@@ -1,10 +1,10 @@
#ifndef DECOMPRESS_UNLZ4_H
#define DECOMPRESS_UNLZ4_H
-int unlz4(unsigned char *inbuf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+int unlz4(unsigned char *inbuf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *pos,
+ long *pos,
void(*error)(char *x));
#endif
diff --git a/include/linux/decompress/unlzma.h b/include/linux/decompress/unlzma.h
index 7796538f1bf4..8a891a193840 100644
--- a/include/linux/decompress/unlzma.h
+++ b/include/linux/decompress/unlzma.h
@@ -1,11 +1,11 @@
#ifndef DECOMPRESS_UNLZMA_H
#define DECOMPRESS_UNLZMA_H
-int unlzma(unsigned char *, int,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+int unlzma(unsigned char *, long,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *posp,
+ long *posp,
void(*error)(char *x)
);
diff --git a/include/linux/decompress/unlzo.h b/include/linux/decompress/unlzo.h
index 987229752519..af18f95d6570 100644
--- a/include/linux/decompress/unlzo.h
+++ b/include/linux/decompress/unlzo.h
@@ -1,10 +1,10 @@
#ifndef DECOMPRESS_UNLZO_H
#define DECOMPRESS_UNLZO_H
-int unlzo(unsigned char *inbuf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+int unlzo(unsigned char *inbuf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *pos,
+ long *pos,
void(*error)(char *x));
#endif
diff --git a/include/linux/decompress/unxz.h b/include/linux/decompress/unxz.h
index 41728fc6c8a1..f764e2a7201e 100644
--- a/include/linux/decompress/unxz.h
+++ b/include/linux/decompress/unxz.h
@@ -10,10 +10,10 @@
#ifndef DECOMPRESS_UNXZ_H
#define DECOMPRESS_UNXZ_H
-int unxz(unsigned char *in, int in_size,
- int (*fill)(void *dest, unsigned int size),
- int (*flush)(void *src, unsigned int size),
- unsigned char *out, int *in_used,
+int unxz(unsigned char *in, long in_size,
+ long (*fill)(void *dest, unsigned long size),
+ long (*flush)(void *src, unsigned long size),
+ unsigned char *out, long *in_used,
void (*error)(char *x));
#endif
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
index 772eab5d524a..569bbd039896 100644
--- a/include/linux/dma-contiguous.h
+++ b/include/linux/dma-contiguous.h
@@ -53,18 +53,13 @@
#ifdef __KERNEL__
+#include <linux/device.h>
+
struct cma;
struct page;
-struct device;
#ifdef CONFIG_DMA_CMA
-/*
- * There is always at least global CMA area and a few optional device
- * private areas configured in kernel .config.
- */
-#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS)
-
extern struct cma *dma_contiguous_default_area;
static inline struct cma *dev_get_cma_area(struct device *dev)
@@ -123,8 +118,6 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
#else
-#define MAX_CMA_AREAS (0)
-
static inline struct cma *dev_get_cma_area(struct device *dev)
{
return NULL;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2daccaf4b547..1ab6c6913040 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2688,7 +2688,7 @@ static const struct file_operations __fops = { \
.read = simple_attr_read, \
.write = simple_attr_write, \
.llseek = generic_file_llseek, \
-};
+}
static inline __printf(1, 2)
void __simple_attr_check_format(const char *fmt, ...)
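
Dropping the semicolon from the macro body means each DEFINE_SIMPLE_ATTRIBUTE() user supplies its own terminating semicolon, e.g. (hypothetical names):

static int my_val_get(void *data, u64 *val) { *val = *(u64 *)data; return 0; }
static int my_val_set(void *data, u64 val) { *(u64 *)data = val; return 0; }
DEFINE_SIMPLE_ATTRIBUTE(my_val_fops, my_val_get, my_val_set, "%llu\n");
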
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 6eb1fb37de9a..5e7219dc0fae 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -360,7 +360,7 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask);
void *alloc_pages_exact(size_t size, gfp_t gfp_mask);
void free_pages_exact(void *virt, size_t size);
/* This is different from alloc_pages_exact_node !!! */
-void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
+void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
#define __get_free_page(gfp_mask) \
__get_free_pages((gfp_mask), 0)
diff --git a/include/linux/glob.h b/include/linux/glob.h
new file mode 100644
index 000000000000..c990b7fbf0a8
--- /dev/null
+++ b/include/linux/glob.h
@@ -0,0 +1,10 @@
+#ifndef _LINUX_GLOB_H
+#define _LINUX_GLOB_H
+
+#include <linux/types.h> /* For bool */
+#include <linux/compiler.h> /* For __pure */
+
+bool __pure glob_match(char const *pat, char const *str);
+
+#endif /* _LINUX_GLOB_H */
+
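
glob_match() does shell-style matching of the whole string against the pattern; a sketch (pattern and parameter name are illustrative):

#include <linux/glob.h>

static bool is_debug_param(const char *name)
{
	/* '*' matches any run of characters, '?' a single character */
	return glob_match("debug_*", name);
}
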
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index b826239bdce0..63579cb8d3dc 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -93,10 +93,6 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
#endif /* CONFIG_DEBUG_VM */
extern unsigned long transparent_hugepage_flags;
-extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
- pmd_t *dst_pmd, pmd_t *src_pmd,
- struct vm_area_struct *vma,
- unsigned long addr, unsigned long end);
extern int split_huge_page_to_list(struct page *page, struct list_head *list);
static inline int split_huge_page(struct page *page)
{
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index a23c096b3080..6e6d338641fe 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -87,7 +87,6 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
#endif
extern unsigned long hugepages_treat_as_movable;
-extern const unsigned long hugetlb_zero, hugetlb_infinity;
extern int sysctl_hugetlb_shm_group;
extern struct list_head huge_boot_pages;
diff --git a/include/linux/input.h b/include/linux/input.h
index 82ce323b9986..6453b22372ac 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -79,6 +79,7 @@ struct input_value {
* @led: reflects current state of device's LEDs
* @snd: reflects current state of sound effects
* @sw: reflects current state of device's switches
+ * @leds: leds objects for the device's LEDs
* @open: this method is called when the very first user calls
* input_open_device(). The driver must prepare the device
* to start generating events (start polling thread,
@@ -164,6 +165,8 @@ struct input_dev {
unsigned long snd[BITS_TO_LONGS(SND_CNT)];
unsigned long sw[BITS_TO_LONGS(SW_CNT)];
+ struct led_classdev *leds;
+
int (*open)(struct input_dev *dev);
void (*close)(struct input_dev *dev);
int (*flush)(struct input_dev *dev, struct file *file);
@@ -531,4 +534,22 @@ int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file);
int input_ff_create_memless(struct input_dev *dev, void *data,
int (*play_effect)(struct input_dev *, void *, struct ff_effect *));
+#ifdef CONFIG_INPUT_LEDS
+
+int input_led_connect(struct input_dev *dev);
+void input_led_disconnect(struct input_dev *dev);
+
+#else
+
+static inline int input_led_connect(struct input_dev *dev)
+{
+ return 0;
+}
+
+static inline void input_led_disconnect(struct input_dev *dev)
+{
+}
+
+#endif
+
#endif
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index a9e2268ecccb..e9892041cb41 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -470,6 +470,7 @@ extern enum system_states {
#define TAINT_FIRMWARE_WORKAROUND 11
#define TAINT_OOT_MODULE 12
#define TAINT_UNSIGNED_MODULE 13
+#define TAINT_SOFTLOCKUP 14
extern const char hex_asc[];
#define hex_asc_lo(x) hex_asc[((x) & 0x0f)]
@@ -493,11 +494,6 @@ static inline char *hex_byte_pack_upper(char *buf, u8 byte)
return buf;
}
-static inline char * __deprecated pack_hex_byte(char *buf, u8 byte)
-{
- return hex_byte_pack(buf, byte);
-}
-
extern int hex_to_bin(char ch);
extern int __must_check hex2bin(u8 *dst, const char *src, size_t count);
@@ -719,23 +715,8 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
(void) (&_max1 == &_max2); \
_max1 > _max2 ? _max1 : _max2; })
-#define min3(x, y, z) ({ \
- typeof(x) _min1 = (x); \
- typeof(y) _min2 = (y); \
- typeof(z) _min3 = (z); \
- (void) (&_min1 == &_min2); \
- (void) (&_min1 == &_min3); \
- _min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \
- (_min2 < _min3 ? _min2 : _min3); })
-
-#define max3(x, y, z) ({ \
- typeof(x) _max1 = (x); \
- typeof(y) _max2 = (y); \
- typeof(z) _max3 = (z); \
- (void) (&_max1 == &_max2); \
- (void) (&_max1 == &_max3); \
- _max1 > _max2 ? (_max1 > _max3 ? _max1 : _max3) : \
- (_max2 > _max3 ? _max2 : _max3); })
+#define min3(x, y, z) min((typeof(x))min(x, y), z)
+#define max3(x, y, z) max((typeof(x))max(x, y), z)
/**
* min_not_zero - return the minimum that is _not_ zero, unless both are zero
@@ -750,20 +731,13 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
/**
* clamp - return a value clamped to a given range with strict typechecking
* @val: current value
- * @min: minimum allowable value
- * @max: maximum allowable value
+ * @lo: lowest allowable value
+ * @hi: highest allowable value
*
* This macro does strict typechecking of min/max to make sure they are of the
* same type as val. See the unnecessary pointer comparisons.
*/
-#define clamp(val, min, max) ({ \
- typeof(val) __val = (val); \
- typeof(min) __min = (min); \
- typeof(max) __max = (max); \
- (void) (&__val == &__min); \
- (void) (&__val == &__max); \
- __val = __val < __min ? __min: __val; \
- __val > __max ? __max: __val; })
+#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
/*
* ..and if you can't take the strict
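
Because the rewritten min3()/max3()/clamp() are built from min()/max(), mismatched operand types still trigger the pointer-comparison warning. A sketch of intended use with matching types (values illustrative):

#include <linux/kernel.h>

static unsigned int bound_request(unsigned int req)
{
	unsigned int lo = 16, hi = 4096;

	/* all operands are unsigned int, so the typecheck is quiet;
	 * a request of 10000 is clamped to 4096 */
	return clamp(req, lo, hi);
}
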
diff --git a/include/linux/klist.h b/include/linux/klist.h
index a370ce57cf1d..61e5b723ae73 100644
--- a/include/linux/klist.h
+++ b/include/linux/klist.h
@@ -44,7 +44,7 @@ struct klist_node {
extern void klist_add_tail(struct klist_node *n, struct klist *k);
extern void klist_add_head(struct klist_node *n, struct klist *k);
-extern void klist_add_after(struct klist_node *n, struct klist_node *pos);
+extern void klist_add_behind(struct klist_node *n, struct klist_node *pos);
extern void klist_add_before(struct klist_node *n, struct klist_node *pos);
extern void klist_del(struct klist_node *n);
diff --git a/include/linux/list.h b/include/linux/list.h
index ef9594171062..cbbb96fcead9 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -654,15 +654,15 @@ static inline void hlist_add_before(struct hlist_node *n,
*(n->pprev) = n;
}
-static inline void hlist_add_after(struct hlist_node *n,
- struct hlist_node *next)
+static inline void hlist_add_behind(struct hlist_node *n,
+ struct hlist_node *prev)
{
- next->next = n->next;
- n->next = next;
- next->pprev = &n->next;
+ n->next = prev->next;
+ prev->next = n;
+ n->pprev = &prev->next;
- if(next->next)
- next->next->pprev = &next->next;
+ if (n->next)
+ n->next->pprev = &n->next;
}
/* after that we'll appear to be on some hlist and hlist_del will work */
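
The rename makes the argument order read naturally: the new node comes first, the existing node it goes behind comes second. A sketch of a caller (names illustrative):

#include <linux/list.h>

static void insert_after(struct hlist_head *head, struct hlist_node *n,
			 struct hlist_node *prev)
{
	if (!prev)
		hlist_add_head(n, head);	/* list empty or n goes first */
	else
		hlist_add_behind(n, prev);	/* place n right after prev */
}
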
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index b660e05b63d4..e8cc45307f8f 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -249,7 +249,7 @@ phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
/*
* Set the allocation direction to bottom-up or top-down.
*/
-static inline void memblock_set_bottom_up(bool enable)
+static inline void __init memblock_set_bottom_up(bool enable)
{
memblock.bottom_up = enable;
}
@@ -264,7 +264,7 @@ static inline bool memblock_bottom_up(void)
return memblock.bottom_up;
}
#else
-static inline void memblock_set_bottom_up(bool enable) {}
+static inline void __init memblock_set_bottom_up(bool enable) {}
static inline bool memblock_bottom_up(void) { return false; }
#endif
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index eb65d29516ca..e0752d204d9e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -54,39 +54,20 @@ struct mem_cgroup_reclaim_cookie {
};
#ifdef CONFIG_MEMCG
-/*
- * All "charge" functions with gfp_mask should use GFP_KERNEL or
- * (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't
- * alloc memory but reclaims memory from all available zones. So, "where I want
- * memory from" bits of gfp_mask has no meaning. So any bits of that field is
- * available but adding a rule is better. charge functions' gfp_mask should
- * be set to GFP_KERNEL or gfp_mask & GFP_RECLAIM_MASK for avoiding ambiguous
- * codes.
- * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.)
- */
+int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+ gfp_t gfp_mask, struct mem_cgroup **memcgp);
+void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
+ bool lrucare);
+void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg);
+void mem_cgroup_uncharge(struct page *page);
+void mem_cgroup_uncharge_list(struct list_head *page_list);
-extern int mem_cgroup_charge_anon(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask);
-/* for swap handling */
-extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
- struct page *page, gfp_t mask, struct mem_cgroup **memcgp);
-extern void mem_cgroup_commit_charge_swapin(struct page *page,
- struct mem_cgroup *memcg);
-extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg);
-
-extern int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask);
+void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
+ bool lrucare);
struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
-/* For coalescing uncharge for reducing memcg' overhead*/
-extern void mem_cgroup_uncharge_start(void);
-extern void mem_cgroup_uncharge_end(void);
-
-extern void mem_cgroup_uncharge_page(struct page *page);
-extern void mem_cgroup_uncharge_cache_page(struct page *page);
-
bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
struct mem_cgroup *memcg);
bool task_in_mem_cgroup(struct task_struct *task,
@@ -113,12 +94,6 @@ bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg)
extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
-extern void
-mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
- struct mem_cgroup **memcgp);
-extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
- struct page *oldpage, struct page *newpage, bool migration_ok);
-
struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
struct mem_cgroup *,
struct mem_cgroup_reclaim_cookie *);
@@ -133,8 +108,6 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list);
void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int);
extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
struct task_struct *p);
-extern void mem_cgroup_replace_page_cache(struct page *oldpage,
- struct page *newpage);
static inline void mem_cgroup_oom_enable(void)
{
@@ -233,46 +206,36 @@ void mem_cgroup_print_bad_page(struct page *page);
#else /* CONFIG_MEMCG */
struct mem_cgroup;
-static inline int mem_cgroup_charge_anon(struct page *page,
- struct mm_struct *mm, gfp_t gfp_mask)
-{
- return 0;
-}
-
-static inline int mem_cgroup_charge_file(struct page *page,
- struct mm_struct *mm, gfp_t gfp_mask)
-{
- return 0;
-}
-
-static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
- struct page *page, gfp_t gfp_mask, struct mem_cgroup **memcgp)
+static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+ gfp_t gfp_mask,
+ struct mem_cgroup **memcgp)
{
+ *memcgp = NULL;
return 0;
}
-static inline void mem_cgroup_commit_charge_swapin(struct page *page,
- struct mem_cgroup *memcg)
-{
-}
-
-static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
+static inline void mem_cgroup_commit_charge(struct page *page,
+ struct mem_cgroup *memcg,
+ bool lrucare)
{
}
-static inline void mem_cgroup_uncharge_start(void)
+static inline void mem_cgroup_cancel_charge(struct page *page,
+ struct mem_cgroup *memcg)
{
}
-static inline void mem_cgroup_uncharge_end(void)
+static inline void mem_cgroup_uncharge(struct page *page)
{
}
-static inline void mem_cgroup_uncharge_page(struct page *page)
+static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
{
}
-static inline void mem_cgroup_uncharge_cache_page(struct page *page)
+static inline void mem_cgroup_migrate(struct page *oldpage,
+ struct page *newpage,
+ bool lrucare)
{
}
@@ -311,17 +274,6 @@ static inline struct cgroup_subsys_state
return NULL;
}
-static inline void
-mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
- struct mem_cgroup **memcgp)
-{
-}
-
-static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
- struct page *oldpage, struct page *newpage, bool migration_ok)
-{
-}
-
static inline struct mem_cgroup *
mem_cgroup_iter(struct mem_cgroup *root,
struct mem_cgroup *prev,
@@ -417,10 +369,6 @@ static inline
void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
{
}
-static inline void mem_cgroup_replace_page_cache(struct page *oldpage,
- struct page *newpage)
-{
-}
#endif /* CONFIG_MEMCG */
#if !defined(CONFIG_MEMCG) || !defined(CONFIG_DEBUG_VM)
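
The replacement charge API splits charging into a try/commit/cancel sequence so a page can be charged before it is finally mapped or put on the LRU. A hedged sketch of the calling convention, using the prototypes above (function name and error path are illustrative):

#include <linux/memcontrol.h>
#include <linux/errno.h>

static int charge_new_page(struct page *page, struct mm_struct *mm,
			   gfp_t gfp, bool ok_to_map)
{
	struct mem_cgroup *memcg;
	int ret;

	ret = mem_cgroup_try_charge(page, mm, gfp, &memcg);
	if (ret)
		return ret;

	if (!ok_to_map) {
		mem_cgroup_cancel_charge(page, memcg);	/* back out */
		return -EBUSY;
	}

	/* page is not yet on the LRU, so lrucare == false */
	mem_cgroup_commit_charge(page, memcg, false);
	return 0;
}
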
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 010d125bffbf..d9524c49d767 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -26,11 +26,12 @@ enum {
MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
};
-/* Types for control the zone type of onlined memory */
+/* Types for controlling the zone type of onlined and offlined memory */
enum {
- ONLINE_KEEP,
- ONLINE_KERNEL,
- ONLINE_MOVABLE,
+ MMOP_OFFLINE = -1,
+ MMOP_ONLINE_KEEP,
+ MMOP_ONLINE_KERNEL,
+ MMOP_ONLINE_MOVABLE,
};
/*
@@ -258,6 +259,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {}
extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
void *arg, int (*func)(struct memory_block *, void *));
extern int add_memory(int nid, u64 start, u64 size);
+extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default);
extern int arch_add_memory(int nid, u64 start, u64 size);
extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
extern bool is_memblock_offlined(struct memory_block *mem);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 796deac19fcf..6e0b286649f1 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -461,6 +461,7 @@ static inline void mm_init_cpumask(struct mm_struct *mm)
#ifdef CONFIG_CPUMASK_OFFSTACK
mm->cpu_vm_mask_var = &mm->cpumask_allocation;
#endif
+ cpumask_clear(mm->cpu_vm_mask_var);
}
/* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index edd82a105220..2f348d02f640 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -20,11 +20,13 @@ extern void dump_page_badflags(struct page *page, const char *reason,
} while (0)
#define VM_WARN_ON(cond) WARN_ON(cond)
#define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond)
+#define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format)
#else
#define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
#define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
+#define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
#endif
#ifdef CONFIG_DEBUG_VIRTUAL
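
VM_WARN_ONCE() follows the existing VM_WARN_ON_ONCE() pattern but carries a format string, and without CONFIG_DEBUG_VM it reduces to a compile-time validity check of the condition. A sketch (condition and message are illustrative):

#include <linux/mm.h>
#include <linux/mmdebug.h>

static void sanity_check_page(struct page *page)
{
	VM_WARN_ONCE(page_count(page) == 0,
		     "unexpected zero refcount for pfn %lx\n",
		     page_to_pfn(page));
}
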
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6cbd1b6c3d20..318df7051850 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -143,6 +143,7 @@ enum zone_stat_item {
NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
NR_DIRTIED, /* page dirtyings since bootup */
NR_WRITTEN, /* page writings since bootup */
+ NR_PAGES_SCANNED, /* pages scanned since last reclaim */
#ifdef CONFIG_NUMA
NUMA_HIT, /* allocated in intended node */
NUMA_MISS, /* allocated in non intended node */
@@ -324,19 +325,12 @@ enum zone_type {
#ifndef __GENERATING_BOUNDS_H
struct zone {
- /* Fields commonly accessed by the page allocator */
+ /* Read-mostly fields */
/* zone watermarks, access with *_wmark_pages(zone) macros */
unsigned long watermark[NR_WMARK];
/*
- * When free pages are below this point, additional steps are taken
- * when reading the number of free pages to avoid per-cpu counter
- * drift allowing watermarks to be breached
- */
- unsigned long percpu_drift_mark;
-
- /*
* We don't know if the memory that we're going to allocate will be freeable
* or/and it will be released eventually, so to avoid totally wasting several
* GB of ram we must reserve some of the lower zone memory (otherwise we risk
@@ -344,41 +338,26 @@ struct zone {
* on the higher zones). This array is recalculated at runtime if the
* sysctl_lowmem_reserve_ratio sysctl changes.
*/
- unsigned long lowmem_reserve[MAX_NR_ZONES];
-
- /*
- * This is a per-zone reserve of pages that should not be
- * considered dirtyable memory.
- */
- unsigned long dirty_balance_reserve;
+ long lowmem_reserve[MAX_NR_ZONES];
#ifdef CONFIG_NUMA
int node;
+#endif
+
/*
- * zone reclaim becomes active if more unmapped pages exist.
+ * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
+ * this zone's LRU. Maintained by the pageout code.
*/
- unsigned long min_unmapped_pages;
- unsigned long min_slab_pages;
-#endif
+ unsigned int inactive_ratio;
+
+ struct pglist_data *zone_pgdat;
struct per_cpu_pageset __percpu *pageset;
+
/*
- * free areas of different sizes
+ * This is a per-zone reserve of pages that should not be
+ * considered dirtyable memory.
*/
- spinlock_t lock;
-#if defined CONFIG_COMPACTION || defined CONFIG_CMA
- /* Set to true when the PG_migrate_skip bits should be cleared */
- bool compact_blockskip_flush;
-
- /* pfn where compaction free scanner should start */
- unsigned long compact_cached_free_pfn;
- /* pfn where async and sync compaction migration scanner should start */
- unsigned long compact_cached_migrate_pfn[2];
-#endif
-#ifdef CONFIG_MEMORY_HOTPLUG
- /* see spanned/present_pages for more description */
- seqlock_t span_seqlock;
-#endif
- struct free_area free_area[MAX_ORDER];
+ unsigned long dirty_balance_reserve;
#ifndef CONFIG_SPARSEMEM
/*
@@ -388,74 +367,14 @@ struct zone {
unsigned long *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */
-#ifdef CONFIG_COMPACTION
- /*
- * On compaction failure, 1<<compact_defer_shift compactions
- * are skipped before trying again. The number attempted since
- * last failure is tracked with compact_considered.
- */
- unsigned int compact_considered;
- unsigned int compact_defer_shift;
- int compact_order_failed;
-#endif
-
- ZONE_PADDING(_pad1_)
-
- /* Fields commonly accessed by the page reclaim scanner */
- spinlock_t lru_lock;
- struct lruvec lruvec;
-
- /* Evictions & activations on the inactive file list */
- atomic_long_t inactive_age;
-
- unsigned long pages_scanned; /* since last reclaim */
- unsigned long flags; /* zone flags, see below */
-
- /* Zone statistics */
- atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
-
- /*
- * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
- * this zone's LRU. Maintained by the pageout code.
- */
- unsigned int inactive_ratio;
-
-
- ZONE_PADDING(_pad2_)
- /* Rarely used or read-mostly fields */
-
+#ifdef CONFIG_NUMA
/*
- * wait_table -- the array holding the hash table
- * wait_table_hash_nr_entries -- the size of the hash table array
- * wait_table_bits -- wait_table_size == (1 << wait_table_bits)
- *
- * The purpose of all these is to keep track of the people
- * waiting for a page to become available and make them
- * runnable again when possible. The trouble is that this
- * consumes a lot of space, especially when so few things
- * wait on pages at a given time. So instead of using
- * per-page waitqueues, we use a waitqueue hash table.
- *
- * The bucket discipline is to sleep on the same queue when
- * colliding and wake all in that wait queue when removing.
- * When something wakes, it must check to be sure its page is
- * truly available, a la thundering herd. The cost of a
- * collision is great, but given the expected load of the
- * table, they should be so rare as to be outweighed by the
- * benefits from the saved space.
- *
- * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
- * primary users of these fields, and in mm/page_alloc.c
- * free_area_init_core() performs the initialization of them.
+ * zone reclaim becomes active if more unmapped pages exist.
*/
- wait_queue_head_t * wait_table;
- unsigned long wait_table_hash_nr_entries;
- unsigned long wait_table_bits;
+ unsigned long min_unmapped_pages;
+ unsigned long min_slab_pages;
+#endif /* CONFIG_NUMA */
- /*
- * Discontig memory support fields.
- */
- struct pglist_data *zone_pgdat;
/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
unsigned long zone_start_pfn;
@@ -500,9 +419,11 @@ struct zone {
* adjust_managed_page_count() should be used instead of directly
* touching zone->managed_pages and totalram_pages.
*/
+ unsigned long managed_pages;
unsigned long spanned_pages;
unsigned long present_pages;
- unsigned long managed_pages;
+
+ const char *name;
/*
* Number of MIGRATE_RESEVE page block. To maintain for just
@@ -510,10 +431,94 @@ struct zone {
*/
int nr_migrate_reserve_block;
+#ifdef CONFIG_MEMORY_HOTPLUG
+ /* see spanned/present_pages for more description */
+ seqlock_t span_seqlock;
+#endif
+
/*
- * rarely used fields:
+ * wait_table -- the array holding the hash table
+ * wait_table_hash_nr_entries -- the size of the hash table array
+ * wait_table_bits -- wait_table_size == (1 << wait_table_bits)
+ *
+ * The purpose of all these is to keep track of the people
+ * waiting for a page to become available and make them
+ * runnable again when possible. The trouble is that this
+ * consumes a lot of space, especially when so few things
+ * wait on pages at a given time. So instead of using
+ * per-page waitqueues, we use a waitqueue hash table.
+ *
+ * The bucket discipline is to sleep on the same queue when
+ * colliding and wake all in that wait queue when removing.
+ * When something wakes, it must check to be sure its page is
+ * truly available, a la thundering herd. The cost of a
+ * collision is great, but given the expected load of the
+ * table, they should be so rare as to be outweighed by the
+ * benefits from the saved space.
+ *
+	 * __wait_on_page_locked() and unlock_page() in mm/filemap.c are the
+ * primary users of these fields, and in mm/page_alloc.c
+ * free_area_init_core() performs the initialization of them.
*/
- const char *name;
+ wait_queue_head_t *wait_table;
+ unsigned long wait_table_hash_nr_entries;
+ unsigned long wait_table_bits;
+
+ ZONE_PADDING(_pad1_)
+
+ /* Write-intensive fields used from the page allocator */
+ spinlock_t lock;
+
+ /* free areas of different sizes */
+ struct free_area free_area[MAX_ORDER];
+
+ /* zone flags, see below */
+ unsigned long flags;
+
+ ZONE_PADDING(_pad2_)
+
+ /* Write-intensive fields used by page reclaim */
+
+ /* Fields commonly accessed by the page reclaim scanner */
+ spinlock_t lru_lock;
+ struct lruvec lruvec;
+
+ /* Evictions & activations on the inactive file list */
+ atomic_long_t inactive_age;
+
+ /*
+ * When free pages are below this point, additional steps are taken
+ * when reading the number of free pages to avoid per-cpu counter
+ * drift allowing watermarks to be breached
+ */
+ unsigned long percpu_drift_mark;
+
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+ /* pfn where compaction free scanner should start */
+ unsigned long compact_cached_free_pfn;
+ /* pfn where async and sync compaction migration scanner should start */
+ unsigned long compact_cached_migrate_pfn[2];
+#endif
+
+#ifdef CONFIG_COMPACTION
+ /*
+ * On compaction failure, 1<<compact_defer_shift compactions
+ * are skipped before trying again. The number attempted since
+ * last failure is tracked with compact_considered.
+ */
+ unsigned int compact_considered;
+ unsigned int compact_defer_shift;
+ int compact_order_failed;
+#endif
+
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+ /* Set to true when the PG_migrate_skip bits should be cleared */
+ bool compact_blockskip_flush;
+#endif
+
+ ZONE_PADDING(_pad3_)
+ /* Zone statistics */
+ atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
} ____cacheline_internodealigned_in_smp;
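For reference, the wait_table fields kept in struct zone above back the hashed page wait queues described in the comment; a minimal sketch of the lookup, assuming the hash_ptr()-based page_waitqueue() helper in mm/filemap.c (not part of this hunk), looks like:

static wait_queue_head_t *page_waitqueue(struct page *page)
{
	const struct zone *zone = page_zone(page);

	/* hash the page pointer into one of the shared wait queue buckets */
	return &zone->wait_table[hash_ptr(page, zone->wait_table_bits)];
}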
typedef enum {
@@ -529,6 +534,7 @@ typedef enum {
ZONE_WRITEBACK, /* reclaim scanning has recently found
* many pages under writeback
*/
+ ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */
} zone_flags_t;
static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
@@ -566,6 +572,11 @@ static inline int zone_is_reclaim_locked(const struct zone *zone)
return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
}
+static inline int zone_is_fair_depleted(const struct zone *zone)
+{
+ return test_bit(ZONE_FAIR_DEPLETED, &zone->flags);
+}
+
static inline int zone_is_oom_locked(const struct zone *zone)
{
return test_bit(ZONE_OOM_LOCKED, &zone->flags);
@@ -872,6 +883,8 @@ static inline int zone_movable_is_highmem(void)
{
#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
return movable_zone == ZONE_HIGHMEM;
+#elif defined(CONFIG_HIGHMEM)
+ return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
#else
return 0;
#endif
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 58b9a02c38d2..83a6aeda899d 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -430,7 +430,15 @@ static inline int num_node_state(enum node_states state)
for_each_node_mask((__node), node_states[__state])
#define first_online_node first_node(node_states[N_ONLINE])
-#define next_online_node(nid) next_node((nid), node_states[N_ONLINE])
+#define first_memory_node first_node(node_states[N_MEMORY])
+static inline int next_online_node(int nid)
+{
+ return next_node(nid, node_states[N_ONLINE]);
+}
+static inline int next_memory_node(int nid)
+{
+ return next_node(nid, node_states[N_MEMORY]);
+}
extern int nr_node_ids;
extern int nr_online_nodes;
@@ -471,6 +479,7 @@ static inline int num_node_state(enum node_states state)
for ( (node) = 0; (node) == 0; (node) = 1)
#define first_online_node 0
+#define first_memory_node 0
#define next_online_node(nid) (MAX_NUMNODES)
#define nr_node_ids 1
#define nr_online_nodes 1
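A minimal sketch of walking nodes with memory using the helpers added above; it assumes the usual next_node() convention of returning MAX_NUMNODES once the mask is exhausted:

int nid;

/* visit every node that has memory; the loop ends when next_memory_node()
 * runs off the end of the N_MEMORY mask */
for (nid = first_memory_node; nid < MAX_NUMNODES; nid = next_memory_node(nid))
	pr_debug("node %d has memory\n", nid);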
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 4cd62677feb9..647395a1a550 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -55,8 +55,8 @@ extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
struct mem_cgroup *memcg, nodemask_t *nodemask,
const char *message);
-extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
-extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
+extern bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_flags);
+extern void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_flags);
extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
int order, const nodemask_t *nodemask);
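The rename reads as a trylock/unlock pairing; a hedged sketch of the caller pattern (the real call site in mm/page_alloc.c is not shown in this diff, so the surrounding logic is illustrative only):

if (oom_zonelist_trylock(zonelist, gfp_mask)) {
	/* ... select and kill a victim via the OOM killer ... */
	oom_zonelist_unlock(zonelist, gfp_mask);
}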
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 8304959ad336..e1f5fcd79792 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -171,13 +171,12 @@ static inline int __TestClearPage##uname(struct page *page) \
#define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \
__SETPAGEFLAG(uname, lname) __CLEARPAGEFLAG(uname, lname)
-#define PAGEFLAG_FALSE(uname) \
-static inline int Page##uname(const struct page *page) \
- { return 0; }
-
#define TESTSCFLAG(uname, lname) \
TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname)
+#define TESTPAGEFLAG_FALSE(uname) \
+static inline int Page##uname(const struct page *page) { return 0; }
+
#define SETPAGEFLAG_NOOP(uname) \
static inline void SetPage##uname(struct page *page) { }
@@ -187,12 +186,21 @@ static inline void ClearPage##uname(struct page *page) { }
#define __CLEARPAGEFLAG_NOOP(uname) \
static inline void __ClearPage##uname(struct page *page) { }
+#define TESTSETFLAG_FALSE(uname) \
+static inline int TestSetPage##uname(struct page *page) { return 0; }
+
#define TESTCLEARFLAG_FALSE(uname) \
static inline int TestClearPage##uname(struct page *page) { return 0; }
#define __TESTCLEARFLAG_FALSE(uname) \
static inline int __TestClearPage##uname(struct page *page) { return 0; }
+#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname) \
+ SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname)
+
+#define TESTSCFLAG_FALSE(uname) \
+ TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)
+
struct page; /* forward declaration */
TESTPAGEFLAG(Locked, locked)
@@ -248,7 +256,6 @@ PAGEFLAG_FALSE(HighMem)
PAGEFLAG(SwapCache, swapcache)
#else
PAGEFLAG_FALSE(SwapCache)
- SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache)
#endif
PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
@@ -258,8 +265,8 @@ PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked)
#else
-PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked)
- TESTCLEARFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
+PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked)
+ TESTSCFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
#endif
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
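For clarity, a hedged illustration of what the consolidated stubs expand to, e.g. PAGEFLAG_FALSE(Mlocked) TESTSCFLAG_FALSE(Mlocked) in configurations where the real page flag is compiled out:

static inline int PageMlocked(const struct page *page) { return 0; }
static inline void SetPageMlocked(struct page *page) { }
static inline void ClearPageMlocked(struct page *page) { }
static inline int TestSetPageMlocked(struct page *page) { return 0; }
static inline int TestClearPageMlocked(struct page *page) { return 0; }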
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 777a524716db..5c831f1eca79 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -3,17 +3,15 @@
enum {
/* flags for mem_cgroup */
- PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */
- PCG_USED, /* this object is in use. */
- PCG_MIGRATION, /* under page migration */
- __NR_PCG_FLAGS,
+ PCG_USED = 0x01, /* This page is charged to a memcg */
+ PCG_MEM = 0x02, /* This page holds a memory charge */
+ PCG_MEMSW = 0x04, /* This page holds a memory+swap charge */
};
-#ifndef __GENERATING_BOUNDS_H
-#include <generated/bounds.h>
+struct pglist_data;
#ifdef CONFIG_MEMCG
-#include <linux/bit_spinlock.h>
+struct mem_cgroup;
/*
* Page Cgroup can be considered as an extended mem_map.
@@ -27,65 +25,30 @@ struct page_cgroup {
struct mem_cgroup *mem_cgroup;
};
-void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
+extern void pgdat_page_cgroup_init(struct pglist_data *pgdat);
#ifdef CONFIG_SPARSEMEM
-static inline void __init page_cgroup_init_flatmem(void)
+static inline void page_cgroup_init_flatmem(void)
{
}
-extern void __init page_cgroup_init(void);
+extern void page_cgroup_init(void);
#else
-void __init page_cgroup_init_flatmem(void);
-static inline void __init page_cgroup_init(void)
+extern void page_cgroup_init_flatmem(void);
+static inline void page_cgroup_init(void)
{
}
#endif
struct page_cgroup *lookup_page_cgroup(struct page *page);
-struct page *lookup_cgroup_page(struct page_cgroup *pc);
-
-#define TESTPCGFLAG(uname, lname) \
-static inline int PageCgroup##uname(struct page_cgroup *pc) \
- { return test_bit(PCG_##lname, &pc->flags); }
-
-#define SETPCGFLAG(uname, lname) \
-static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
- { set_bit(PCG_##lname, &pc->flags); }
-
-#define CLEARPCGFLAG(uname, lname) \
-static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \
- { clear_bit(PCG_##lname, &pc->flags); }
-
-#define TESTCLEARPCGFLAG(uname, lname) \
-static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \
- { return test_and_clear_bit(PCG_##lname, &pc->flags); }
-
-TESTPCGFLAG(Used, USED)
-CLEARPCGFLAG(Used, USED)
-SETPCGFLAG(Used, USED)
-
-SETPCGFLAG(Migration, MIGRATION)
-CLEARPCGFLAG(Migration, MIGRATION)
-TESTPCGFLAG(Migration, MIGRATION)
-static inline void lock_page_cgroup(struct page_cgroup *pc)
+static inline int PageCgroupUsed(struct page_cgroup *pc)
{
- /*
- * Don't take this lock in IRQ context.
- * This lock is for pc->mem_cgroup, USED, MIGRATION
- */
- bit_spin_lock(PCG_LOCK, &pc->flags);
+ return !!(pc->flags & PCG_USED);
}
-
-static inline void unlock_page_cgroup(struct page_cgroup *pc)
-{
- bit_spin_unlock(PCG_LOCK, &pc->flags);
-}
-
-#else /* CONFIG_MEMCG */
+#else /* !CONFIG_MEMCG */
struct page_cgroup;
-static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
+static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
}
@@ -98,10 +61,9 @@ static inline void page_cgroup_init(void)
{
}
-static inline void __init page_cgroup_init_flatmem(void)
+static inline void page_cgroup_init_flatmem(void)
{
}
-
#endif /* CONFIG_MEMCG */
#include <linux/swap.h>
@@ -140,6 +102,4 @@ static inline void swap_cgroup_swapoff(int type)
#endif /* CONFIG_MEMCG_SWAP */
-#endif /* !__GENERATING_BOUNDS_H */
-
#endif /* __LINUX_PAGE_CGROUP_H */
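With PCG_LOCK gone and the PCG_* values now plain bits, page_cgroup state becomes ordinary flag arithmetic; a hedged sketch only, since the actual charge/uncharge sites live in mm/memcontrol.c and are not part of this hunk:

struct page_cgroup *pc = lookup_page_cgroup(page);

if (!PageCgroupUsed(pc))
	pc->flags = PCG_USED | PCG_MEM;		/* charge: memory only */
else if (pc->flags & PCG_MEMSW)
	pc->flags = 0;				/* uncharge memory + swap */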
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index e1474ae18c88..3df8c7db7a4e 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -484,6 +484,9 @@ static inline int lock_page_killable(struct page *page)
/*
* lock_page_or_retry - Lock the page, unless this would block and the
* caller indicated that it can handle a retry.
+ *
+ * Return value and mmap_sem implications depend on flags; see
+ * __lock_page_or_retry().
*/
static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags)
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 319ff7e53efb..0990997a5304 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -31,7 +31,7 @@ static inline const char *printk_skip_level(const char *buffer)
}
/* printk's without a loglevel use this.. */
-#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
+#define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
/* We show everything that is MORE important than this.. */
#define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 8183b46fbaa2..372ad5e0dcb8 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -432,9 +432,9 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
}
/**
- * hlist_add_after_rcu
- * @prev: the existing element to add the new element after.
+ * hlist_add_behind_rcu
* @n: the new element to add to the hash list.
+ * @prev: the existing element to add the new element after.
*
* Description:
* Adds the specified element to the specified hlist
@@ -449,8 +449,8 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
* hlist_for_each_entry_rcu(), used to prevent memory-consistency
* problems on Alpha CPUs.
*/
-static inline void hlist_add_after_rcu(struct hlist_node *prev,
- struct hlist_node *n)
+static inline void hlist_add_behind_rcu(struct hlist_node *n,
+ struct hlist_node *prev)
{
n->next = prev->next;
n->pprev = &prev->next;
diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h
index 5059994fe297..9fc2f213e74f 100644
--- a/include/linux/rio_drv.h
+++ b/include/linux/rio_drv.h
@@ -384,11 +384,16 @@ void rio_dev_put(struct rio_dev *);
#ifdef CONFIG_RAPIDIO_DMA_ENGINE
extern struct dma_chan *rio_request_dma(struct rio_dev *rdev);
+extern struct dma_chan *rio_request_mport_dma(struct rio_mport *mport);
extern void rio_release_dma(struct dma_chan *dchan);
extern struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(
struct rio_dev *rdev, struct dma_chan *dchan,
struct rio_dma_data *data,
enum dma_transfer_direction direction, unsigned long flags);
+extern struct dma_async_tx_descriptor *rio_dma_prep_xfer(
+ struct dma_chan *dchan, u16 destid,
+ struct rio_dma_data *data,
+ enum dma_transfer_direction direction, unsigned long flags);
#endif
/**
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index f4ec8bbcb372..ed8f9e70df9b 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -136,7 +136,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
struct scatterlist *sgl)
{
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
BUG();
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a555f375bdb5..857ba40426ba 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -33,6 +33,7 @@ struct sched_param {
#include <linux/smp.h>
#include <linux/sem.h>
+#include <linux/shm.h>
#include <linux/signal.h>
#include <linux/compiler.h>
#include <linux/completion.h>
@@ -1385,6 +1386,7 @@ struct task_struct {
#ifdef CONFIG_SYSVIPC
/* ipc stuff */
struct sysv_sem sysvsem;
+ struct sysv_shm sysvshm;
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
/* hung task detection */
@@ -1628,12 +1630,6 @@ struct task_struct {
unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
#ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
- struct memcg_batch_info {
- int do_batch; /* incremented when batch uncharge started */
- struct mem_cgroup *memcg; /* target memcg of uncharge */
- unsigned long nr_pages; /* uncharged usage */
- unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
- } memcg_batch;
unsigned int memcg_kmem_skip_account;
struct memcg_oom_info {
struct mem_cgroup *memcg;
@@ -2969,15 +2965,10 @@ static inline void inc_syscw(struct task_struct *tsk)
#ifdef CONFIG_MEMCG
extern void mm_update_next_owner(struct mm_struct *mm);
-extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
#else
static inline void mm_update_next_owner(struct mm_struct *mm)
{
}
-
-static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
-{
-}
#endif /* CONFIG_MEMCG */
static inline unsigned long task_rlimit(const struct task_struct *tsk,
diff --git a/include/linux/shm.h b/include/linux/shm.h
index 57d77709fbe2..6fb801686ad6 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -1,6 +1,7 @@
#ifndef _LINUX_SHM_H_
#define _LINUX_SHM_H_
+#include <linux/list.h>
#include <asm/page.h>
#include <uapi/linux/shm.h>
#include <asm/shmparam.h>
@@ -20,6 +21,7 @@ struct shmid_kernel /* private to the kernel */
/* The task created the shm object. NULL if the task is dead. */
struct task_struct *shm_creator;
+ struct list_head shm_clist; /* list by creator */
};
/* shm_mode upper byte flags */
@@ -44,11 +46,20 @@ struct shmid_kernel /* private to the kernel */
#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT)
#ifdef CONFIG_SYSVIPC
+struct sysv_shm {
+ struct list_head shm_clist;
+};
+
long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr,
unsigned long shmlba);
-extern int is_file_shm_hugepages(struct file *file);
-extern void exit_shm(struct task_struct *task);
+int is_file_shm_hugepages(struct file *file);
+void exit_shm(struct task_struct *task);
+#define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist)
#else
+struct sysv_shm {
+ /* empty */
+};
+
static inline long do_shmat(int shmid, char __user *shmaddr,
int shmflg, unsigned long *addr,
unsigned long shmlba)
@@ -62,6 +73,9 @@ static inline int is_file_shm_hugepages(struct file *file)
static inline void exit_shm(struct task_struct *task)
{
}
+static inline void shm_init_task(struct task_struct *task)
+{
+}
#endif
#endif /* _LINUX_SHM_H_ */
diff --git a/include/linux/string.h b/include/linux/string.h
index d36977e029af..2663afcc0861 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -117,6 +117,7 @@ void *memchr_inv(const void *s, int c, size_t n);
extern char *kstrdup(const char *s, gfp_t gfp);
extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
+extern char *kstrimdup(const char *s, gfp_t gfp);
extern void *kmemdup(const void *src, size_t len, gfp_t gfp);
extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4bdbee80eede..1b72060f093a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -311,7 +311,6 @@ extern void lru_add_page_tail(struct page *page, struct page *page_tail,
struct lruvec *lruvec, struct list_head *head);
extern void activate_page(struct page *);
extern void mark_page_accessed(struct page *);
-extern void init_page_accessed(struct page *page);
extern void lru_add_drain(void);
extern void lru_add_drain_cpu(int cpu);
extern void lru_add_drain_all(void);
@@ -321,6 +320,9 @@ extern void swap_setup(void);
extern void add_page_to_unevictable_list(struct page *page);
+extern void lru_cache_add_active_or_unevictable(struct page *page,
+ struct vm_area_struct *vma);
+
/* linux/mm/vmscan.c */
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
gfp_t gfp_mask, nodemask_t *mask);
@@ -379,9 +381,13 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
}
#endif
#ifdef CONFIG_MEMCG_SWAP
-extern void mem_cgroup_uncharge_swap(swp_entry_t ent);
+extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
+extern void mem_cgroup_uncharge_swap(swp_entry_t entry);
#else
-static inline void mem_cgroup_uncharge_swap(swp_entry_t ent)
+static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+{
+}
+static inline void mem_cgroup_uncharge_swap(swp_entry_t entry)
{
}
#endif
@@ -441,7 +447,7 @@ extern void swap_shmem_alloc(swp_entry_t);
extern int swap_duplicate(swp_entry_t);
extern int swapcache_prepare(swp_entry_t);
extern void swap_free(swp_entry_t);
-extern void swapcache_free(swp_entry_t, struct page *page);
+extern void swapcache_free(swp_entry_t);
extern int free_swap_and_cache(swp_entry_t);
extern int swap_type_of(dev_t, sector_t, struct block_device **);
extern unsigned int count_swap_pages(int, int);
@@ -505,7 +511,7 @@ static inline void swap_free(swp_entry_t swp)
{
}
-static inline void swapcache_free(swp_entry_t swp, struct page *page)
+static inline void swapcache_free(swp_entry_t swp)
{
}
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 14a8ff2de11e..b7361f831226 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -34,8 +34,6 @@ struct ctl_table_root;
struct ctl_table_header;
struct ctl_dir;
-typedef struct ctl_table ctl_table;
-
typedef int proc_handler (struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 4836ba3c1cd8..e95372654f09 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -57,9 +57,9 @@ static inline void put_user_ns(struct user_namespace *ns)
}
struct seq_operations;
-extern struct seq_operations proc_uid_seq_operations;
-extern struct seq_operations proc_gid_seq_operations;
-extern struct seq_operations proc_projid_seq_operations;
+extern const struct seq_operations proc_uid_seq_operations;
+extern const struct seq_operations proc_gid_seq_operations;
+extern const struct seq_operations proc_projid_seq_operations;
extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *);
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 4b8a89189a29..b87696fdf06a 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -113,7 +113,7 @@ extern struct vm_struct *remove_vm_area(const void *addr);
extern struct vm_struct *find_vm_area(const void *addr);
extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
- struct page ***pages);
+ struct page **pages);
#ifdef CONFIG_MMU
extern int map_kernel_range_noflush(unsigned long start, unsigned long size,
pgprot_t prot, struct page **pages);
diff --git a/include/linux/zbud.h b/include/linux/zbud.h
index 13af0d450bf6..f9d41a6e361f 100644
--- a/include/linux/zbud.h
+++ b/include/linux/zbud.h
@@ -11,7 +11,7 @@ struct zbud_ops {
struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops);
void zbud_destroy_pool(struct zbud_pool *pool);
-int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp,
+int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
unsigned long *handle);
void zbud_free(struct zbud_pool *pool, unsigned long handle);
int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries);
diff --git a/include/linux/zlib.h b/include/linux/zlib.h
index 9c5a6b4de0a3..92dbbd3f6c75 100644
--- a/include/linux/zlib.h
+++ b/include/linux/zlib.h
@@ -83,11 +83,11 @@ struct internal_state;
typedef struct z_stream_s {
const Byte *next_in; /* next input byte */
- uInt avail_in; /* number of bytes available at next_in */
+ uLong avail_in; /* number of bytes available at next_in */
uLong total_in; /* total nb of input bytes read so far */
Byte *next_out; /* next output byte should be put there */
- uInt avail_out; /* remaining free space at next_out */
+ uLong avail_out; /* remaining free space at next_out */
uLong total_out; /* total nb of bytes output so far */
char *msg; /* last error message, NULL if no error */
@@ -493,64 +493,6 @@ extern int deflateInit2 (z_streamp strm,
method). msg is set to null if there is no error message. deflateInit2 does
not perform any compression: this will be done by deflate().
*/
-
-#if 0
-extern int zlib_deflateSetDictionary (z_streamp strm,
- const Byte *dictionary,
- uInt dictLength);
-#endif
-/*
- Initializes the compression dictionary from the given byte sequence
- without producing any compressed output. This function must be called
- immediately after deflateInit, deflateInit2 or deflateReset, before any
- call of deflate. The compressor and decompressor must use exactly the same
- dictionary (see inflateSetDictionary).
-
- The dictionary should consist of strings (byte sequences) that are likely
- to be encountered later in the data to be compressed, with the most commonly
- used strings preferably put towards the end of the dictionary. Using a
- dictionary is most useful when the data to be compressed is short and can be
- predicted with good accuracy; the data can then be compressed better than
- with the default empty dictionary.
-
- Depending on the size of the compression data structures selected by
- deflateInit or deflateInit2, a part of the dictionary may in effect be
- discarded, for example if the dictionary is larger than the window size in
- deflate or deflate2. Thus the strings most likely to be useful should be
- put at the end of the dictionary, not at the front.
-
- Upon return of this function, strm->adler is set to the Adler32 value
- of the dictionary; the decompressor may later use this value to determine
- which dictionary has been used by the compressor. (The Adler32 value
- applies to the whole dictionary even if only a subset of the dictionary is
- actually used by the compressor.)
-
- deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
- parameter is invalid (such as NULL dictionary) or the stream state is
- inconsistent (for example if deflate has already been called for this stream
- or if the compression method is bsort). deflateSetDictionary does not
- perform any compression: this will be done by deflate().
-*/
-
-#if 0
-extern int zlib_deflateCopy (z_streamp dest, z_streamp source);
-#endif
-
-/*
- Sets the destination stream as a complete copy of the source stream.
-
- This function can be useful when several compression strategies will be
- tried, for example when there are several ways of pre-processing the input
- data with a filter. The streams that will be discarded should then be freed
- by calling deflateEnd. Note that deflateCopy duplicates the internal
- compression state which can be quite large, so this strategy is slow and
- can consume lots of memory.
-
- deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
- enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
- (such as zalloc being NULL). msg is left unchanged in both source and
- destination.
-*/
extern int zlib_deflateReset (z_streamp strm);
/*
@@ -568,27 +510,6 @@ static inline unsigned long deflateBound(unsigned long s)
return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11;
}
-#if 0
-extern int zlib_deflateParams (z_streamp strm, int level, int strategy);
-#endif
-/*
- Dynamically update the compression level and compression strategy. The
- interpretation of level and strategy is as in deflateInit2. This can be
- used to switch between compression and straight copy of the input data, or
- to switch to a different kind of input data requiring a different
- strategy. If the compression level is changed, the input available so far
- is compressed with the old level (and may be flushed); the new level will
- take effect only at the next call of deflate().
-
- Before the call of deflateParams, the stream state must be set as for
- a call of deflate(), since the currently available input may have to
- be compressed and flushed. In particular, strm->avail_out must be non-zero.
-
- deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source
- stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR
- if strm->avail_out was zero.
-*/
-
/*
extern int inflateInit2 (z_streamp strm, int windowBits);
@@ -631,45 +552,6 @@ extern int inflateInit2 (z_streamp strm, int windowBits);
and avail_out are unchanged.)
*/
-extern int zlib_inflateSetDictionary (z_streamp strm,
- const Byte *dictionary,
- uInt dictLength);
-/*
- Initializes the decompression dictionary from the given uncompressed byte
- sequence. This function must be called immediately after a call of inflate,
- if that call returned Z_NEED_DICT. The dictionary chosen by the compressor
- can be determined from the adler32 value returned by that call of inflate.
- The compressor and decompressor must use exactly the same dictionary (see
- deflateSetDictionary). For raw inflate, this function can be called
- immediately after inflateInit2() or inflateReset() and before any call of
- inflate() to set the dictionary. The application must insure that the
- dictionary that was used for compression is provided.
-
- inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
- parameter is invalid (such as NULL dictionary) or the stream state is
- inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
- expected one (incorrect adler32 value). inflateSetDictionary does not
- perform any decompression: this will be done by subsequent calls of
- inflate().
-*/
-
-#if 0
-extern int zlib_inflateSync (z_streamp strm);
-#endif
-/*
- Skips invalid compressed data until a full flush point (see above the
- description of deflate with Z_FULL_FLUSH) can be found, or until all
- available input is skipped. No output is provided.
-
- inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR
- if no more input was provided, Z_DATA_ERROR if no flush point has been found,
- or Z_STREAM_ERROR if the stream structure was inconsistent. In the success
- case, the application may save the current current value of total_in which
- indicates where valid compressed data was found. In the error case, the
- application may repeatedly call inflateSync, providing more input each time,
- until success or end of the input data.
-*/
-
extern int zlib_inflateReset (z_streamp strm);
/*
This function is equivalent to inflateEnd followed by inflateInit,
diff --git a/include/linux/zpool.h b/include/linux/zpool.h
new file mode 100644
index 000000000000..f14bd75f08b3
--- /dev/null
+++ b/include/linux/zpool.h
@@ -0,0 +1,106 @@
+/*
+ * zpool memory storage api
+ *
+ * Copyright (C) 2014 Dan Streetman
+ *
+ * This is a common frontend for the zbud and zsmalloc memory
+ * storage pool implementations. Typically, this is used to
+ * store compressed memory.
+ */
+
+#ifndef _ZPOOL_H_
+#define _ZPOOL_H_
+
+struct zpool;
+
+struct zpool_ops {
+ int (*evict)(struct zpool *pool, unsigned long handle);
+};
+
+/*
+ * Control how a handle is mapped. It will be ignored if the
+ * implementation does not support it. Its use is optional.
+ * Note that this does not refer to memory protection; it
+ * refers to how the memory will be copied in/out if copying
+ * is necessary during mapping; read-write is the safest as
+ * it copies the existing memory in on map, and copies the
+ * changed memory back out on unmap. Write-only does not copy
+ * in the memory and should only be used for initialization.
+ * If in doubt, use ZPOOL_MM_DEFAULT which is read-write.
+ */
+enum zpool_mapmode {
+ ZPOOL_MM_RW, /* normal read-write mapping */
+ ZPOOL_MM_RO, /* read-only (no copy-out at unmap time) */
+ ZPOOL_MM_WO, /* write-only (no copy-in at map time) */
+
+ ZPOOL_MM_DEFAULT = ZPOOL_MM_RW
+};
+
+struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops);
+
+char *zpool_get_type(struct zpool *pool);
+
+void zpool_destroy_pool(struct zpool *pool);
+
+int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp,
+ unsigned long *handle);
+
+void zpool_free(struct zpool *pool, unsigned long handle);
+
+int zpool_shrink(struct zpool *pool, unsigned int pages,
+ unsigned int *reclaimed);
+
+void *zpool_map_handle(struct zpool *pool, unsigned long handle,
+ enum zpool_mapmode mm);
+
+void zpool_unmap_handle(struct zpool *pool, unsigned long handle);
+
+u64 zpool_get_total_size(struct zpool *pool);
+
+
+/**
+ * struct zpool_driver - driver implementation for zpool
+ * @type: name of the driver.
+ * @list: entry in the list of zpool drivers.
+ * @create: create a new pool.
+ * @destroy: destroy a pool.
+ * @malloc: allocate mem from a pool.
+ * @free: free mem from a pool.
+ * @shrink: shrink the pool.
+ * @map: map a handle.
+ * @unmap: unmap a handle.
+ * @total_size: get total size of a pool.
+ *
+ * This is created by a zpool implementation and registered
+ * with zpool.
+ */
+struct zpool_driver {
+ char *type;
+ struct module *owner;
+ atomic_t refcount;
+ struct list_head list;
+
+ void *(*create)(gfp_t gfp, struct zpool_ops *ops);
+ void (*destroy)(void *pool);
+
+ int (*malloc)(void *pool, size_t size, gfp_t gfp,
+ unsigned long *handle);
+ void (*free)(void *pool, unsigned long handle);
+
+ int (*shrink)(void *pool, unsigned int pages,
+ unsigned int *reclaimed);
+
+ void *(*map)(void *pool, unsigned long handle,
+ enum zpool_mapmode mm);
+ void (*unmap)(void *pool, unsigned long handle);
+
+ u64 (*total_size)(void *pool);
+};
+
+void zpool_register_driver(struct zpool_driver *driver);
+
+int zpool_unregister_driver(struct zpool_driver *driver);
+
+int zpool_evict(void *pool, unsigned long handle);
+
+#endif
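A hedged usage sketch of the API declared above, modelled on how a compressed-cache consumer might drive it; the "zbud" type string and the evict callback are assumptions for illustration, not part of this hunk:

static int my_evict(struct zpool *pool, unsigned long handle)
{
	return 0;	/* writeback/reclaim of one entry would go here */
}

static struct zpool_ops my_ops = { .evict = my_evict };

static int zpool_demo(void *src, size_t len)
{
	struct zpool *pool = zpool_create_pool("zbud", GFP_KERNEL, &my_ops);
	unsigned long handle;
	void *dst;

	if (!pool)
		return -ENOMEM;
	if (zpool_malloc(pool, len, GFP_KERNEL, &handle)) {
		zpool_destroy_pool(pool);
		return -ENOMEM;
	}

	dst = zpool_map_handle(pool, handle, ZPOOL_MM_WO);
	memcpy(dst, src, len);		/* write-only map: no copy-in */
	zpool_unmap_handle(pool, handle);

	zpool_free(pool, handle);
	zpool_destroy_pool(pool);
	return 0;
}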
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index e6df23cae7be..261e708010da 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -31,7 +31,7 @@ enum scsi_timeouts {
* Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit
* is totally arbitrary, a setting of 2048 will get you at least 8mb ios.
*/
-#ifdef ARCH_HAS_SG_CHAIN
+#ifdef CONFIG_ARCH_HAS_SG_CHAIN
#define SCSI_MAX_SG_CHAIN_SEGMENTS 2048
#else
#define SCSI_MAX_SG_CHAIN_SEGMENTS SCSI_MAX_SG_SEGMENTS
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
index 4e4f2f8b1ac2..dd2b5467d905 100644
--- a/include/trace/events/migrate.h
+++ b/include/trace/events/migrate.h
@@ -17,6 +17,7 @@
{MR_MEMORY_HOTPLUG, "memory_hotplug"}, \
{MR_SYSCALL, "syscall_or_cpuset"}, \
{MR_MEMPOLICY_MBIND, "mempolicy_mbind"}, \
+ {MR_NUMA_MISPLACED, "numa_misplaced"}, \
{MR_CMA, "cma"}
TRACE_EVENT(mm_migrate_pages,
diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h
index 1c9fabde69e4..ce0803b8d05f 100644
--- a/include/trace/events/pagemap.h
+++ b/include/trace/events/pagemap.h
@@ -28,12 +28,10 @@ TRACE_EVENT(mm_lru_insertion,
TP_PROTO(
struct page *page,
- unsigned long pfn,
- int lru,
- unsigned long flags
+ int lru
),
- TP_ARGS(page, pfn, lru, flags),
+ TP_ARGS(page, lru),
TP_STRUCT__entry(
__field(struct page *, page )
@@ -44,9 +42,9 @@ TRACE_EVENT(mm_lru_insertion,
TP_fast_assign(
__entry->page = page;
- __entry->pfn = pfn;
+ __entry->pfn = page_to_pfn(page);
__entry->lru = lru;
- __entry->flags = flags;
+ __entry->flags = trace_pagemap_flags(page);
),
/* Flag format is based on page-types.c formatting for pagemap */
@@ -64,9 +62,9 @@ TRACE_EVENT(mm_lru_insertion,
TRACE_EVENT(mm_lru_activate,
- TP_PROTO(struct page *page, unsigned long pfn),
+ TP_PROTO(struct page *page),
- TP_ARGS(page, pfn),
+ TP_ARGS(page),
TP_STRUCT__entry(
__field(struct page *, page )
@@ -75,7 +73,7 @@ TRACE_EVENT(mm_lru_activate,
TP_fast_assign(
__entry->page = page;
- __entry->pfn = pfn;
+ __entry->pfn = page_to_pfn(page);
),
/* Flag format is based on page-types.c formatting for pagemap */
diff --git a/init/Kconfig b/init/Kconfig
index 85fb9851b326..d3ef635b9779 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -807,15 +807,53 @@ config LOG_BUF_SHIFT
range 12 21
default 17
help
- Select kernel log buffer size as a power of 2.
+ Select the minimal kernel log buffer size as a power of 2.
+	  The final size is affected by the LOG_CPU_MAX_BUF_SHIFT config
+	  parameter; see below. A larger size may also be forced
+	  by the "log_buf_len" boot parameter.
+
Examples:
- 17 => 128 KB
+ 17 => 128 KB
16 => 64 KB
- 15 => 32 KB
- 14 => 16 KB
+ 15 => 32 KB
+ 14 => 16 KB
13 => 8 KB
12 => 4 KB
+config LOG_CPU_MAX_BUF_SHIFT
+ int "CPU kernel log buffer size contribution (13 => 8 KB, 17 => 128KB)"
+ range 0 21
+ default 12 if !BASE_SMALL
+ default 0 if BASE_SMALL
+ help
+	  This option allows increasing the default ring buffer size
+	  according to the number of CPUs. The value defines the contribution
+	  of each CPU as a power of 2. The space used is typically only a few
+	  lines; however, it might be much more when problems are reported,
+ e.g. backtraces.
+
+ The increased size means that a new buffer has to be allocated and
+ the original static one is unused. It makes sense only on systems
+	  with many CPUs. Therefore this value is used only when the sum of
+	  contributions is greater than half of the default kernel ring
+ buffer as defined by LOG_BUF_SHIFT. The default values are set
+ so that more than 64 CPUs are needed to trigger the allocation.
+
+	  This option is also ignored when the "log_buf_len" kernel parameter
+	  is used, as it forces an exact (power of two) size of the ring buffer.
+
+	  The number of possible CPUs is used for this computation, ignoring
+	  hotplugging, making the computation optimal for the worst-case
+	  scenario while allowing a simple algorithm to be used from bootup.
+
+	  Example shift values and their meaning:
+ 17 => 128 KB for each CPU
+ 16 => 64 KB for each CPU
+ 15 => 32 KB for each CPU
+ 14 => 16 KB for each CPU
+ 13 => 8 KB for each CPU
+ 12 => 4 KB for each CPU
+
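A hedged sketch of the sizing rule the help text describes; the real logic lives in kernel/printk/printk.c and is not part of this hunk, so the variable names here (base, cpu_extra, new_size) are illustrative only:

unsigned long base = 1UL << CONFIG_LOG_BUF_SHIFT;
unsigned long cpu_extra = (num_possible_cpus() - 1) *
			  (1UL << CONFIG_LOG_CPU_MAX_BUF_SHIFT);
unsigned long new_size = base;

/* the per-CPU contribution only counts once it exceeds half of the
 * static buffer, and is skipped entirely when "log_buf_len" is given
 * on the command line */
if (cpu_extra > base / 2)
	new_size = base + cpu_extra;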
#
# Architectures with an unreliable sched_clock() should select this:
#
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 82f22885c87e..b6237c31b0e2 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -539,12 +539,6 @@ void __init prepare_namespace(void)
{
int is_floppy;
- if (root_delay) {
- printk(KERN_INFO "Waiting %d sec before mounting root device...\n",
- root_delay);
- ssleep(root_delay);
- }
-
/*
* wait for the known devices to complete their probing
*
@@ -571,6 +565,12 @@ void __init prepare_namespace(void)
if (initrd_load())
goto out;
+ if (root_delay) {
+ pr_info("Waiting %d sec before mounting root device...\n",
+ root_delay);
+ ssleep(root_delay);
+ }
+
/* wait for any asynchronous scanning to complete */
if ((ROOT_DEV == 0) && root_wait) {
printk(KERN_INFO "Waiting for root device %s...\n",
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index a8227022e3a0..e5d059e8aa11 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -311,9 +311,9 @@ static int exit_code;
static int decompress_error;
static int crd_infd, crd_outfd;
-static int __init compr_fill(void *buf, unsigned int len)
+static long __init compr_fill(void *buf, unsigned long len)
{
- int r = sys_read(crd_infd, buf, len);
+ long r = sys_read(crd_infd, buf, len);
if (r < 0)
printk(KERN_ERR "RAMDISK: error while reading compressed data");
else if (r == 0)
@@ -321,13 +321,13 @@ static int __init compr_fill(void *buf, unsigned int len)
return r;
}
-static int __init compr_flush(void *window, unsigned int outcnt)
+static long __init compr_flush(void *window, unsigned long outcnt)
{
- int written = sys_write(crd_outfd, window, outcnt);
+ long written = sys_write(crd_outfd, window, outcnt);
if (written != outcnt) {
if (decompress_error == 0)
printk(KERN_ERR
- "RAMDISK: incomplete write (%d != %d)\n",
+ "RAMDISK: incomplete write (%ld != %ld)\n",
written, outcnt);
decompress_error = 1;
return -1;
diff --git a/init/initramfs.c b/init/initramfs.c
index a8497fab1c3d..bece48c3461e 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -19,6 +19,29 @@
#include <linux/syscalls.h>
#include <linux/utime.h>
+static ssize_t __init xwrite(int fd, const char *p, size_t count)
+{
+ ssize_t out = 0;
+
+ /* sys_write only can write MAX_RW_COUNT aka 2G-4K bytes at most */
+ while (count) {
+ ssize_t rv = sys_write(fd, p, count);
+
+ if (rv < 0) {
+ if (rv == -EINTR || rv == -EAGAIN)
+ continue;
+ return out ? out : rv;
+ } else if (rv == 0)
+ break;
+
+ p += rv;
+ out += rv;
+ count -= rv;
+ }
+
+ return out;
+}
+
static __initdata char *message;
static void __init error(char *x)
{
@@ -174,7 +197,7 @@ static __initdata enum state {
} state, next_state;
static __initdata char *victim;
-static __initdata unsigned count;
+static unsigned long count __initdata;
static __initdata loff_t this_header, next_header;
static inline void __init eat(unsigned n)
@@ -186,7 +209,7 @@ static inline void __init eat(unsigned n)
static __initdata char *vcollected;
static __initdata char *collected;
-static __initdata int remains;
+static long remains __initdata;
static __initdata char *collect;
static void __init read_into(char *buf, unsigned size, enum state next)
@@ -213,7 +236,7 @@ static int __init do_start(void)
static int __init do_collect(void)
{
- unsigned n = remains;
+ unsigned long n = remains;
if (count < n)
n = count;
memcpy(collect, victim, n);
@@ -346,7 +369,8 @@ static int __init do_name(void)
static int __init do_copy(void)
{
if (count >= body_len) {
- sys_write(wfd, victim, body_len);
+ if (xwrite(wfd, victim, body_len) != body_len)
+ error("write error");
sys_close(wfd);
do_utime(vcollected, mtime);
kfree(vcollected);
@@ -354,7 +378,8 @@ static int __init do_copy(void)
state = SkipIt;
return 0;
} else {
- sys_write(wfd, victim, count);
+ if (xwrite(wfd, victim, count) != count)
+ error("write error");
body_len -= count;
eat(count);
return 1;
@@ -384,7 +409,7 @@ static __initdata int (*actions[])(void) = {
[Reset] = do_reset,
};
-static int __init write_buffer(char *buf, unsigned len)
+static long __init write_buffer(char *buf, unsigned long len)
{
count = len;
victim = buf;
@@ -394,11 +419,11 @@ static int __init write_buffer(char *buf, unsigned len)
return len - count;
}
-static int __init flush_buffer(void *bufv, unsigned len)
+static long __init flush_buffer(void *bufv, unsigned long len)
{
char *buf = (char *) bufv;
- int written;
- int origLen = len;
+ long written;
+ long origLen = len;
if (message)
return -1;
while ((written = write_buffer(buf, len)) < len && !message) {
@@ -417,13 +442,13 @@ static int __init flush_buffer(void *bufv, unsigned len)
return origLen;
}
-static unsigned my_inptr; /* index of next byte to be processed in inbuf */
+static unsigned long my_inptr; /* index of next byte to be processed in inbuf */
#include <linux/decompress/generic.h>
-static char * __init unpack_to_rootfs(char *buf, unsigned len)
+static char * __init unpack_to_rootfs(char *buf, unsigned long len)
{
- int written, res;
+ long written;
decompress_fn decompress;
const char *compress_name;
static __initdata char msg_buf[64];
@@ -457,7 +482,7 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len)
decompress = decompress_method(buf, len, &compress_name);
pr_debug("Detected %s compressed data\n", compress_name);
if (decompress) {
- res = decompress(buf, len, NULL, flush_buffer, NULL,
+ int res = decompress(buf, len, NULL, flush_buffer, NULL,
&my_inptr, error);
if (res)
error("decompressor failed");
@@ -603,8 +628,13 @@ static int __init populate_rootfs(void)
fd = sys_open("/initrd.image",
O_WRONLY|O_CREAT, 0700);
if (fd >= 0) {
- sys_write(fd, (char *)initrd_start,
- initrd_end - initrd_start);
+ ssize_t written = xwrite(fd, (char *)initrd_start,
+ initrd_end - initrd_start);
+
+ if (written != initrd_end - initrd_start)
+ pr_err("/initrd.image: incomplete write (%zd != %ld)\n",
+ written, initrd_end - initrd_start);
+
sys_close(fd);
free_initrd();
}
diff --git a/ipc/shm.c b/ipc/shm.c
index 89fc354156cb..7fc9f9f3a26b 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -178,6 +178,7 @@ static void shm_rcu_free(struct rcu_head *head)
static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
{
+ list_del(&s->shm_clist);
ipc_rmid(&shm_ids(ns), &s->shm_perm);
}
@@ -268,37 +269,6 @@ static void shm_close(struct vm_area_struct *vma)
}
/* Called with ns->shm_ids(ns).rwsem locked */
-static int shm_try_destroy_current(int id, void *p, void *data)
-{
- struct ipc_namespace *ns = data;
- struct kern_ipc_perm *ipcp = p;
- struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
-
- if (shp->shm_creator != current)
- return 0;
-
- /*
- * Mark it as orphaned to destroy the segment when
- * kernel.shm_rmid_forced is changed.
- * It is noop if the following shm_may_destroy() returns true.
- */
- shp->shm_creator = NULL;
-
- /*
- * Don't even try to destroy it. If shm_rmid_forced=0 and IPC_RMID
- * is not set, it shouldn't be deleted here.
- */
- if (!ns->shm_rmid_forced)
- return 0;
-
- if (shm_may_destroy(ns, shp)) {
- shm_lock_by_ptr(shp);
- shm_destroy(ns, shp);
- }
- return 0;
-}
-
-/* Called with ns->shm_ids(ns).rwsem locked */
static int shm_try_destroy_orphaned(int id, void *p, void *data)
{
struct ipc_namespace *ns = data;
@@ -329,18 +299,50 @@ void shm_destroy_orphaned(struct ipc_namespace *ns)
up_write(&shm_ids(ns).rwsem);
}
-
+/* Locking assumes this will only be called with task == current */
void exit_shm(struct task_struct *task)
{
struct ipc_namespace *ns = task->nsproxy->ipc_ns;
+ struct shmid_kernel *shp, *n;
- if (shm_ids(ns).in_use == 0)
+ if (list_empty(&task->sysvshm.shm_clist))
return;
- /* Destroy all already created segments, but not mapped yet */
+ /*
+ * If kernel.shm_rmid_forced is not set then only keep track of
+	 * which shmids are orphaned, so that a later write to the sysctl
+ * can clean them up.
+ */
+ if (!ns->shm_rmid_forced) {
+ down_read(&shm_ids(ns).rwsem);
+ list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
+ shp->shm_creator = NULL;
+ /*
+			 * We only hold the read lock, but since we are only
+			 * called on current, no entry on the list will be shared.
+ */
+ list_del(&task->sysvshm.shm_clist);
+ up_read(&shm_ids(ns).rwsem);
+ return;
+ }
+
+ /*
+	 * Destroy all already created segments that were not yet mapped,
+ * and mark any mapped as orphan to cover the sysctl toggling.
+ * Destroy is skipped if shm_may_destroy() returns false.
+ */
down_write(&shm_ids(ns).rwsem);
- if (shm_ids(ns).in_use)
- idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
+ list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
+ shp->shm_creator = NULL;
+
+ if (shm_may_destroy(ns, shp)) {
+ shm_lock_by_ptr(shp);
+ shm_destroy(ns, shp);
+ }
+ }
+
+ /* Remove the list head from any segments still attached. */
+ list_del(&task->sysvshm.shm_clist);
up_write(&shm_ids(ns).rwsem);
}
@@ -561,6 +563,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
shp->shm_nattch = 0;
shp->shm_file = file;
shp->shm_creator = current;
+ list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
/*
* shmid gets reported as "inode#" in /proc/pid/maps.
diff --git a/kernel/acct.c b/kernel/acct.c
index a1844f14c6d6..51793520566f 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -141,12 +141,12 @@ static int check_free_space(struct bsd_acct_struct *acct, struct file *file)
if (acct->active) {
if (act < 0) {
acct->active = 0;
- printk(KERN_INFO "Process accounting paused\n");
+ pr_info("Process accounting paused\n");
}
} else {
if (act > 0) {
acct->active = 1;
- printk(KERN_INFO "Process accounting resumed\n");
+ pr_info("Process accounting resumed\n");
}
}
@@ -261,6 +261,7 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
if (name) {
struct filename *tmp = getname(name);
+
if (IS_ERR(tmp))
return PTR_ERR(tmp);
error = acct_on(tmp);
@@ -376,7 +377,7 @@ static comp_t encode_comp_t(unsigned long value)
return exp;
}
-#if ACCT_VERSION==1 || ACCT_VERSION==2
+#if ACCT_VERSION == 1 || ACCT_VERSION == 2
/*
* encode an u64 into a comp2_t (24 bits)
*
@@ -389,7 +390,7 @@ static comp_t encode_comp_t(unsigned long value)
#define MANTSIZE2 20 /* 20 bit mantissa. */
#define EXPSIZE2 5 /* 5 bit base 2 exponent. */
#define MAXFRACT2 ((1ul << MANTSIZE2) - 1) /* Maximum fractional value. */
-#define MAXEXP2 ((1 <<EXPSIZE2) - 1) /* Maximum exponent. */
+#define MAXEXP2 ((1 << EXPSIZE2) - 1) /* Maximum exponent. */
static comp2_t encode_comp2_t(u64 value)
{
@@ -420,7 +421,7 @@ static comp2_t encode_comp2_t(u64 value)
}
#endif
-#if ACCT_VERSION==3
+#if ACCT_VERSION == 3
/*
* encode an u64 into a 32 bit IEEE float
*/
@@ -429,8 +430,9 @@ static u32 encode_float(u64 value)
unsigned exp = 190;
unsigned u;
- if (value==0) return 0;
- while ((s64)value > 0){
+ if (value == 0)
+ return 0;
+ while ((s64)value > 0) {
value <<= 1;
exp--;
}
@@ -486,16 +488,17 @@ static void do_acct_process(struct bsd_acct_struct *acct,
run_time -= current->group_leader->start_time;
/* convert nsec -> AHZ */
elapsed = nsec_to_AHZ(run_time);
-#if ACCT_VERSION==3
+#if ACCT_VERSION == 3
ac.ac_etime = encode_float(elapsed);
#else
ac.ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ?
- (unsigned long) elapsed : (unsigned long) -1l);
+ (unsigned long) elapsed : (unsigned long) -1l);
#endif
-#if ACCT_VERSION==1 || ACCT_VERSION==2
+#if ACCT_VERSION == 1 || ACCT_VERSION == 2
{
/* new enlarged etime field */
comp2_t etime = encode_comp2_t(elapsed);
+
ac.ac_etime_hi = etime >> 16;
ac.ac_etime_lo = (u16) etime;
}
@@ -505,15 +508,15 @@ static void do_acct_process(struct bsd_acct_struct *acct,
/* we really need to bite the bullet and change layout */
ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid);
ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid);
-#if ACCT_VERSION==2
+#if ACCT_VERSION == 2
ac.ac_ahz = AHZ;
#endif
-#if ACCT_VERSION==1 || ACCT_VERSION==2
+#if ACCT_VERSION == 1 || ACCT_VERSION == 2
/* backward-compatible 16 bit fields */
ac.ac_uid16 = ac.ac_uid;
ac.ac_gid16 = ac.ac_gid;
#endif
-#if ACCT_VERSION==3
+#if ACCT_VERSION == 3
ac.ac_pid = task_tgid_nr_ns(current, ns);
rcu_read_lock();
ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns);
@@ -574,6 +577,7 @@ void acct_collect(long exitcode, int group_dead)
if (group_dead && current->mm) {
struct vm_area_struct *vma;
+
down_read(&current->mm->mmap_sem);
vma = current->mm->mmap;
while (vma) {
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 8e9bc9c3dbb7..c447cd9848d1 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -106,7 +106,7 @@ static inline struct audit_entry *audit_init_entry(u32 field_count)
if (unlikely(!entry))
return NULL;
- fields = kzalloc(sizeof(*fields) * field_count, GFP_KERNEL);
+ fields = kcalloc(field_count, sizeof(*fields), GFP_KERNEL);
if (unlikely(!fields)) {
kfree(entry);
return NULL;
@@ -160,7 +160,7 @@ static __u32 *classes[AUDIT_SYSCALL_CLASSES];
int __init audit_register_class(int class, unsigned *list)
{
- __u32 *p = kzalloc(AUDIT_BITMASK_SIZE * sizeof(__u32), GFP_KERNEL);
+ __u32 *p = kcalloc(AUDIT_BITMASK_SIZE, sizeof(__u32), GFP_KERNEL);
if (!p)
return -ENOMEM;
while (*list != ~0U) {
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 9fd4246b04b8..e1d1d1952bfa 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -9,7 +9,6 @@
#include <linux/page-flags.h>
#include <linux/mmzone.h>
#include <linux/kbuild.h>
-#include <linux/page_cgroup.h>
#include <linux/log2.h>
#include <linux/spinlock_types.h>
@@ -18,7 +17,6 @@ void foo(void)
/* The enum constants to put into include/generated/bounds.h */
DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
- DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
#ifdef CONFIG_SMP
DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
#endif
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 6f3254e8c137..1d0af8a2c646 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -167,6 +167,11 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
/* For mmu_notifiers */
const unsigned long mmun_start = addr;
const unsigned long mmun_end = addr + PAGE_SIZE;
+ struct mem_cgroup *memcg;
+
+ err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg);
+ if (err)
+ return err;
/* For try_to_free_swap() and munlock_vma_page() below */
lock_page(page);
@@ -179,6 +184,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
get_page(kpage);
page_add_new_anon_rmap(kpage, vma, addr);
+ mem_cgroup_commit_charge(kpage, memcg, false);
+ lru_cache_add_active_or_unevictable(kpage, vma);
if (!PageAnon(page)) {
dec_mm_counter(mm, MM_FILEPAGES);
@@ -200,6 +207,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
err = 0;
unlock:
+ mem_cgroup_cancel_charge(kpage, memcg);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
unlock_page(page);
return err;
@@ -315,18 +323,11 @@ retry:
if (!new_page)
goto put_old;
- if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
- goto put_new;
-
__SetPageUptodate(new_page);
copy_highpage(new_page, old_page);
copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
ret = __replace_page(vma, vaddr, old_page, new_page);
- if (ret)
- mem_cgroup_uncharge_page(new_page);
-
-put_new:
page_cache_release(new_page);
put_old:
put_page(old_page);
diff --git a/kernel/exit.c b/kernel/exit.c
index e5c4668f1799..32c58f7433a3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -59,7 +59,7 @@
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
-static void exit_mm(struct task_struct * tsk);
+static void exit_mm(struct task_struct *tsk);
static void __unhash_process(struct task_struct *p, bool group_dead)
{
@@ -151,7 +151,7 @@ static void __exit_signal(struct task_struct *tsk)
spin_unlock(&sighand->siglock);
__cleanup_sighand(sighand);
- clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
+ clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
if (group_dead) {
flush_sigqueue(&sig->shared_pending);
tty_kref_put(tty);
@@ -168,7 +168,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
}
-void release_task(struct task_struct * p)
+void release_task(struct task_struct *p)
{
struct task_struct *leader;
int zap_leader;
@@ -192,7 +192,8 @@ repeat:
*/
zap_leader = 0;
leader = p->group_leader;
- if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {
+ if (leader != p && thread_group_empty(leader)
+ && leader->exit_state == EXIT_ZOMBIE) {
/*
* If we were the last child thread and the leader has
* exited already, and the leader's parent ignores SIGCHLD,
@@ -241,7 +242,8 @@ struct pid *session_of_pgrp(struct pid *pgrp)
*
* "I ask you, have you ever known what it is to be an orphan?"
*/
-static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task)
+static int will_become_orphaned_pgrp(struct pid *pgrp,
+ struct task_struct *ignored_task)
{
struct task_struct *p;
@@ -294,9 +296,9 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
struct task_struct *ignored_task = tsk;
if (!parent)
- /* exit: our father is in a different pgrp than
- * we are and we were the only connection outside.
- */
+ /* exit: our father is in a different pgrp than
+ * we are and we were the only connection outside.
+ */
parent = tsk->real_parent;
else
/* reparent: our child is in a different pgrp than
@@ -405,7 +407,7 @@ assign_new_owner:
* Turn us into a lazy TLB process if we
* aren't already..
*/
-static void exit_mm(struct task_struct * tsk)
+static void exit_mm(struct task_struct *tsk)
{
struct mm_struct *mm = tsk->mm;
struct core_state *core_state;
@@ -425,6 +427,7 @@ static void exit_mm(struct task_struct * tsk)
core_state = mm->core_state;
if (core_state) {
struct core_thread self;
+
up_read(&mm->mmap_sem);
self.task = tsk;
@@ -455,6 +458,7 @@ static void exit_mm(struct task_struct * tsk)
task_unlock(tsk);
mm_update_next_owner(mm);
mmput(mm);
+ clear_thread_flag(TIF_MEMDIE);
}
/*
@@ -565,6 +569,7 @@ static void forget_original_parent(struct task_struct *father)
list_for_each_entry_safe(p, n, &father->children, sibling) {
struct task_struct *t = p;
+
do {
t->real_parent = reaper;
if (t->parent == father) {
@@ -598,7 +603,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
/*
* This does two things:
*
- * A. Make init inherit all the child processes
+ * A. Make init inherit all the child processes
* B. Check to see if any process groups have become orphaned
* as a result of our exiting, and if they have any stopped
* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
@@ -648,9 +653,8 @@ static void check_stack_usage(void)
spin_lock(&low_water_lock);
if (free < lowest_to_date) {
- printk(KERN_WARNING "%s (%d) used greatest stack depth: "
- "%lu bytes left\n",
- current->comm, task_pid_nr(current), free);
+ pr_warn("%s (%d) used greatest stack depth: %lu bytes left\n",
+ current->comm, task_pid_nr(current), free);
lowest_to_date = free;
}
spin_unlock(&low_water_lock);
@@ -691,8 +695,7 @@ void do_exit(long code)
* leave this task alone and wait for reboot.
*/
if (unlikely(tsk->flags & PF_EXITING)) {
- printk(KERN_ALERT
- "Fixing recursive fault but reboot is needed!\n");
+ pr_alert("Fixing recursive fault but reboot is needed!\n");
/*
* We can do this unlocked here. The futex code uses
* this flag just to verify whether the pi state
@@ -716,9 +719,9 @@ void do_exit(long code)
raw_spin_unlock_wait(&tsk->pi_lock);
if (unlikely(in_atomic()))
- printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
- current->comm, task_pid_nr(current),
- preempt_count());
+ pr_info("note: %s[%d] exited with preempt_count %d\n",
+ current->comm, task_pid_nr(current),
+ preempt_count());
acct_update_integrals(tsk);
/* sync mm's RSS info before statistics gathering */
@@ -836,7 +839,6 @@ void do_exit(long code)
for (;;)
cpu_relax(); /* For when BUG is null */
}
-
EXPORT_SYMBOL_GPL(do_exit);
void complete_and_exit(struct completion *comp, long code)
@@ -846,7 +848,6 @@ void complete_and_exit(struct completion *comp, long code)
do_exit(code);
}
-
EXPORT_SYMBOL(complete_and_exit);
SYSCALL_DEFINE1(exit, int, error_code)
@@ -869,6 +870,7 @@ do_group_exit(int exit_code)
exit_code = sig->group_exit_code;
else if (!thread_group_empty(current)) {
struct sighand_struct *const sighand = current->sighand;
+
spin_lock_irq(&sighand->siglock);
if (signal_group_exit(sig))
/* Another thread got here before we took the lock. */
@@ -1033,9 +1035,9 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
* as other threads in the parent group can be right
* here reaping other children at the same time.
*
- * We use thread_group_cputime_adjusted() to get times for the thread
- * group, which consolidates times for all threads in the
- * group including the group leader.
+ * We use thread_group_cputime_adjusted() to get times for
+ * the thread group, which consolidates times for all threads
+ * in the group including the group leader.
*/
thread_group_cputime_adjusted(p, &tgutime, &tgstime);
spin_lock_irq(&p->real_parent->sighand->siglock);
@@ -1417,6 +1419,7 @@ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
list_for_each_entry(p, &tsk->children, sibling) {
int ret = wait_consider_task(wo, 0, p);
+
if (ret)
return ret;
}
@@ -1430,6 +1433,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
int ret = wait_consider_task(wo, 1, p);
+
if (ret)
return ret;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index fbd3497b221f..fa9124322cd4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -374,12 +374,11 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
*/
down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
- mm->locked_vm = 0;
- mm->mmap = NULL;
- mm->vmacache_seqnum = 0;
- mm->map_count = 0;
- cpumask_clear(mm_cpumask(mm));
- mm->mm_rb = RB_ROOT;
+ mm->total_vm = oldmm->total_vm;
+ mm->shared_vm = oldmm->shared_vm;
+ mm->exec_vm = oldmm->exec_vm;
+ mm->stack_vm = oldmm->stack_vm;
+
rb_link = &mm->mm_rb.rb_node;
rb_parent = NULL;
pprev = &mm->mmap;
@@ -536,19 +535,37 @@ static void mm_init_aio(struct mm_struct *mm)
#endif
}
+static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
+{
+#ifdef CONFIG_MEMCG
+ mm->owner = p;
+#endif
+}
+
static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
{
+ mm->mmap = NULL;
+ mm->mm_rb = RB_ROOT;
+ mm->vmacache_seqnum = 0;
atomic_set(&mm->mm_users, 1);
atomic_set(&mm->mm_count, 1);
init_rwsem(&mm->mmap_sem);
INIT_LIST_HEAD(&mm->mmlist);
mm->core_state = NULL;
atomic_long_set(&mm->nr_ptes, 0);
+ mm->map_count = 0;
+ mm->locked_vm = 0;
+ mm->pinned_vm = 0;
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
spin_lock_init(&mm->page_table_lock);
+ mm_init_cpumask(mm);
mm_init_aio(mm);
mm_init_owner(mm, p);
+ mmu_notifier_mm_init(mm);
clear_tlb_flush_pending(mm);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
+ mm->pmd_huge_pte = NULL;
+#endif
if (current->mm) {
mm->flags = current->mm->flags & MMF_INIT_MASK;
@@ -558,11 +575,17 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
mm->def_flags = 0;
}
- if (likely(!mm_alloc_pgd(mm))) {
- mmu_notifier_mm_init(mm);
- return mm;
- }
+ if (mm_alloc_pgd(mm))
+ goto fail_nopgd;
+
+ if (init_new_context(p, mm))
+ goto fail_nocontext;
+
+ return mm;
+fail_nocontext:
+ mm_free_pgd(mm);
+fail_nopgd:
free_mm(mm);
return NULL;
}
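
The reworked mm_init() above moves from a single success branch to the usual goto-unwind shape: each setup step that can fail gets a label that undoes only what has already succeeded, in reverse order. The following is a hedged, userspace-only sketch of that error-handling pattern (the names and mallocs are made up for illustration; this is not the kernel code):

#include <stdio.h>
#include <stdlib.h>

struct ctx {
	void *pgd;	/* stands in for mm_alloc_pgd()     */
	void *mmu_ctx;	/* stands in for init_new_context() */
};

/* Two fallible steps, unwound in reverse order on failure. */
static struct ctx *ctx_init(struct ctx *c)
{
	c->pgd = malloc(64);
	if (!c->pgd)
		goto fail_nopgd;

	c->mmu_ctx = malloc(64);
	if (!c->mmu_ctx)
		goto fail_nocontext;

	return c;			/* fully initialised */

fail_nocontext:
	free(c->pgd);			/* undo only the completed step */
fail_nopgd:
	free(c);
	return NULL;
}

int main(void)
{
	struct ctx *raw = malloc(sizeof(*raw));
	struct ctx *c = raw ? ctx_init(raw) : NULL;

	printf("init %s\n", c ? "succeeded" : "failed");
	if (c) {
		free(c->mmu_ctx);
		free(c->pgd);
		free(c);
	}
	return 0;
}
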
@@ -596,7 +619,6 @@ struct mm_struct *mm_alloc(void)
return NULL;
memset(mm, 0, sizeof(*mm));
- mm_init_cpumask(mm);
return mm_init(mm, current);
}
@@ -828,17 +850,10 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
goto fail_nomem;
memcpy(mm, oldmm, sizeof(*mm));
- mm_init_cpumask(mm);
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
- mm->pmd_huge_pte = NULL;
-#endif
if (!mm_init(mm, tsk))
goto fail_nomem;
- if (init_new_context(tsk, mm))
- goto fail_nocontext;
-
dup_mm_exe_file(oldmm, mm);
err = dup_mmap(mm, oldmm);
@@ -860,15 +875,6 @@ free_pt:
fail_nomem:
return NULL;
-
-fail_nocontext:
- /*
- * If init_new_context() failed, we cannot use mmput() to free the mm
- * because it calls destroy_context()
- */
- mm_free_pgd(mm);
- free_mm(mm);
- return NULL;
}
static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
@@ -1140,13 +1146,6 @@ static void rt_mutex_init_task(struct task_struct *p)
#endif
}
-#ifdef CONFIG_MEMCG
-void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
-{
- mm->owner = p;
-}
-#endif /* CONFIG_MEMCG */
-
/*
* Initialize POSIX timer handling for a single task.
*/
@@ -1346,10 +1345,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#ifdef CONFIG_DEBUG_MUTEXES
p->blocked_on = NULL; /* not blocked yet */
#endif
-#ifdef CONFIG_MEMCG
- p->memcg_batch.do_batch = 0;
- p->memcg_batch.memcg = NULL;
-#endif
#ifdef CONFIG_BCACHE
p->sequential_io = 0;
p->sequential_io_avg = 0;
@@ -1367,6 +1362,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
if (retval)
goto bad_fork_cleanup_policy;
/* copy all the process information */
+ shm_init_task(p);
retval = copy_semundo(clone_flags, p);
if (retval)
goto bad_fork_cleanup_audit;
@@ -1918,6 +1914,11 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
*/
exit_sem(current);
}
+ if (unshare_flags & CLONE_NEWIPC) {
+ /* Orphan segments in old ns (see sem above). */
+ exit_shm(current);
+ shm_init_task(current);
+ }
if (new_nsproxy)
switch_task_namespaces(current, new_nsproxy);
diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c
index 15ff01a76379..edf67c493a8e 100644
--- a/kernel/gcov/fs.c
+++ b/kernel/gcov/fs.c
@@ -784,8 +784,7 @@ static __init int gcov_fs_init(void)
err_remove:
pr_err("init failed\n");
- if (root_node.dentry)
- debugfs_remove(root_node.dentry);
+ debugfs_remove(root_node.dentry);
return rc;
}
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index cb0cf37dac3a..ae5167087845 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -364,7 +364,7 @@ static int __sprint_symbol(char *buffer, unsigned long address,
address += symbol_offset;
name = kallsyms_lookup(address, &size, &offset, &modname, buffer);
if (!name)
- return sprintf(buffer, "0x%lx", address);
+ return sprintf(buffer, "0x%lx", address - symbol_offset);
if (name != buffer)
strcpy(buffer, name);
diff --git a/kernel/panic.c b/kernel/panic.c
index 62e16cef9cc2..d09dc5c32c67 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -224,6 +224,7 @@ static const struct tnt tnts[] = {
{ TAINT_FIRMWARE_WORKAROUND, 'I', ' ' },
{ TAINT_OOT_MODULE, 'O', ' ' },
{ TAINT_UNSIGNED_MODULE, 'E', ' ' },
+ { TAINT_SOFTLOCKUP, 'L', ' ' },
};
/**
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 13e839dbca07..de1a6bb6861d 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -45,6 +45,7 @@
#include <linux/poll.h>
#include <linux/irq_work.h>
#include <linux/utsname.h>
+#include <linux/ctype.h>
#include <asm/uaccess.h>
@@ -56,7 +57,7 @@
int console_printk[4] = {
CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */
- DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */
+ MESSAGE_LOGLEVEL_DEFAULT, /* default_message_loglevel */
CONSOLE_LOGLEVEL_MIN, /* minimum_console_loglevel */
CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */
};
@@ -113,9 +114,9 @@ static int __down_trylock_console_sem(unsigned long ip)
* This is used for debugging the mess that is the VT code by
* keeping track if we have the console semaphore held. It's
* definitely not the perfect debug tool (we don't know if _WE_
- * hold it are racing, but it helps tracking those weird code
- * path in the console code where we end up in places I want
- * locked without the console sempahore held
+ * hold it and are racing, but it helps tracking those weird code
+ * paths in the console code where we end up in places I want
+ * locked without the console sempahore held).
*/
static int console_locked, console_suspended;
@@ -146,8 +147,8 @@ static int console_may_schedule;
* the overall length of the record.
*
* The heads to the first and last entry in the buffer, as well as the
- * sequence numbers of these both entries are maintained when messages
- * are stored..
+ * sequence numbers of these entries are maintained when messages are
+ * stored.
*
* If the heads indicate available messages, the length in the header
* tells the start next message. A length == 0 for the next message
@@ -257,7 +258,7 @@ static u64 clear_seq;
static u32 clear_idx;
#define PREFIX_MAX 32
-#define LOG_LINE_MAX 1024 - PREFIX_MAX
+#define LOG_LINE_MAX (1024 - PREFIX_MAX)
/* record buffer */
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
@@ -266,6 +267,7 @@ static u32 clear_idx;
#define LOG_ALIGN __alignof__(struct printk_log)
#endif
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
+#define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT)
static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
static char *log_buf = __log_buf;
static u32 log_buf_len = __LOG_BUF_LEN;
@@ -344,7 +346,7 @@ static int log_make_free_space(u32 msg_size)
while (log_first_seq < log_next_seq) {
if (logbuf_has_space(msg_size, false))
return 0;
- /* drop old messages until we have enough continuous space */
+ /* drop old messages until we have enough contiguous space */
log_first_idx = log_next(log_first_idx);
log_first_seq++;
}
@@ -453,11 +455,7 @@ static int log_store(int facility, int level,
return msg->text_len;
}
-#ifdef CONFIG_SECURITY_DMESG_RESTRICT
-int dmesg_restrict = 1;
-#else
-int dmesg_restrict;
-#endif
+int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT);
static int syslog_action_restricted(int type)
{
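
Folding the #ifdef pair into IS_ENABLED() keeps the default on one line while still resolving to a compile-time constant. Below is a hedged userspace imitation of that preprocessor trick; MY_IS_ENABLED is a simplified stand-in, and the real macro in <linux/kconfig.h> also understands =m tristate options:

#include <stdio.h>

/* Comment this out to watch the default flip to 0 at compile time. */
#define CONFIG_SECURITY_DMESG_RESTRICT 1

/*
 * Simplified stand-in for the kernel's IS_ENABLED(): expands to 1 when
 * the option macro is defined to 1, and to 0 when it is not defined.
 */
#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(ignored, val, ...) val
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define __is_defined(x) ___is_defined(x)
#define MY_IS_ENABLED(option) __is_defined(option)

static int dmesg_restrict = MY_IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT);

int main(void)
{
	printf("dmesg_restrict defaults to %d\n", dmesg_restrict);
	return 0;
}
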
@@ -828,34 +826,74 @@ void log_buf_kexec_setup(void)
/* requested log_buf_len from kernel cmdline */
static unsigned long __initdata new_log_buf_len;
-/* save requested log_buf_len since it's too early to process it */
-static int __init log_buf_len_setup(char *str)
+/* we practice scaling the ring buffer by powers of 2 */
+static void __init log_buf_len_update(unsigned size)
{
- unsigned size = memparse(str, &str);
-
if (size)
size = roundup_pow_of_two(size);
if (size > log_buf_len)
new_log_buf_len = size;
+}
+
+/* save requested log_buf_len since it's too early to process it */
+static int __init log_buf_len_setup(char *str)
+{
+ unsigned size = memparse(str, &str);
+
+ log_buf_len_update(size);
return 0;
}
early_param("log_buf_len", log_buf_len_setup);
+static void __init log_buf_add_cpu(void)
+{
+ unsigned int cpu_extra;
+
+ /*
+ * archs should set up cpu_possible_bits properly with
+ * set_cpu_possible() after setup_arch() but just in
+ * case let's ensure this is valid.
+ * case let's ensure this is valid.
+ */
+ if (num_possible_cpus() == 1)
+ return;
+
+ cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN;
+
+ /* by default this will only continue through for large > 64 CPUs */
+ if (cpu_extra <= __LOG_BUF_LEN / 2)
+ return;
+
+ pr_info("log_buf_len individual max cpu contribution: %d bytes\n",
+ __LOG_CPU_MAX_BUF_LEN);
+ pr_info("log_buf_len total cpu_extra contributions: %d bytes\n",
+ cpu_extra);
+ pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN);
+
+ log_buf_len_update(cpu_extra + __LOG_BUF_LEN);
+}
+
void __init setup_log_buf(int early)
{
unsigned long flags;
char *new_log_buf;
int free;
+ if (log_buf != __log_buf)
+ return;
+
+ if (!early && !new_log_buf_len)
+ log_buf_add_cpu();
+
if (!new_log_buf_len)
return;
if (early) {
new_log_buf =
- memblock_virt_alloc(new_log_buf_len, PAGE_SIZE);
+ memblock_virt_alloc(new_log_buf_len, LOG_ALIGN);
} else {
- new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len, 0);
+ new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len,
+ LOG_ALIGN);
}
if (unlikely(!new_log_buf)) {
@@ -872,7 +910,7 @@ void __init setup_log_buf(int early)
memcpy(log_buf, __log_buf, __LOG_BUF_LEN);
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
- pr_info("log_buf_len: %d\n", log_buf_len);
+ pr_info("log_buf_len: %d bytes\n", log_buf_len);
pr_info("early log buf free: %d(%d%%)\n",
free, (free * 100) / __LOG_BUF_LEN);
}
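
Taken together, log_buf_add_cpu() and log_buf_len_update() size the ring buffer as the static __LOG_BUF_LEN plus a per-possible-CPU contribution, rounded up to a power of two, and only when that extra amount exceeds half of the static buffer. A hedged userspace sketch of the arithmetic, with made-up stand-ins for the two CONFIG_* shifts:

#include <stdio.h>

/* Stand-ins for CONFIG_LOG_BUF_SHIFT / CONFIG_LOG_CPU_MAX_BUF_SHIFT. */
#define LOG_BUF_LEN		(1u << 17)	/* 128 KiB base buffer   */
#define LOG_CPU_MAX_BUF_LEN	(1u << 12)	/*   4 KiB per extra CPU */

/* Round up to the next power of two (32-bit). */
static unsigned int roundup_pow_of_two(unsigned int n)
{
	unsigned int p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

/* Mirror of the sizing policy: extra space only when it is significant. */
static unsigned int sized_log_buf_len(unsigned int possible_cpus)
{
	unsigned int cpu_extra;

	if (possible_cpus <= 1)
		return LOG_BUF_LEN;

	cpu_extra = (possible_cpus - 1) * LOG_CPU_MAX_BUF_LEN;
	if (cpu_extra <= LOG_BUF_LEN / 2)
		return LOG_BUF_LEN;

	return roundup_pow_of_two(cpu_extra + LOG_BUF_LEN);
}

int main(void)
{
	unsigned int cpus[] = { 1, 8, 64, 256 };

	for (unsigned int i = 0; i < sizeof(cpus) / sizeof(cpus[0]); i++)
		printf("%4u CPUs -> %u bytes\n", cpus[i],
		       sized_log_buf_len(cpus[i]));
	return 0;
}

With the stand-in values above, a 256-CPU machine ends up with a 2 MiB buffer while small machines keep the 128 KiB default.
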
@@ -881,7 +919,7 @@ static bool __read_mostly ignore_loglevel;
static int __init ignore_loglevel_setup(char *str)
{
- ignore_loglevel = 1;
+ ignore_loglevel = true;
pr_info("debug: ignoring loglevel setting.\n");
return 0;
@@ -947,11 +985,7 @@ static inline void boot_delay_msec(int level)
}
#endif
-#if defined(CONFIG_PRINTK_TIME)
-static bool printk_time = 1;
-#else
-static bool printk_time;
-#endif
+static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME);
module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
static size_t print_time(u64 ts, char *buf)
@@ -1310,7 +1344,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
* for pending data, not the size; return the count of
* records, not the length.
*/
- error = log_next_idx - syslog_idx;
+ error = log_next_seq - syslog_seq;
} else {
u64 seq = syslog_seq;
u32 idx = syslog_idx;
@@ -1416,10 +1450,9 @@ static int have_callable_console(void)
/*
* Can we actually use the console at this time on this cpu?
*
- * Console drivers may assume that per-cpu resources have
- * been allocated. So unless they're explicitly marked as
- * being able to cope (CON_ANYTIME) don't call them until
- * this CPU is officially up.
+ * Console drivers may assume that per-cpu resources have been allocated. So
+ * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
+ * call them until this CPU is officially up.
*/
static inline int can_use_console(unsigned int cpu)
{
@@ -1432,8 +1465,10 @@ static inline int can_use_console(unsigned int cpu)
* console_lock held, and 'console_locked' set) if it
* is successful, false otherwise.
*/
-static int console_trylock_for_printk(unsigned int cpu)
+static int console_trylock_for_printk(void)
{
+ unsigned int cpu = smp_processor_id();
+
if (!console_trylock())
return 0;
/*
@@ -1476,7 +1511,7 @@ static struct cont {
struct task_struct *owner; /* task of first print*/
u64 ts_nsec; /* time of first print */
u8 level; /* log level of first message */
- u8 facility; /* log level of first message */
+ u8 facility; /* log facility of first message */
enum log_flags flags; /* prefix, newline flags */
bool flushed:1; /* buffer sealed and committed */
} cont;
@@ -1608,7 +1643,8 @@ asmlinkage int vprintk_emit(int facility, int level,
*/
if (!oops_in_progress && !lockdep_recursing(current)) {
recursion_bug = 1;
- goto out_restore_irqs;
+ local_irq_restore(flags);
+ return 0;
}
zap_locks();
}
@@ -1716,21 +1752,30 @@ asmlinkage int vprintk_emit(int facility, int level,
logbuf_cpu = UINT_MAX;
raw_spin_unlock(&logbuf_lock);
+ lockdep_on();
+ local_irq_restore(flags);
/* If called from the scheduler, we can not call up(). */
if (!in_sched) {
+ lockdep_off();
+ /*
+ * Disable preemption to avoid being preempted while holding
+ * console_sem which would prevent anyone from printing to
+ * console
+ */
+ preempt_disable();
+
/*
* Try to acquire and then immediately release the console
* semaphore. The release will print out buffers and wake up
* /dev/kmsg and syslog() users.
*/
- if (console_trylock_for_printk(this_cpu))
+ if (console_trylock_for_printk())
console_unlock();
+ preempt_enable();
+ lockdep_on();
}
- lockdep_on();
-out_restore_irqs:
- local_irq_restore(flags);
return printed_len;
}
EXPORT_SYMBOL(vprintk_emit);
@@ -1802,7 +1847,7 @@ EXPORT_SYMBOL(printk);
#define LOG_LINE_MAX 0
#define PREFIX_MAX 0
-#define LOG_LINE_MAX 0
+
static u64 syslog_seq;
static u32 syslog_idx;
static u64 console_seq;
@@ -1881,11 +1926,12 @@ static int __add_preferred_console(char *name, int idx, char *options,
return 0;
}
/*
- * Set up a list of consoles. Called from init/main.c
+ * Set up a console. Called via do_early_param() in init/main.c
+ * for each "console=" parameter in the boot command line.
*/
static int __init console_setup(char *str)
{
- char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */
+ char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */
char *s, *options, *brl_options = NULL;
int idx;
@@ -1902,7 +1948,8 @@ static int __init console_setup(char *str)
strncpy(buf, str, sizeof(buf) - 1);
}
buf[sizeof(buf) - 1] = 0;
- if ((options = strchr(str, ',')) != NULL)
+ options = strchr(str, ',');
+ if (options)
*(options++) = 0;
#ifdef __sparc__
if (!strcmp(str, "ttya"))
@@ -1911,7 +1958,7 @@ static int __init console_setup(char *str)
strcpy(buf, "ttyS1");
#endif
for (s = buf; *s; s++)
- if ((*s >= '0' && *s <= '9') || *s == ',')
+ if (isdigit(*s) || *s == ',')
break;
idx = simple_strtoul(s, NULL, 10);
*s = 0;
@@ -1950,7 +1997,6 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha
i++, c++)
if (strcmp(c->name, name) == 0 && c->index == idx) {
strlcpy(c->name, name_new, sizeof(c->name));
- c->name[sizeof(c->name) - 1] = 0;
c->options = options;
c->index = idx_new;
return i;
@@ -1959,12 +2005,12 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha
return -1;
}
-bool console_suspend_enabled = 1;
+bool console_suspend_enabled = true;
EXPORT_SYMBOL(console_suspend_enabled);
static int __init console_suspend_disable(char *str)
{
- console_suspend_enabled = 0;
+ console_suspend_enabled = false;
return 1;
}
__setup("no_console_suspend", console_suspend_disable);
@@ -2045,8 +2091,8 @@ EXPORT_SYMBOL(console_lock);
/**
* console_trylock - try to lock the console system for exclusive use.
*
- * Tried to acquire a lock which guarantees that the caller has
- * exclusive access to the console system and the console_drivers list.
+ * Try to acquire a lock which guarantees that the caller has exclusive
+ * access to the console system and the console_drivers list.
*
* returns 1 on success, and 0 on failure to acquire the lock.
*/
@@ -2618,14 +2664,13 @@ EXPORT_SYMBOL(__printk_ratelimit);
bool printk_timed_ratelimit(unsigned long *caller_jiffies,
unsigned int interval_msecs)
{
- if (*caller_jiffies == 0
- || !time_in_range(jiffies, *caller_jiffies,
- *caller_jiffies
- + msecs_to_jiffies(interval_msecs))) {
- *caller_jiffies = jiffies;
- return true;
- }
- return false;
+ unsigned long elapsed = jiffies - *caller_jiffies;
+
+ if (*caller_jiffies && elapsed <= msecs_to_jiffies(interval_msecs))
+ return false;
+
+ *caller_jiffies = jiffies;
+ return true;
}
EXPORT_SYMBOL(printk_timed_ratelimit);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 75b22e22a72c..75875a741b5e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1240,8 +1240,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = hugetlb_sysctl_handler,
- .extra1 = (void *)&hugetlb_zero,
- .extra2 = (void *)&hugetlb_infinity,
+ .extra1 = &zero,
},
#ifdef CONFIG_NUMA
{
@@ -1250,8 +1249,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = &hugetlb_mempolicy_sysctl_handler,
- .extra1 = (void *)&hugetlb_zero,
- .extra2 = (void *)&hugetlb_infinity,
+ .extra1 = &zero,
},
#endif
{
@@ -1274,8 +1272,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = hugetlb_overcommit_handler,
- .extra1 = (void *)&hugetlb_zero,
- .extra2 = (void *)&hugetlb_infinity,
+ .extra1 = &zero,
},
#endif
{
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c
index 12d6ebbfdd83..0dbab6d1acb4 100644
--- a/kernel/test_kprobes.c
+++ b/kernel/test_kprobes.c
@@ -14,6 +14,8 @@
* the GNU General Public License for more details.
*/
+#define pr_fmt(fmt) "Kprobe smoke test: " fmt
+
#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/random.h>
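
Defining pr_fmt() before the includes is what lets every pr_err()/pr_info() in this file drop the repeated "Kprobe smoke test" prefix: the pr_* macros paste pr_fmt(fmt) into their format string at compile time. A hedged userspace imitation of that prefixing trick (uses the GNU ##__VA_ARGS__ extension):

#include <stdio.h>

/* Userspace imitation of the kernel's pr_fmt()/pr_err() string pasting. */
#define pr_fmt(fmt) "Kprobe smoke test: " fmt
#define pr_err(fmt, ...)  fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)
#define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
	pr_info("started\n");
	pr_err("register_kprobe returned %d\n", -22);
	return 0;
}
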
@@ -41,8 +43,7 @@ static void kp_post_handler(struct kprobe *p, struct pt_regs *regs,
{
if (preh_val != (rand1 / div_factor)) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "incorrect value in post_handler\n");
+ pr_err("incorrect value in post_handler\n");
}
posth_val = preh_val + div_factor;
}
@@ -59,8 +60,7 @@ static int test_kprobe(void)
ret = register_kprobe(&kp);
if (ret < 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "register_kprobe returned %d\n", ret);
+ pr_err("register_kprobe returned %d\n", ret);
return ret;
}
@@ -68,14 +68,12 @@ static int test_kprobe(void)
unregister_kprobe(&kp);
if (preh_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kprobe pre_handler not called\n");
+ pr_err("kprobe pre_handler not called\n");
handler_errors++;
}
if (posth_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kprobe post_handler not called\n");
+ pr_err("kprobe post_handler not called\n");
handler_errors++;
}
@@ -98,8 +96,7 @@ static void kp_post_handler2(struct kprobe *p, struct pt_regs *regs,
{
if (preh_val != (rand1 / div_factor) + 1) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "incorrect value in post_handler2\n");
+ pr_err("incorrect value in post_handler2\n");
}
posth_val = preh_val + div_factor;
}
@@ -120,8 +117,7 @@ static int test_kprobes(void)
kp.flags = 0;
ret = register_kprobes(kps, 2);
if (ret < 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "register_kprobes returned %d\n", ret);
+ pr_err("register_kprobes returned %d\n", ret);
return ret;
}
@@ -130,14 +126,12 @@ static int test_kprobes(void)
ret = target(rand1);
if (preh_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kprobe pre_handler not called\n");
+ pr_err("kprobe pre_handler not called\n");
handler_errors++;
}
if (posth_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kprobe post_handler not called\n");
+ pr_err("kprobe post_handler not called\n");
handler_errors++;
}
@@ -146,14 +140,12 @@ static int test_kprobes(void)
ret = target2(rand1);
if (preh_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kprobe pre_handler2 not called\n");
+ pr_err("kprobe pre_handler2 not called\n");
handler_errors++;
}
if (posth_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kprobe post_handler2 not called\n");
+ pr_err("kprobe post_handler2 not called\n");
handler_errors++;
}
@@ -166,8 +158,7 @@ static u32 j_kprobe_target(u32 value)
{
if (value != rand1) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "incorrect value in jprobe handler\n");
+ pr_err("incorrect value in jprobe handler\n");
}
jph_val = rand1;
@@ -186,16 +177,14 @@ static int test_jprobe(void)
ret = register_jprobe(&jp);
if (ret < 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "register_jprobe returned %d\n", ret);
+ pr_err("register_jprobe returned %d\n", ret);
return ret;
}
ret = target(rand1);
unregister_jprobe(&jp);
if (jph_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "jprobe handler not called\n");
+ pr_err("jprobe handler not called\n");
handler_errors++;
}
@@ -217,24 +206,21 @@ static int test_jprobes(void)
jp.kp.flags = 0;
ret = register_jprobes(jps, 2);
if (ret < 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "register_jprobes returned %d\n", ret);
+ pr_err("register_jprobes returned %d\n", ret);
return ret;
}
jph_val = 0;
ret = target(rand1);
if (jph_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "jprobe handler not called\n");
+ pr_err("jprobe handler not called\n");
handler_errors++;
}
jph_val = 0;
ret = target2(rand1);
if (jph_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "jprobe handler2 not called\n");
+ pr_err("jprobe handler2 not called\n");
handler_errors++;
}
unregister_jprobes(jps, 2);
@@ -256,13 +242,11 @@ static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
if (ret != (rand1 / div_factor)) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "incorrect value in kretprobe handler\n");
+ pr_err("incorrect value in kretprobe handler\n");
}
if (krph_val == 0) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "call to kretprobe entry handler failed\n");
+ pr_err("call to kretprobe entry handler failed\n");
}
krph_val = rand1;
@@ -281,16 +265,14 @@ static int test_kretprobe(void)
ret = register_kretprobe(&rp);
if (ret < 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "register_kretprobe returned %d\n", ret);
+ pr_err("register_kretprobe returned %d\n", ret);
return ret;
}
ret = target(rand1);
unregister_kretprobe(&rp);
if (krph_val != rand1) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kretprobe handler not called\n");
+ pr_err("kretprobe handler not called\n");
handler_errors++;
}
@@ -303,13 +285,11 @@ static int return_handler2(struct kretprobe_instance *ri, struct pt_regs *regs)
if (ret != (rand1 / div_factor) + 1) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "incorrect value in kretprobe handler2\n");
+ pr_err("incorrect value in kretprobe handler2\n");
}
if (krph_val == 0) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "call to kretprobe entry handler failed\n");
+ pr_err("call to kretprobe entry handler failed\n");
}
krph_val = rand1;
@@ -332,24 +312,21 @@ static int test_kretprobes(void)
rp.kp.flags = 0;
ret = register_kretprobes(rps, 2);
if (ret < 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "register_kretprobe returned %d\n", ret);
+ pr_err("register_kretprobe returned %d\n", ret);
return ret;
}
krph_val = 0;
ret = target(rand1);
if (krph_val != rand1) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kretprobe handler not called\n");
+ pr_err("kretprobe handler not called\n");
handler_errors++;
}
krph_val = 0;
ret = target2(rand1);
if (krph_val != rand1) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kretprobe handler2 not called\n");
+ pr_err("kretprobe handler2 not called\n");
handler_errors++;
}
unregister_kretprobes(rps, 2);
@@ -368,7 +345,7 @@ int init_test_probes(void)
rand1 = prandom_u32();
} while (rand1 <= div_factor);
- printk(KERN_INFO "Kprobe smoke test started\n");
+ pr_info("started\n");
num_tests++;
ret = test_kprobe();
if (ret < 0)
@@ -402,13 +379,11 @@ int init_test_probes(void)
#endif /* CONFIG_KRETPROBES */
if (errors)
- printk(KERN_ERR "BUG: Kprobe smoke test: %d out of "
- "%d tests failed\n", errors, num_tests);
+ pr_err("BUG: %d out of %d tests failed\n", errors, num_tests);
else if (handler_errors)
- printk(KERN_ERR "BUG: Kprobe smoke test: %d error(s) "
- "running handlers\n", handler_errors);
+ pr_err("BUG: %d error(s) running handlers\n", handler_errors);
else
- printk(KERN_INFO "Kprobe smoke test passed successfully\n");
+ pr_info("passed successfully\n");
return 0;
}
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index fcc02560fd6b..aa312b0dc3ec 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -526,21 +526,21 @@ static void m_stop(struct seq_file *seq, void *v)
return;
}
-struct seq_operations proc_uid_seq_operations = {
+const struct seq_operations proc_uid_seq_operations = {
.start = uid_m_start,
.stop = m_stop,
.next = m_next,
.show = uid_m_show,
};
-struct seq_operations proc_gid_seq_operations = {
+const struct seq_operations proc_gid_seq_operations = {
.start = gid_m_start,
.stop = m_stop,
.next = m_next,
.show = gid_m_show,
};
-struct seq_operations proc_projid_seq_operations = {
+const struct seq_operations proc_projid_seq_operations = {
.start = projid_m_start,
.stop = m_stop,
.next = m_next,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index c3319bd1b040..a8d6914030fe 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -260,9 +260,11 @@ static void watchdog_overflow_callback(struct perf_event *event,
return;
if (hardlockup_panic)
- panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+ panic("Watchdog detected hard LOCKUP on cpu %d",
+ this_cpu);
else
- WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+ WARN(1, "Watchdog detected hard LOCKUP on cpu %d",
+ this_cpu);
__this_cpu_write(hard_watchdog_warn, true);
return;
@@ -345,7 +347,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
}
}
- printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
+ pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
smp_processor_id(), duration,
current->comm, task_pid_nr(current));
print_modules();
@@ -366,6 +368,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
smp_mb__after_atomic();
}
+ add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
if (softlockup_panic)
panic("softlockup: hung tasks");
__this_cpu_write(soft_watchdog_warn, true);
@@ -484,7 +487,7 @@ static int watchdog_nmi_enable(unsigned int cpu)
if (PTR_ERR(event) == -EOPNOTSUPP)
pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
else if (PTR_ERR(event) == -ENOENT)
- pr_warning("disabled (cpu%i): hardware events not enabled\n",
+ pr_warn("disabled (cpu%i): hardware events not enabled\n",
cpu);
else
pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
diff --git a/lib/Kconfig b/lib/Kconfig
index a8a775730c09..2accc79fdb6f 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -177,6 +177,13 @@ config CRC8
when they need to do cyclic redundancy check according CRC8
algorithm. Module will be called crc8.
+config CRC64_ECMA
+ tristate "CRC64 ECMA function"
+ help
+ This option provides the CRC64 ECMA function. Drivers may select this
+ when they need to do a cyclic redundancy check according to the CRC64
+ ECMA algorithm.
+
config AUDIT_GENERIC
bool
depends on AUDIT && !AUDIT_ARCH
@@ -396,6 +403,39 @@ config CPU_RMAP
config DQL
bool
+config GLOB
+ bool
+# This actually supports modular compilation, but the module overhead
+# is ridiculous for the amount of code involved. Until an out-of-tree
+# driver asks for it, we'll just link it directly into the kernel
+# when required. Since we're ignoring out-of-tree users, there's also
+# no need to bother prompting for a manual decision:
+# prompt "glob_match() function"
+ help
+ This option provides a glob_match function for performing
+ simple text pattern matching. It originated in the ATA code
+ to blacklist particular drive models, but other device drivers
+ may need similar functionality.
+
+ All drivers in the Linux kernel tree that require this function
+ should automatically select this option. Say N unless you
+ are compiling an out-of-tree driver which tells you that it
+ depends on this.
+
+config GLOB_SELFTEST
+ bool "glob self-test on init"
+ default n
+ depends on GLOB
+ help
+ This option enables a simple self-test of the glob_match
+ function on startup. It is primarily useful for people
+ working on the code to ensure they haven't introduced any
+ regressions.
+
+ It only adds a little bit of code and slows kernel boot (or
+ module load) by a small amount, so you're welcome to play with
+ it, but you probably don't need it.
+
#
# Netlink attribute parsing support is select'ed if needed
#
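
The help text above describes lib/glob.c's glob_match(), which does shell-style '*'/'?'/'[...]' matching without allocation. As a hedged illustration of the kind of matching involved, here is a simplified userspace matcher handling only '*' and '?' (not the kernel function itself, and no character-set support):

#include <stdbool.h>
#include <stdio.h>

/* Simplified glob: '*' matches any run, '?' any single char; no [...] sets. */
static bool simple_glob(const char *pat, const char *str)
{
	const char *back_pat = NULL, *back_str = NULL;

	for (;;) {
		char p = *pat++, c = *str;

		switch (p) {
		case '?':
			if (!c)
				return false;
			str++;
			break;
		case '*':
			if (!*pat)		/* trailing '*' matches the rest */
				return true;
			back_pat = pat;		/* remember the backtrack point */
			back_str = str;
			break;
		default:
			if (c == p) {
				if (!c)
					return true;	/* both strings ended */
				str++;
				break;
			}
			/* Mismatch: retry the last '*' one character further on. */
			if (back_pat && *back_str) {
				pat = back_pat;
				str = ++back_str;
				break;
			}
			return false;
		}
	}
}

int main(void)
{
	printf("%d %d %d\n",
	       simple_glob("WDC*",      "WDC WD3200AAKS"),	/* 1 */
	       simple_glob("*AAKS",     "WDC WD3200AAKS"),	/* 1 */
	       simple_glob("ST?60812?", "Maxtor 7V300F0"));	/* 0 */
	return 0;
}
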
@@ -475,4 +515,11 @@ config UCS2_STRING
source "lib/fonts/Kconfig"
+#
+# sg chaining option
+#
+
+config ARCH_HAS_SG_CHAIN
+ def_bool n
+
endmenu
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index df5661c12554..0d1bc8d227ed 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -15,7 +15,7 @@ config PRINTK_TIME
The behavior is also controlled by the kernel command line
parameter printk.time=1. See Documentation/kernel-parameters.txt
-config DEFAULT_MESSAGE_LOGLEVEL
+config MESSAGE_LOGLEVEL_DEFAULT
int "Default message log level (1-7)"
range 1 7
default "4"
diff --git a/lib/Makefile b/lib/Makefile
index 8427df95dade..b73c3c33047e 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -72,6 +72,7 @@ obj-$(CONFIG_CRC32) += crc32.o
obj-$(CONFIG_CRC7) += crc7.o
obj-$(CONFIG_LIBCRC32C) += libcrc32c.o
obj-$(CONFIG_CRC8) += crc8.o
+obj-$(CONFIG_CRC64_ECMA) += crc64_ecma.o
obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
@@ -137,6 +138,8 @@ obj-$(CONFIG_CORDIC) += cordic.o
obj-$(CONFIG_DQL) += dynamic_queue_limits.o
+obj-$(CONFIG_GLOB) += glob.o
+
obj-$(CONFIG_MPILIB) += mpi/
obj-$(CONFIG_SIGNATURE) += digsig.o
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 06f7e4fe8d2d..1e031f2c9aba 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -40,9 +40,9 @@
* for the best explanations of this ordering.
*/
-int __bitmap_empty(const unsigned long *bitmap, int bits)
+int __bitmap_empty(const unsigned long *bitmap, unsigned int bits)
{
- int k, lim = bits/BITS_PER_LONG;
+ unsigned int k, lim = bits/BITS_PER_LONG;
for (k = 0; k < lim; ++k)
if (bitmap[k])
return 0;
@@ -55,9 +55,9 @@ int __bitmap_empty(const unsigned long *bitmap, int bits)
}
EXPORT_SYMBOL(__bitmap_empty);
-int __bitmap_full(const unsigned long *bitmap, int bits)
+int __bitmap_full(const unsigned long *bitmap, unsigned int bits)
{
- int k, lim = bits/BITS_PER_LONG;
+ unsigned int k, lim = bits/BITS_PER_LONG;
for (k = 0; k < lim; ++k)
if (~bitmap[k])
return 0;
@@ -71,9 +71,9 @@ int __bitmap_full(const unsigned long *bitmap, int bits)
EXPORT_SYMBOL(__bitmap_full);
int __bitmap_equal(const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits)
+ const unsigned long *bitmap2, unsigned int bits)
{
- int k, lim = bits/BITS_PER_LONG;
+ unsigned int k, lim = bits/BITS_PER_LONG;
for (k = 0; k < lim; ++k)
if (bitmap1[k] != bitmap2[k])
return 0;
@@ -86,14 +86,14 @@ int __bitmap_equal(const unsigned long *bitmap1,
}
EXPORT_SYMBOL(__bitmap_equal);
-void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits)
+void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits)
{
- int k, lim = bits/BITS_PER_LONG;
+ unsigned int k, lim = bits/BITS_PER_LONG;
for (k = 0; k < lim; ++k)
dst[k] = ~src[k];
if (bits % BITS_PER_LONG)
- dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits);
+ dst[k] = ~src[k];
}
EXPORT_SYMBOL(__bitmap_complement);
@@ -182,23 +182,26 @@ void __bitmap_shift_left(unsigned long *dst,
EXPORT_SYMBOL(__bitmap_shift_left);
int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits)
+ const unsigned long *bitmap2, unsigned int bits)
{
- int k;
- int nr = BITS_TO_LONGS(bits);
+ unsigned int k;
+ unsigned int lim = bits/BITS_PER_LONG;
unsigned long result = 0;
- for (k = 0; k < nr; k++)
+ for (k = 0; k < lim; k++)
result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+ if (bits % BITS_PER_LONG)
+ result |= (dst[k] = bitmap1[k] & bitmap2[k] &
+ BITMAP_LAST_WORD_MASK(bits));
return result != 0;
}
EXPORT_SYMBOL(__bitmap_and);
void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits)
+ const unsigned long *bitmap2, unsigned int bits)
{
- int k;
- int nr = BITS_TO_LONGS(bits);
+ unsigned int k;
+ unsigned int nr = BITS_TO_LONGS(bits);
for (k = 0; k < nr; k++)
dst[k] = bitmap1[k] | bitmap2[k];
@@ -206,10 +209,10 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
EXPORT_SYMBOL(__bitmap_or);
void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits)
+ const unsigned long *bitmap2, unsigned int bits)
{
- int k;
- int nr = BITS_TO_LONGS(bits);
+ unsigned int k;
+ unsigned int nr = BITS_TO_LONGS(bits);
for (k = 0; k < nr; k++)
dst[k] = bitmap1[k] ^ bitmap2[k];
@@ -217,22 +220,25 @@ void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
EXPORT_SYMBOL(__bitmap_xor);
int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits)
+ const unsigned long *bitmap2, unsigned int bits)
{
- int k;
- int nr = BITS_TO_LONGS(bits);
+ unsigned int k;
+ unsigned int lim = bits/BITS_PER_LONG;
unsigned long result = 0;
- for (k = 0; k < nr; k++)
+ for (k = 0; k < lim; k++)
result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
+ if (bits % BITS_PER_LONG)
+ result |= (dst[k] = bitmap1[k] & ~bitmap2[k] &
+ BITMAP_LAST_WORD_MASK(bits));
return result != 0;
}
EXPORT_SYMBOL(__bitmap_andnot);
int __bitmap_intersects(const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits)
+ const unsigned long *bitmap2, unsigned int bits)
{
- int k, lim = bits/BITS_PER_LONG;
+ unsigned int k, lim = bits/BITS_PER_LONG;
for (k = 0; k < lim; ++k)
if (bitmap1[k] & bitmap2[k])
return 1;
@@ -245,9 +251,9 @@ int __bitmap_intersects(const unsigned long *bitmap1,
EXPORT_SYMBOL(__bitmap_intersects);
int __bitmap_subset(const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits)
+ const unsigned long *bitmap2, unsigned int bits)
{
- int k, lim = bits/BITS_PER_LONG;
+ unsigned int k, lim = bits/BITS_PER_LONG;
for (k = 0; k < lim; ++k)
if (bitmap1[k] & ~bitmap2[k])
return 0;
@@ -259,9 +265,10 @@ int __bitmap_subset(const unsigned long *bitmap1,
}
EXPORT_SYMBOL(__bitmap_subset);
-int __bitmap_weight(const unsigned long *bitmap, int bits)
+int __bitmap_weight(const unsigned long *bitmap, unsigned int bits)
{
- int k, w = 0, lim = bits/BITS_PER_LONG;
+ unsigned int k, lim = bits/BITS_PER_LONG;
+ int w = 0;
for (k = 0; k < lim; k++)
w += hweight_long(bitmap[k]);
@@ -273,42 +280,42 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
}
EXPORT_SYMBOL(__bitmap_weight);
-void bitmap_set(unsigned long *map, int start, int nr)
+void bitmap_set(unsigned long *map, unsigned int start, int len)
{
unsigned long *p = map + BIT_WORD(start);
- const int size = start + nr;
+ const unsigned int size = start + len;
int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
- while (nr - bits_to_set >= 0) {
+ while (len - bits_to_set >= 0) {
*p |= mask_to_set;
- nr -= bits_to_set;
+ len -= bits_to_set;
bits_to_set = BITS_PER_LONG;
mask_to_set = ~0UL;
p++;
}
- if (nr) {
+ if (len) {
mask_to_set &= BITMAP_LAST_WORD_MASK(size);
*p |= mask_to_set;
}
}
EXPORT_SYMBOL(bitmap_set);
-void bitmap_clear(unsigned long *map, int start, int nr)
+void bitmap_clear(unsigned long *map, unsigned int start, int len)
{
unsigned long *p = map + BIT_WORD(start);
- const int size = start + nr;
+ const unsigned int size = start + len;
int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
- while (nr - bits_to_clear >= 0) {
+ while (len - bits_to_clear >= 0) {
*p &= ~mask_to_clear;
- nr -= bits_to_clear;
+ len -= bits_to_clear;
bits_to_clear = BITS_PER_LONG;
mask_to_clear = ~0UL;
p++;
}
- if (nr) {
+ if (len) {
mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
*p &= ~mask_to_clear;
}
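
bitmap_set() and bitmap_clear() fill whole words with ~0UL and then mask only the first and last partial words, which is why the loop condition compares the signed remaining length against bits_to_set. A hedged userspace re-implementation of the set side, with the two word masks spelled out:

#include <stdio.h>
#include <limits.h>

#define BITS_PER_LONG	(sizeof(unsigned long) * CHAR_BIT)
#define BIT_WORD(nr)	((nr) / BITS_PER_LONG)
/* Ones from bit (start % BITS_PER_LONG) upwards. */
#define FIRST_WORD_MASK(start)	(~0UL << ((start) % BITS_PER_LONG))
/* Ones up to and including bit ((nbits - 1) % BITS_PER_LONG). */
#define LAST_WORD_MASK(nbits)	(~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))

/* Userspace sketch of bitmap_set(): whole words first, then a masked tail. */
static void my_bitmap_set(unsigned long *map, unsigned int start, int len)
{
	unsigned long *p = map + BIT_WORD(start);
	const unsigned int size = start + len;
	int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
	unsigned long mask_to_set = FIRST_WORD_MASK(start);

	while (len - bits_to_set >= 0) {
		*p |= mask_to_set;
		len -= bits_to_set;
		bits_to_set = BITS_PER_LONG;
		mask_to_set = ~0UL;
		p++;
	}
	if (len) {
		mask_to_set &= LAST_WORD_MASK(size);
		*p |= mask_to_set;
	}
}

int main(void)
{
	unsigned long map[4] = { 0 };

	my_bitmap_set(map, 60, 8);	/* straddles a word boundary on 64-bit */
	for (unsigned int i = 0; i < 4; i++)
		printf("word%u = %#lx\n", i, map[i]);
	return 0;
}
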
@@ -664,13 +671,8 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
{
- char *nl = strchr(bp, '\n');
- int len;
-
- if (nl)
- len = nl - bp;
- else
- len = strlen(bp);
+ char *nl = strchrnul(bp, '\n');
+ int len = nl - bp;
return __bitmap_parselist(bp, len, 0, maskp, nmaskbits);
}
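
The bitmap_parselist() cleanup relies on strchrnul(), which returns a pointer to the terminating NUL when the character is absent, so the "up to the newline or end of string" length needs no branch. A tiny hedged userspace demonstration (glibc exposes strchrnul() behind _GNU_SOURCE; the kernel carries its own helper):

#define _GNU_SOURCE		/* for glibc's strchrnul() */
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *with_nl = "0-3,8\n";
	const char *without = "0-3,8";

	/* Both computations yield 5: the '\n' and the NUL are handled alike. */
	printf("%td %td\n",
	       strchrnul(with_nl, '\n') - with_nl,
	       strchrnul(without, '\n') - without);
	return 0;
}
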
@@ -716,7 +718,7 @@ EXPORT_SYMBOL(bitmap_parselist_user);
*
* If for example, just bits 4 through 7 are set in @buf, then @pos
* values 4 through 7 will get mapped to 0 through 3, respectively,
- * and other @pos values will get mapped to 0. When @pos value 7
+ * and other @pos values will get mapped to -1. When @pos value 7
* gets mapped to (returns) @ord value 3 in this example, that means
* that bit 7 is the 3rd (starting with 0th) set bit in @buf.
*
@@ -1046,7 +1048,7 @@ enum {
REG_OP_RELEASE, /* clear all bits in region */
};
-static int __reg_op(unsigned long *bitmap, int pos, int order, int reg_op)
+static int __reg_op(unsigned long *bitmap, unsigned int pos, int order, int reg_op)
{
int nbits_reg; /* number of bits in region */
int index; /* index first long of region in bitmap */
@@ -1112,11 +1114,11 @@ done:
* Return the bit offset in bitmap of the allocated region,
* or -errno on failure.
*/
-int bitmap_find_free_region(unsigned long *bitmap, int bits, int order)
+int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order)
{
- int pos, end; /* scans bitmap by regions of size order */
+ unsigned int pos, end; /* scans bitmap by regions of size order */
- for (pos = 0 ; (end = pos + (1 << order)) <= bits; pos = end) {
+ for (pos = 0 ; (end = pos + (1U << order)) <= bits; pos = end) {
if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
continue;
__reg_op(bitmap, pos, order, REG_OP_ALLOC);
@@ -1137,7 +1139,7 @@ EXPORT_SYMBOL(bitmap_find_free_region);
*
* No return value.
*/
-void bitmap_release_region(unsigned long *bitmap, int pos, int order)
+void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order)
{
__reg_op(bitmap, pos, order, REG_OP_RELEASE);
}
@@ -1154,12 +1156,11 @@ EXPORT_SYMBOL(bitmap_release_region);
* Return 0 on success, or %-EBUSY if specified region wasn't
* free (not all bits were zero).
*/
-int bitmap_allocate_region(unsigned long *bitmap, int pos, int order)
+int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order)
{
if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
return -EBUSY;
- __reg_op(bitmap, pos, order, REG_OP_ALLOC);
- return 0;
+ return __reg_op(bitmap, pos, order, REG_OP_ALLOC);
}
EXPORT_SYMBOL(bitmap_allocate_region);
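
The region helpers treat the bitmap as an array of aligned 1 << order blocks: the find routine scans block by block, and allocate/release simply set or clear one block, with the change above making bitmap_allocate_region() return __reg_op()'s result directly (0 on success). A hedged sketch of how a caller typically uses the trio as a small ID pool; pool_alloc/pool_free are hypothetical names, not kernel API:

/* Hedged illustration only: a tiny ID pool built on the region helpers. */
#include <linux/bitmap.h>

#define POOL_BITS 128

static DECLARE_BITMAP(pool, POOL_BITS);

/* Grab 1 << order consecutive IDs; returns the first ID or a -errno. */
static int pool_alloc(int order)
{
	return bitmap_find_free_region(pool, POOL_BITS, order);
}

/* Give the block starting at pos back to the pool. */
static void pool_free(int pos, int order)
{
	bitmap_release_region(pool, pos, order);
}
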
diff --git a/lib/cmdline.c b/lib/cmdline.c
index d4932f745e92..76a712e6e20e 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -121,11 +121,7 @@ EXPORT_SYMBOL(get_options);
* @retptr: (output) Optional pointer to next char after parse completes
*
* Parses a string into a number. The number stored at @ptr is
- * potentially suffixed with %K (for kilobytes, or 1024 bytes),
- * %M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or
- * 1073741824). If the number is suffixed with K, M, or G, then
- * the return value is the number multiplied by one kilobyte, one
- * megabyte, or one gigabyte, respectively.
+ * potentially suffixed with K, M, G, T, P, E.
*/
unsigned long long memparse(const char *ptr, char **retptr)
@@ -135,6 +131,15 @@ unsigned long long memparse(const char *ptr, char **retptr)
unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
switch (*endptr) {
+ case 'E':
+ case 'e':
+ ret <<= 10;
+ case 'P':
+ case 'p':
+ ret <<= 10;
+ case 'T':
+ case 't':
+ ret <<= 10;
case 'G':
case 'g':
ret <<= 10;
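
With the new cases the suffix handling is a deliberate fallthrough chain, so each letter adds another factor of 1024 on top of the ones below it. A hedged userspace re-implementation of the same parsing, just to show the resulting magnitudes (my_memparse is an illustration, not the kernel function):

#include <stdio.h>
#include <stdlib.h>

/* Userspace sketch of memparse(): number plus optional K/M/G/T/P/E suffix. */
static unsigned long long my_memparse(const char *ptr, char **retptr)
{
	char *endptr;
	unsigned long long ret = strtoull(ptr, &endptr, 0);

	switch (*endptr) {
	case 'E': case 'e': ret <<= 10;	/* fall through */
	case 'P': case 'p': ret <<= 10;	/* fall through */
	case 'T': case 't': ret <<= 10;	/* fall through */
	case 'G': case 'g': ret <<= 10;	/* fall through */
	case 'M': case 'm': ret <<= 10;	/* fall through */
	case 'K': case 'k': ret <<= 10;
		endptr++;
	default:
		break;
	}
	if (retptr)
		*retptr = endptr;
	return ret;
}

int main(void)
{
	const char *samples[] = { "64K", "16M", "2G", "1T", "3P", "1E" };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("%-4s = %llu bytes\n", samples[i],
		       my_memparse(samples[i], NULL));
	return 0;
}
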
diff --git a/lib/crc64_ecma.c b/lib/crc64_ecma.c
new file mode 100644
index 000000000000..41629ea5a60c
--- /dev/null
+++ b/lib/crc64_ecma.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/crc64_ecma.h>
+
+
+#define CRC64_BYTE_MASK 0xFF
+#define CRC64_TABLE_SIZE 256
+
+
+struct crc64_table {
+ u64 seed;
+ u64 table[CRC64_TABLE_SIZE];
+};
+
+
+static struct crc64_table CRC64_ECMA_182 = {
+ CRC64_DEFAULT_INITVAL,
+ {
+ 0x0000000000000000ULL,
+ 0xb32e4cbe03a75f6fULL,
+ 0xf4843657a840a05bULL,
+ 0x47aa7ae9abe7ff34ULL,
+ 0x7bd0c384ff8f5e33ULL,
+ 0xc8fe8f3afc28015cULL,
+ 0x8f54f5d357cffe68ULL,
+ 0x3c7ab96d5468a107ULL,
+ 0xf7a18709ff1ebc66ULL,
+ 0x448fcbb7fcb9e309ULL,
+ 0x0325b15e575e1c3dULL,
+ 0xb00bfde054f94352ULL,
+ 0x8c71448d0091e255ULL,
+ 0x3f5f08330336bd3aULL,
+ 0x78f572daa8d1420eULL,
+ 0xcbdb3e64ab761d61ULL,
+ 0x7d9ba13851336649ULL,
+ 0xceb5ed8652943926ULL,
+ 0x891f976ff973c612ULL,
+ 0x3a31dbd1fad4997dULL,
+ 0x064b62bcaebc387aULL,
+ 0xb5652e02ad1b6715ULL,
+ 0xf2cf54eb06fc9821ULL,
+ 0x41e11855055bc74eULL,
+ 0x8a3a2631ae2dda2fULL,
+ 0x39146a8fad8a8540ULL,
+ 0x7ebe1066066d7a74ULL,
+ 0xcd905cd805ca251bULL,
+ 0xf1eae5b551a2841cULL,
+ 0x42c4a90b5205db73ULL,
+ 0x056ed3e2f9e22447ULL,
+ 0xb6409f5cfa457b28ULL,
+ 0xfb374270a266cc92ULL,
+ 0x48190ecea1c193fdULL,
+ 0x0fb374270a266cc9ULL,
+ 0xbc9d3899098133a6ULL,
+ 0x80e781f45de992a1ULL,
+ 0x33c9cd4a5e4ecdceULL,
+ 0x7463b7a3f5a932faULL,
+ 0xc74dfb1df60e6d95ULL,
+ 0x0c96c5795d7870f4ULL,
+ 0xbfb889c75edf2f9bULL,
+ 0xf812f32ef538d0afULL,
+ 0x4b3cbf90f69f8fc0ULL,
+ 0x774606fda2f72ec7ULL,
+ 0xc4684a43a15071a8ULL,
+ 0x83c230aa0ab78e9cULL,
+ 0x30ec7c140910d1f3ULL,
+ 0x86ace348f355aadbULL,
+ 0x3582aff6f0f2f5b4ULL,
+ 0x7228d51f5b150a80ULL,
+ 0xc10699a158b255efULL,
+ 0xfd7c20cc0cdaf4e8ULL,
+ 0x4e526c720f7dab87ULL,
+ 0x09f8169ba49a54b3ULL,
+ 0xbad65a25a73d0bdcULL,
+ 0x710d64410c4b16bdULL,
+ 0xc22328ff0fec49d2ULL,
+ 0x85895216a40bb6e6ULL,
+ 0x36a71ea8a7ace989ULL,
+ 0x0adda7c5f3c4488eULL,
+ 0xb9f3eb7bf06317e1ULL,
+ 0xfe5991925b84e8d5ULL,
+ 0x4d77dd2c5823b7baULL,
+ 0x64b62bcaebc387a1ULL,
+ 0xd7986774e864d8ceULL,
+ 0x90321d9d438327faULL,
+ 0x231c512340247895ULL,
+ 0x1f66e84e144cd992ULL,
+ 0xac48a4f017eb86fdULL,
+ 0xebe2de19bc0c79c9ULL,
+ 0x58cc92a7bfab26a6ULL,
+ 0x9317acc314dd3bc7ULL,
+ 0x2039e07d177a64a8ULL,
+ 0x67939a94bc9d9b9cULL,
+ 0xd4bdd62abf3ac4f3ULL,
+ 0xe8c76f47eb5265f4ULL,
+ 0x5be923f9e8f53a9bULL,
+ 0x1c4359104312c5afULL,
+ 0xaf6d15ae40b59ac0ULL,
+ 0x192d8af2baf0e1e8ULL,
+ 0xaa03c64cb957be87ULL,
+ 0xeda9bca512b041b3ULL,
+ 0x5e87f01b11171edcULL,
+ 0x62fd4976457fbfdbULL,
+ 0xd1d305c846d8e0b4ULL,
+ 0x96797f21ed3f1f80ULL,
+ 0x2557339fee9840efULL,
+ 0xee8c0dfb45ee5d8eULL,
+ 0x5da24145464902e1ULL,
+ 0x1a083bacedaefdd5ULL,
+ 0xa9267712ee09a2baULL,
+ 0x955cce7fba6103bdULL,
+ 0x267282c1b9c65cd2ULL,
+ 0x61d8f8281221a3e6ULL,
+ 0xd2f6b4961186fc89ULL,
+ 0x9f8169ba49a54b33ULL,
+ 0x2caf25044a02145cULL,
+ 0x6b055fede1e5eb68ULL,
+ 0xd82b1353e242b407ULL,
+ 0xe451aa3eb62a1500ULL,
+ 0x577fe680b58d4a6fULL,
+ 0x10d59c691e6ab55bULL,
+ 0xa3fbd0d71dcdea34ULL,
+ 0x6820eeb3b6bbf755ULL,
+ 0xdb0ea20db51ca83aULL,
+ 0x9ca4d8e41efb570eULL,
+ 0x2f8a945a1d5c0861ULL,
+ 0x13f02d374934a966ULL,
+ 0xa0de61894a93f609ULL,
+ 0xe7741b60e174093dULL,
+ 0x545a57dee2d35652ULL,
+ 0xe21ac88218962d7aULL,
+ 0x5134843c1b317215ULL,
+ 0x169efed5b0d68d21ULL,
+ 0xa5b0b26bb371d24eULL,
+ 0x99ca0b06e7197349ULL,
+ 0x2ae447b8e4be2c26ULL,
+ 0x6d4e3d514f59d312ULL,
+ 0xde6071ef4cfe8c7dULL,
+ 0x15bb4f8be788911cULL,
+ 0xa6950335e42fce73ULL,
+ 0xe13f79dc4fc83147ULL,
+ 0x521135624c6f6e28ULL,
+ 0x6e6b8c0f1807cf2fULL,
+ 0xdd45c0b11ba09040ULL,
+ 0x9aefba58b0476f74ULL,
+ 0x29c1f6e6b3e0301bULL,
+ 0xc96c5795d7870f42ULL,
+ 0x7a421b2bd420502dULL,
+ 0x3de861c27fc7af19ULL,
+ 0x8ec62d7c7c60f076ULL,
+ 0xb2bc941128085171ULL,
+ 0x0192d8af2baf0e1eULL,
+ 0x4638a2468048f12aULL,
+ 0xf516eef883efae45ULL,
+ 0x3ecdd09c2899b324ULL,
+ 0x8de39c222b3eec4bULL,
+ 0xca49e6cb80d9137fULL,
+ 0x7967aa75837e4c10ULL,
+ 0x451d1318d716ed17ULL,
+ 0xf6335fa6d4b1b278ULL,
+ 0xb199254f7f564d4cULL,
+ 0x02b769f17cf11223ULL,
+ 0xb4f7f6ad86b4690bULL,
+ 0x07d9ba1385133664ULL,
+ 0x4073c0fa2ef4c950ULL,
+ 0xf35d8c442d53963fULL,
+ 0xcf273529793b3738ULL,
+ 0x7c0979977a9c6857ULL,
+ 0x3ba3037ed17b9763ULL,
+ 0x888d4fc0d2dcc80cULL,
+ 0x435671a479aad56dULL,
+ 0xf0783d1a7a0d8a02ULL,
+ 0xb7d247f3d1ea7536ULL,
+ 0x04fc0b4dd24d2a59ULL,
+ 0x3886b22086258b5eULL,
+ 0x8ba8fe9e8582d431ULL,
+ 0xcc0284772e652b05ULL,
+ 0x7f2cc8c92dc2746aULL,
+ 0x325b15e575e1c3d0ULL,
+ 0x8175595b76469cbfULL,
+ 0xc6df23b2dda1638bULL,
+ 0x75f16f0cde063ce4ULL,
+ 0x498bd6618a6e9de3ULL,
+ 0xfaa59adf89c9c28cULL,
+ 0xbd0fe036222e3db8ULL,
+ 0x0e21ac88218962d7ULL,
+ 0xc5fa92ec8aff7fb6ULL,
+ 0x76d4de52895820d9ULL,
+ 0x317ea4bb22bfdfedULL,
+ 0x8250e80521188082ULL,
+ 0xbe2a516875702185ULL,
+ 0x0d041dd676d77eeaULL,
+ 0x4aae673fdd3081deULL,
+ 0xf9802b81de97deb1ULL,
+ 0x4fc0b4dd24d2a599ULL,
+ 0xfceef8632775faf6ULL,
+ 0xbb44828a8c9205c2ULL,
+ 0x086ace348f355aadULL,
+ 0x34107759db5dfbaaULL,
+ 0x873e3be7d8faa4c5ULL,
+ 0xc094410e731d5bf1ULL,
+ 0x73ba0db070ba049eULL,
+ 0xb86133d4dbcc19ffULL,
+ 0x0b4f7f6ad86b4690ULL,
+ 0x4ce50583738cb9a4ULL,
+ 0xffcb493d702be6cbULL,
+ 0xc3b1f050244347ccULL,
+ 0x709fbcee27e418a3ULL,
+ 0x3735c6078c03e797ULL,
+ 0x841b8ab98fa4b8f8ULL,
+ 0xadda7c5f3c4488e3ULL,
+ 0x1ef430e13fe3d78cULL,
+ 0x595e4a08940428b8ULL,
+ 0xea7006b697a377d7ULL,
+ 0xd60abfdbc3cbd6d0ULL,
+ 0x6524f365c06c89bfULL,
+ 0x228e898c6b8b768bULL,
+ 0x91a0c532682c29e4ULL,
+ 0x5a7bfb56c35a3485ULL,
+ 0xe955b7e8c0fd6beaULL,
+ 0xaeffcd016b1a94deULL,
+ 0x1dd181bf68bdcbb1ULL,
+ 0x21ab38d23cd56ab6ULL,
+ 0x9285746c3f7235d9ULL,
+ 0xd52f0e859495caedULL,
+ 0x6601423b97329582ULL,
+ 0xd041dd676d77eeaaULL,
+ 0x636f91d96ed0b1c5ULL,
+ 0x24c5eb30c5374ef1ULL,
+ 0x97eba78ec690119eULL,
+ 0xab911ee392f8b099ULL,
+ 0x18bf525d915feff6ULL,
+ 0x5f1528b43ab810c2ULL,
+ 0xec3b640a391f4fadULL,
+ 0x27e05a6e926952ccULL,
+ 0x94ce16d091ce0da3ULL,
+ 0xd3646c393a29f297ULL,
+ 0x604a2087398eadf8ULL,
+ 0x5c3099ea6de60cffULL,
+ 0xef1ed5546e415390ULL,
+ 0xa8b4afbdc5a6aca4ULL,
+ 0x1b9ae303c601f3cbULL,
+ 0x56ed3e2f9e224471ULL,
+ 0xe5c372919d851b1eULL,
+ 0xa26908783662e42aULL,
+ 0x114744c635c5bb45ULL,
+ 0x2d3dfdab61ad1a42ULL,
+ 0x9e13b115620a452dULL,
+ 0xd9b9cbfcc9edba19ULL,
+ 0x6a978742ca4ae576ULL,
+ 0xa14cb926613cf817ULL,
+ 0x1262f598629ba778ULL,
+ 0x55c88f71c97c584cULL,
+ 0xe6e6c3cfcadb0723ULL,
+ 0xda9c7aa29eb3a624ULL,
+ 0x69b2361c9d14f94bULL,
+ 0x2e184cf536f3067fULL,
+ 0x9d36004b35545910ULL,
+ 0x2b769f17cf112238ULL,
+ 0x9858d3a9ccb67d57ULL,
+ 0xdff2a94067518263ULL,
+ 0x6cdce5fe64f6dd0cULL,
+ 0x50a65c93309e7c0bULL,
+ 0xe388102d33392364ULL,
+ 0xa4226ac498dedc50ULL,
+ 0x170c267a9b79833fULL,
+ 0xdcd7181e300f9e5eULL,
+ 0x6ff954a033a8c131ULL,
+ 0x28532e49984f3e05ULL,
+ 0x9b7d62f79be8616aULL,
+ 0xa707db9acf80c06dULL,
+ 0x14299724cc279f02ULL,
+ 0x5383edcd67c06036ULL,
+ 0xe0ada17364673f59ULL
+ }
+};
+
+
+/*
+ * crc64_ecma_seed - Initializes the CRC64 ECMA seed.
+ */
+u64 crc64_ecma_seed(void)
+{
+ return CRC64_ECMA_182.seed;
+}
+EXPORT_SYMBOL(crc64_ecma_seed);
+
+/*
+ * crc64_ecma - Computes the 64 bit ECMA CRC.
+ *
+ * pdata: pointer to the data to compute checksum for.
+ * nbytes: number of bytes in data buffer.
+ * seed: CRC seed.
+ */
+u64 crc64_ecma(u8 const *pdata, u32 nbytes, u64 seed)
+{
+ unsigned int i;
+ u64 crc = seed;
+
+ for (i = 0; i < nbytes; i++)
+ crc = CRC64_ECMA_182.table[(crc ^ pdata[i]) & CRC64_BYTE_MASK] ^
+ (crc >> 8);
+
+ return crc;
+}
+EXPORT_SYMBOL(crc64_ecma);
+
+MODULE_DESCRIPTION("CRC64 ECMA function");
+MODULE_AUTHOR("Freescale Semiconductor Inc.");
+MODULE_LICENSE("GPL");
diff --git a/lib/decompress.c b/lib/decompress.c
index 86069d74c062..37f3c786348f 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -54,7 +54,7 @@ static const struct compress_format compressed_formats[] __initconst = {
{ {0, 0}, NULL, NULL }
};
-decompress_fn __init decompress_method(const unsigned char *inbuf, int len,
+decompress_fn __init decompress_method(const unsigned char *inbuf, long len,
const char **name)
{
const struct compress_format *cf;
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
index 31c5f7675fbf..8290e0bef7ea 100644
--- a/lib/decompress_bunzip2.c
+++ b/lib/decompress_bunzip2.c
@@ -92,8 +92,8 @@ struct bunzip_data {
/* State for interrupting output loop */
int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
/* I/O tracking data (file handles, buffers, positions, etc.) */
- int (*fill)(void*, unsigned int);
- int inbufCount, inbufPos /*, outbufPos*/;
+ long (*fill)(void*, unsigned long);
+ long inbufCount, inbufPos /*, outbufPos*/;
unsigned char *inbuf /*,*outbuf*/;
unsigned int inbufBitCount, inbufBits;
/* The CRC values stored in the block header and calculated from the
@@ -617,7 +617,7 @@ decode_next_byte:
goto decode_next_byte;
}
-static int INIT nofill(void *buf, unsigned int len)
+static long INIT nofill(void *buf, unsigned long len)
{
return -1;
}
@@ -625,8 +625,8 @@ static int INIT nofill(void *buf, unsigned int len)
/* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain
a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are
ignored, and data is read from file handle into temporary buffer. */
-static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
- int (*fill)(void*, unsigned int))
+static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, long len,
+ long (*fill)(void*, unsigned long))
{
struct bunzip_data *bd;
unsigned int i, j, c;
@@ -675,11 +675,11 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data,
not end of file.) */
-STATIC int INIT bunzip2(unsigned char *buf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+STATIC int INIT bunzip2(unsigned char *buf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *outbuf,
- int *pos,
+ long *pos,
void(*error)(char *x))
{
struct bunzip_data *bd;
@@ -743,11 +743,11 @@ exit_0:
}
#ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+STATIC int INIT decompress(unsigned char *buf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *outbuf,
- int *pos,
+ long *pos,
void(*error)(char *x))
{
return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error);
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c
index 0edfd742a154..d4c7891635ec 100644
--- a/lib/decompress_inflate.c
+++ b/lib/decompress_inflate.c
@@ -27,17 +27,17 @@
#define GZIP_IOBUF_SIZE (16*1024)
-static int INIT nofill(void *buffer, unsigned int len)
+static long INIT nofill(void *buffer, unsigned long len)
{
return -1;
}
/* Included from initramfs et al code */
-STATIC int INIT gunzip(unsigned char *buf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+STATIC int INIT gunzip(unsigned char *buf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *out_buf,
- int *pos,
+ long *pos,
void(*error)(char *x)) {
u8 *zbuf;
struct z_stream_s *strm;
@@ -142,7 +142,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
/* Write any data generated */
if (flush && strm->next_out > out_buf) {
- int l = strm->next_out - out_buf;
+ long l = strm->next_out - out_buf;
if (l != flush(out_buf, l)) {
rc = -1;
error("write error");
diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
index 7d1e83caf8ad..40f66ebe57b7 100644
--- a/lib/decompress_unlz4.c
+++ b/lib/decompress_unlz4.c
@@ -31,10 +31,10 @@
#define LZ4_DEFAULT_UNCOMPRESSED_CHUNK_SIZE (8 << 20)
#define ARCHIVE_MAGICNUMBER 0x184C2102
-STATIC inline int INIT unlz4(u8 *input, int in_len,
- int (*fill) (void *, unsigned int),
- int (*flush) (void *, unsigned int),
- u8 *output, int *posp,
+STATIC inline int INIT unlz4(u8 *input, long in_len,
+ long (*fill)(void *, unsigned long),
+ long (*flush)(void *, unsigned long),
+ u8 *output, long *posp,
void (*error) (char *x))
{
int ret = -1;
@@ -43,7 +43,7 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
u8 *inp;
u8 *inp_start;
u8 *outp;
- int size = in_len;
+ long size = in_len;
#ifdef PREBOOT
size_t out_len = get_unaligned_le32(input + in_len);
#endif
@@ -83,13 +83,20 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
if (posp)
*posp = 0;
- if (fill)
- fill(inp, 4);
+ if (fill) {
+ size = fill(inp, 4);
+ if (size < 4) {
+ error("data corrupted");
+ goto exit_2;
+ }
+ }
chunksize = get_unaligned_le32(inp);
if (chunksize == ARCHIVE_MAGICNUMBER) {
- inp += 4;
- size -= 4;
+ if (!fill) {
+ inp += 4;
+ size -= 4;
+ }
} else {
error("invalid header");
goto exit_2;
@@ -100,29 +107,44 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
for (;;) {
- if (fill)
- fill(inp, 4);
+ if (fill) {
+ size = fill(inp, 4);
+ if (size == 0)
+ break;
+ if (size < 4) {
+ error("data corrupted");
+ goto exit_2;
+ }
+ }
chunksize = get_unaligned_le32(inp);
if (chunksize == ARCHIVE_MAGICNUMBER) {
- inp += 4;
- size -= 4;
+ if (!fill) {
+ inp += 4;
+ size -= 4;
+ }
if (posp)
*posp += 4;
continue;
}
- inp += 4;
- size -= 4;
+
if (posp)
*posp += 4;
- if (fill) {
+ if (!fill) {
+ inp += 4;
+ size -= 4;
+ } else {
if (chunksize > lz4_compressbound(uncomp_chunksize)) {
error("chunk length is longer than allocated");
goto exit_2;
}
- fill(inp, chunksize);
+ size = fill(inp, chunksize);
+ if (size < chunksize) {
+ error("data corrupted");
+ goto exit_2;
+ }
}
#ifdef PREBOOT
if (out_len >= uncomp_chunksize) {
@@ -149,18 +171,17 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
if (posp)
*posp += chunksize;
- size -= chunksize;
+ if (!fill) {
+ size -= chunksize;
- if (size == 0)
- break;
- else if (size < 0) {
- error("data corrupted");
- goto exit_2;
+ if (size == 0)
+ break;
+ else if (size < 0) {
+ error("data corrupted");
+ goto exit_2;
+ }
+ inp += chunksize;
}
-
- inp += chunksize;
- if (fill)
- inp = inp_start;
}
ret = 0;
@@ -175,11 +196,11 @@ exit_0:
}
#ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, int in_len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+STATIC int INIT decompress(unsigned char *buf, long in_len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *posp,
+ long *posp,
void(*error)(char *x)
)
{
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c
index 32adb73a9038..0be83af62b88 100644
--- a/lib/decompress_unlzma.c
+++ b/lib/decompress_unlzma.c
@@ -65,11 +65,11 @@ static long long INIT read_int(unsigned char *ptr, int size)
#define LZMA_IOBUF_SIZE 0x10000
struct rc {
- int (*fill)(void*, unsigned int);
+ long (*fill)(void*, unsigned long);
uint8_t *ptr;
uint8_t *buffer;
uint8_t *buffer_end;
- int buffer_size;
+ long buffer_size;
uint32_t code;
uint32_t range;
uint32_t bound;
@@ -82,7 +82,7 @@ struct rc {
#define RC_MODEL_TOTAL_BITS 11
-static int INIT nofill(void *buffer, unsigned int len)
+static long INIT nofill(void *buffer, unsigned long len)
{
return -1;
}
@@ -99,8 +99,8 @@ static void INIT rc_read(struct rc *rc)
/* Called once */
static inline void INIT rc_init(struct rc *rc,
- int (*fill)(void*, unsigned int),
- char *buffer, int buffer_size)
+ long (*fill)(void*, unsigned long),
+ char *buffer, long buffer_size)
{
if (fill)
rc->fill = fill;
@@ -280,7 +280,7 @@ struct writer {
size_t buffer_pos;
int bufsize;
size_t global_pos;
- int(*flush)(void*, unsigned int);
+ long (*flush)(void*, unsigned long);
struct lzma_header *header;
};
@@ -534,11 +534,11 @@ static inline int INIT process_bit1(struct writer *wr, struct rc *rc,
-STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+STATIC inline int INIT unlzma(unsigned char *buf, long in_len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *posp,
+ long *posp,
void(*error)(char *x)
)
{
@@ -667,11 +667,11 @@ exit_0:
}
#ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, int in_len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+STATIC int INIT decompress(unsigned char *buf, long in_len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *posp,
+ long *posp,
void(*error)(char *x)
)
{
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
index 960183d4258f..b94a31bdd87d 100644
--- a/lib/decompress_unlzo.c
+++ b/lib/decompress_unlzo.c
@@ -51,7 +51,7 @@ static const unsigned char lzop_magic[] = {
#define HEADER_SIZE_MIN (9 + 7 + 4 + 8 + 1 + 4)
#define HEADER_SIZE_MAX (9 + 7 + 1 + 8 + 8 + 4 + 1 + 255 + 4)
-STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len)
+STATIC inline long INIT parse_header(u8 *input, long *skip, long in_len)
{
int l;
u8 *parse = input;
@@ -108,14 +108,14 @@ STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len)
return 1;
}
-STATIC inline int INIT unlzo(u8 *input, int in_len,
- int (*fill) (void *, unsigned int),
- int (*flush) (void *, unsigned int),
- u8 *output, int *posp,
+STATIC int INIT unlzo(u8 *input, long in_len,
+ long (*fill)(void *, unsigned long),
+ long (*flush)(void *, unsigned long),
+ u8 *output, long *posp,
void (*error) (char *x))
{
u8 r = 0;
- int skip = 0;
+ long skip = 0;
u32 src_len, dst_len;
size_t tmp;
u8 *in_buf, *in_buf_save, *out_buf;
diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c
index 9f34eb56854d..b07a78340e9d 100644
--- a/lib/decompress_unxz.c
+++ b/lib/decompress_unxz.c
@@ -248,10 +248,10 @@ void *memmove(void *dest, const void *src, size_t size)
* both input and output buffers are available as a single chunk, i.e. when
* fill() and flush() won't be used.
*/
-STATIC int INIT unxz(unsigned char *in, int in_size,
- int (*fill)(void *dest, unsigned int size),
- int (*flush)(void *src, unsigned int size),
- unsigned char *out, int *in_used,
+STATIC int INIT unxz(unsigned char *in, long in_size,
+ long (*fill)(void *dest, unsigned long size),
+ long (*flush)(void *src, unsigned long size),
+ unsigned char *out, long *in_used,
void (*error)(char *x))
{
struct xz_buf b;
@@ -329,7 +329,7 @@ STATIC int INIT unxz(unsigned char *in, int in_size,
* returned by xz_dec_run(), but probably
* it's not too bad.
*/
- if (flush(b.out, b.out_pos) != (int)b.out_pos)
+ if (flush(b.out, b.out_pos) != (long)b.out_pos)
ret = XZ_BUF_ERROR;
b.out_pos = 0;
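
The hunks above widen the fill()/flush() callbacks and the position pointer from int to long across all in-kernel decompressors. As a rough sketch of what a caller-supplied fill() now looks like under the widened prototype (example_fill, src_buf, src_len and src_off are illustrative names, not part of this patch):

#include <linux/string.h>

/*
 * Sketch of a fill() callback matching long (*fill)(void *, unsigned long):
 * copy up to @len bytes from a caller-maintained source buffer and report
 * how many were delivered.  Returning 0 signals end of input; a negative
 * value signals an error, which is what the decompressors above now check.
 */
static const unsigned char *src_buf;	/* assumed set up by the caller */
static unsigned long src_len, src_off;

static long example_fill(void *dst, unsigned long len)
{
	unsigned long n = src_len - src_off;

	if (n > len)
		n = len;
	memcpy(dst, src_buf + src_off, n);
	src_off += n;
	return n;
}
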
diff --git a/lib/glob.c b/lib/glob.c
new file mode 100644
index 000000000000..500fc80d23e1
--- /dev/null
+++ b/lib/glob.c
@@ -0,0 +1,287 @@
+#include <linux/module.h>
+#include <linux/glob.h>
+
+/*
+ * The only reason this code can be compiled as a module is because the
+ * ATA code that depends on it can be as well. In practice, they're
+ * both usually compiled in and the module overhead goes away.
+ */
+MODULE_DESCRIPTION("glob(7) matching");
+MODULE_LICENSE("Dual MIT/GPL");
+
+/**
+ * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
+ * @pat: Shell-style pattern to match, e.g. "*.[ch]".
+ * @str: String to match. The pattern must match the entire string.
+ *
+ * Perform shell-style glob matching, returning true (1) if the match
+ * succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0).
+ *
+ * Pattern metacharacters are ?, *, [ and \.
+ * (And, inside character classes, !, - and ].)
+ *
+ * This is a small and simple implementation intended for device blacklists
+ * where a string is matched against a number of patterns. Thus, it
+ * does not preprocess the patterns. It is non-recursive, and run-time
+ * is at most quadratic: strlen(@str)*strlen(@pat).
+ *
+ * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
+ * it takes 6 passes over the pattern before matching the string.
+ *
+ * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
+ * treat / or leading . specially; it isn't actually used for pathnames.
+ *
+ * Note that according to glob(7) (and unlike bash), character classes
+ * are complemented by a leading !; this does not support the regex-style
+ * [^a-z] syntax.
+ *
+ * An opening bracket without a matching close is matched literally.
+ */
+bool __pure glob_match(char const *pat, char const *str)
+{
+ /*
+ * Backtrack to previous * on mismatch and retry starting one
+ * character later in the string. Because * matches all characters
+ * (no exception for /), it can be easily proved that there's
+ * never a need to backtrack multiple levels.
+ */
+ char const *back_pat = NULL, *back_str = NULL;
+
+ /*
+ * Loop over each token (character or class) in pat, matching
+ * it against the remaining unmatched tail of str. Return false
+ * on mismatch, or true after matching the trailing nul bytes.
+ */
+ for (;;) {
+ unsigned char c = *str++;
+ unsigned char d = *pat++;
+
+ switch (d) {
+ case '?': /* Wildcard: anything but nul */
+ if (c == '\0')
+ return false;
+ break;
+ case '*': /* Any-length wildcard */
+ if (*pat == '\0') /* Optimize trailing * case */
+ return true;
+ back_pat = pat;
+ back_str = --str; /* Allow zero-length match */
+ break;
+ case '[': { /* Character class */
+ bool match = false, inverted = (*pat == '!');
+ char const *class = pat + inverted;
+ unsigned char a = *class++;
+
+ /*
+ * Iterate over each span in the character class.
+ * A span is either a single character a, or a
+ * range a-b. The first span may begin with ']'.
+ */
+ do {
+ unsigned char b = a;
+
+ if (a == '\0') /* Malformed */
+ goto literal;
+
+ if (class[0] == '-' && class[1] != ']') {
+ b = class[1];
+
+ if (b == '\0')
+ goto literal;
+
+ class += 2;
+ /* Any special action if a > b? */
+ }
+ match |= (a <= c && c <= b);
+ } while ((a = *class++) != ']');
+
+ if (match == inverted)
+ goto backtrack;
+ pat = class;
+ }
+ break;
+ case '\\':
+ d = *pat++;
+ /*FALLTHROUGH*/
+ default: /* Literal character */
+literal:
+ if (c == d) {
+ if (d == '\0')
+ return true;
+ break;
+ }
+backtrack:
+ if (c == '\0' || !back_pat)
+ return false; /* No point continuing */
+ /* Try again from last *, one character later in str. */
+ pat = back_pat;
+ str = ++back_str;
+ break;
+ }
+ }
+}
+EXPORT_SYMBOL(glob_match);
+
+
+#ifdef CONFIG_GLOB_SELFTEST
+
+#include <linux/printk.h>
+#include <linux/moduleparam.h>
+
+/* Boot with "glob.verbose=1" to show successful tests, too */
+static bool verbose = false;
+module_param(verbose, bool, 0);
+
+struct glob_test {
+ char const *pat, *str;
+ bool expected;
+};
+
+static bool __pure __init test(char const *pat, char const *str, bool expected)
+{
+ bool match = glob_match(pat, str);
+ bool success = match == expected;
+
+ /* Can't get string literals into a particular section, so... */
+ static char const msg_error[] __initconst =
+ KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n";
+ static char const msg_ok[] __initconst =
+ KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n";
+ static char const mismatch[] __initconst = "mismatch";
+ char const *message;
+
+ if (!success)
+ message = msg_error;
+ else if (verbose)
+ message = msg_ok;
+ else
+ return success;
+
+ printk(message, pat, str, mismatch + 3*match);
+ return success;
+}
+
+/*
+ * The tests are all jammed together in one array to make it simpler
+ * to place that array in the .init.rodata section. The obvious
+ * "array of structures containing char *" has no way to force the
+ * pointed-to strings to be in a particular section.
+ *
+ * Anyway, a test consists of:
+ * 1. Expected glob_match result: '1' or '0'.
+ * 2. Pattern to match: null-terminated string
+ * 3. String to match against: null-terminated string
+ *
+ * The list of tests is terminated with a final '\0' instead of
+ * a glob_match result character.
+ */
+static char const glob_tests[] __initconst =
+ /* Some basic tests */
+ "1" "a\0" "a\0"
+ "0" "a\0" "b\0"
+ "0" "a\0" "aa\0"
+ "0" "a\0" "\0"
+ "1" "\0" "\0"
+ "0" "\0" "a\0"
+ /* Simple character class tests */
+ "1" "[a]\0" "a\0"
+ "0" "[a]\0" "b\0"
+ "0" "[!a]\0" "a\0"
+ "1" "[!a]\0" "b\0"
+ "1" "[ab]\0" "a\0"
+ "1" "[ab]\0" "b\0"
+ "0" "[ab]\0" "c\0"
+ "1" "[!ab]\0" "c\0"
+ "1" "[a-c]\0" "b\0"
+ "0" "[a-c]\0" "d\0"
+ /* Corner cases in character class parsing */
+ "1" "[a-c-e-g]\0" "-\0"
+ "0" "[a-c-e-g]\0" "d\0"
+ "1" "[a-c-e-g]\0" "f\0"
+ "1" "[]a-ceg-ik[]\0" "a\0"
+ "1" "[]a-ceg-ik[]\0" "]\0"
+ "1" "[]a-ceg-ik[]\0" "[\0"
+ "1" "[]a-ceg-ik[]\0" "h\0"
+ "0" "[]a-ceg-ik[]\0" "f\0"
+ "0" "[!]a-ceg-ik[]\0" "h\0"
+ "0" "[!]a-ceg-ik[]\0" "]\0"
+ "1" "[!]a-ceg-ik[]\0" "f\0"
+ /* Simple wild cards */
+ "1" "?\0" "a\0"
+ "0" "?\0" "aa\0"
+ "0" "??\0" "a\0"
+ "1" "?x?\0" "axb\0"
+ "0" "?x?\0" "abx\0"
+ "0" "?x?\0" "xab\0"
+ /* Asterisk wild cards (backtracking) */
+ "0" "*??\0" "a\0"
+ "1" "*??\0" "ab\0"
+ "1" "*??\0" "abc\0"
+ "1" "*??\0" "abcd\0"
+ "0" "??*\0" "a\0"
+ "1" "??*\0" "ab\0"
+ "1" "??*\0" "abc\0"
+ "1" "??*\0" "abcd\0"
+ "0" "?*?\0" "a\0"
+ "1" "?*?\0" "ab\0"
+ "1" "?*?\0" "abc\0"
+ "1" "?*?\0" "abcd\0"
+ "1" "*b\0" "b\0"
+ "1" "*b\0" "ab\0"
+ "0" "*b\0" "ba\0"
+ "1" "*b\0" "bb\0"
+ "1" "*b\0" "abb\0"
+ "1" "*b\0" "bab\0"
+ "1" "*bc\0" "abbc\0"
+ "1" "*bc\0" "bc\0"
+ "1" "*bc\0" "bbc\0"
+ "1" "*bc\0" "bcbc\0"
+ /* Multiple asterisks (complex backtracking) */
+ "1" "*ac*\0" "abacadaeafag\0"
+ "1" "*ac*ae*ag*\0" "abacadaeafag\0"
+ "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
+ "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
+ "1" "*abcd*\0" "abcabcabcabcdefg\0"
+ "1" "*ab*cd*\0" "abcabcabcabcdefg\0"
+ "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
+ "0" "*abcd*\0" "abcabcabcabcefg\0"
+ "0" "*ab*cd*\0" "abcabcabcabcefg\0";
+
+static int __init glob_init(void)
+{
+ unsigned successes = 0;
+ unsigned n = 0;
+ char const *p = glob_tests;
+ static char const message[] __initconst =
+ KERN_INFO "glob: %u self-tests passed, %u failed\n";
+
+ /*
+ * Tests are jammed together in a string. The first byte is '1'
+ * or '0' to indicate the expected outcome, or '\0' to indicate the
+ * end of the tests. Then come two null-terminated strings: the
+ * pattern and the string to match it against.
+ */
+ while (*p) {
+ bool expected = *p++ & 1;
+ char const *pat = p;
+
+ p += strlen(p) + 1;
+ successes += test(pat, p, expected);
+ p += strlen(p) + 1;
+ n++;
+ }
+
+ n -= successes;
+ printk(message, successes, n);
+
+ /* What's the errno for "kernel bug detected"? Guess... */
+ return n ? -ECANCELED : 0;
+}
+
+/* We need a dummy exit function to allow unload */
+static void __exit glob_fini(void) { }
+
+module_init(glob_init);
+module_exit(glob_fini);
+
+#endif /* CONFIG_GLOB_SELFTEST */
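
As a usage illustration of glob_match() described above (the blacklist contents and the helper name are made up for this sketch; the kerneldoc's "device blacklists" use case is the model):

#include <linux/kernel.h>
#include <linux/glob.h>

/* Hypothetical example: match a device model string against glob patterns. */
static const char *const model_blacklist[] = {
	"WDC WD??00BEVS-*",
	"ST9120822AS",
};

static bool model_is_blacklisted(const char *model)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(model_blacklist); i++)
		if (glob_match(model_blacklist[i], model))
			return true;
	return false;
}
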
diff --git a/lib/idr.c b/lib/idr.c
index 39158abebad1..50be3fa9b657 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -590,26 +590,27 @@ static void __idr_remove_all(struct idr *idp)
struct idr_layer **paa = &pa[0];
n = idp->layers * IDR_BITS;
- p = idp->top;
+ *paa = idp->top;
RCU_INIT_POINTER(idp->top, NULL);
max = idr_max(idp->layers);
id = 0;
while (id >= 0 && id <= max) {
+ p = *paa;
while (n > IDR_BITS && p) {
n -= IDR_BITS;
- *paa++ = p;
p = p->ary[(id >> n) & IDR_MASK];
+ *++paa = p;
}
bt_mask = id;
id += 1 << n;
/* Get the highest bit that the above add changed from 0->1. */
while (n < fls(id ^ bt_mask)) {
- if (p)
- free_layer(idp, p);
+ if (*paa)
+ free_layer(idp, *paa);
n += IDR_BITS;
- p = *--paa;
+ --paa;
}
}
idp->layers = 0;
@@ -692,15 +693,16 @@ int idr_for_each(struct idr *idp,
struct idr_layer **paa = &pa[0];
n = idp->layers * IDR_BITS;
- p = rcu_dereference_raw(idp->top);
+ *paa = rcu_dereference_raw(idp->top);
max = idr_max(idp->layers);
id = 0;
while (id >= 0 && id <= max) {
+ p = *paa;
while (n > 0 && p) {
n -= IDR_BITS;
- *paa++ = p;
p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
+ *++paa = p;
}
if (p) {
@@ -712,7 +714,7 @@ int idr_for_each(struct idr *idp,
id += 1 << n;
while (n < fls(id)) {
n += IDR_BITS;
- p = *--paa;
+ --paa;
}
}
@@ -740,17 +742,18 @@ void *idr_get_next(struct idr *idp, int *nextidp)
int n, max;
/* find first ent */
- p = rcu_dereference_raw(idp->top);
+ p = *paa = rcu_dereference_raw(idp->top);
if (!p)
return NULL;
n = (p->layer + 1) * IDR_BITS;
max = idr_max(p->layer + 1);
while (id >= 0 && id <= max) {
+ p = *paa;
while (n > 0 && p) {
n -= IDR_BITS;
- *paa++ = p;
p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
+ *++paa = p;
}
if (p) {
@@ -768,7 +771,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
id = round_up(id + 1, 1 << n);
while (n < fls(id)) {
n += IDR_BITS;
- p = *--paa;
+ --paa;
}
}
return NULL;
diff --git a/lib/kfifo.c b/lib/kfifo.c
index d79b9d222065..90ba1eb1df06 100644
--- a/lib/kfifo.c
+++ b/lib/kfifo.c
@@ -561,8 +561,7 @@ EXPORT_SYMBOL(__kfifo_to_user_r);
unsigned int __kfifo_dma_in_prepare_r(struct __kfifo *fifo,
struct scatterlist *sgl, int nents, unsigned int len, size_t recsize)
{
- if (!nents)
- BUG();
+ BUG_ON(!nents);
len = __kfifo_max_r(len, recsize);
@@ -585,8 +584,7 @@ EXPORT_SYMBOL(__kfifo_dma_in_finish_r);
unsigned int __kfifo_dma_out_prepare_r(struct __kfifo *fifo,
struct scatterlist *sgl, int nents, unsigned int len, size_t recsize)
{
- if (!nents)
- BUG();
+ BUG_ON(!nents);
len = __kfifo_max_r(len, recsize);
diff --git a/lib/klist.c b/lib/klist.c
index 358a368a2947..89b485a2a58d 100644
--- a/lib/klist.c
+++ b/lib/klist.c
@@ -140,11 +140,11 @@ void klist_add_tail(struct klist_node *n, struct klist *k)
EXPORT_SYMBOL_GPL(klist_add_tail);
/**
- * klist_add_after - Init a klist_node and add it after an existing node
+ * klist_add_behind - Init a klist_node and add it after an existing node
* @n: node we're adding.
* @pos: node to put @n after
*/
-void klist_add_after(struct klist_node *n, struct klist_node *pos)
+void klist_add_behind(struct klist_node *n, struct klist_node *pos)
{
struct klist *k = knode_klist(pos);
@@ -153,7 +153,7 @@ void klist_add_after(struct klist_node *n, struct klist_node *pos)
list_add(&n->n_node, &pos->n_node);
spin_unlock(&k->k_lock);
}
-EXPORT_SYMBOL_GPL(klist_add_after);
+EXPORT_SYMBOL_GPL(klist_add_behind);
/**
* klist_add_before - Init a klist_node and add it before an existing node
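
A minimal sketch of the renamed call (all identifiers below are illustrative; klist_add_behind() is the former klist_add_after()):

#include <linux/klist.h>

static struct klist example_klist;
static struct klist_node node_a, node_b;

static void example_insert_behind(void)
{
	klist_init(&example_klist, NULL, NULL);	/* no get/put callbacks */
	klist_add_tail(&node_a, &example_klist);
	klist_add_behind(&node_b, &node_a);	/* formerly klist_add_after() */
}
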
diff --git a/lib/list_sort.c b/lib/list_sort.c
index 1183fa70a44d..12bcba1c8612 100644
--- a/lib/list_sort.c
+++ b/lib/list_sort.c
@@ -1,3 +1,6 @@
+
+#define pr_fmt(fmt) "list_sort_test: " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list_sort.h>
@@ -47,6 +50,7 @@ static void merge_and_restore_back_links(void *priv,
struct list_head *a, struct list_head *b)
{
struct list_head *tail = head;
+ u8 count = 0;
while (a && b) {
/* if equal, take 'a' -- important for sort stability */
@@ -70,7 +74,8 @@ static void merge_and_restore_back_links(void *priv,
* element comparison is needed, so the client's cmp()
* routine can invoke cond_resched() periodically.
*/
- (*cmp)(priv, tail->next, tail->next);
+ if (unlikely(!(++count)))
+ (*cmp)(priv, tail->next, tail->next);
tail->next->prev = tail;
tail = tail->next;
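
The throttled dummy cmp() call above exists so that a client comparator can invoke cond_resched() once every 256 merged elements. A sketch of such a comparator (struct my_item and my_cmp are hypothetical):

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/sched.h>

struct my_item {
	struct list_head list;
	int key;
};

static int my_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct my_item *ia = container_of(a, struct my_item, list);
	struct my_item *ib = container_of(b, struct my_item, list);

	cond_resched();			/* safe: cmp() is invoked periodically */
	return ia->key - ib->key;	/* ascending order by key */
}
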
@@ -123,9 +128,7 @@ void list_sort(void *priv, struct list_head *head,
}
if (lev > max_lev) {
if (unlikely(lev >= ARRAY_SIZE(part)-1)) {
- printk_once(KERN_DEBUG "list passed to"
- " list_sort() too long for"
- " efficiency\n");
+ printk_once(KERN_DEBUG "list too long for efficiency\n");
lev--;
}
max_lev = lev;
@@ -168,27 +171,25 @@ static struct debug_el **elts __initdata;
static int __init check(struct debug_el *ela, struct debug_el *elb)
{
if (ela->serial >= TEST_LIST_LEN) {
- printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
- ela->serial);
+ pr_err("error: incorrect serial %d\n", ela->serial);
return -EINVAL;
}
if (elb->serial >= TEST_LIST_LEN) {
- printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
- elb->serial);
+ pr_err("error: incorrect serial %d\n", elb->serial);
return -EINVAL;
}
if (elts[ela->serial] != ela || elts[elb->serial] != elb) {
- printk(KERN_ERR "list_sort_test: error: phantom element\n");
+ pr_err("error: phantom element\n");
return -EINVAL;
}
if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) {
- printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
- ela->poison1, ela->poison2);
+ pr_err("error: bad poison: %#x/%#x\n",
+ ela->poison1, ela->poison2);
return -EINVAL;
}
if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) {
- printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
- elb->poison1, elb->poison2);
+ pr_err("error: bad poison: %#x/%#x\n",
+ elb->poison1, elb->poison2);
return -EINVAL;
}
return 0;
@@ -207,25 +208,23 @@ static int __init cmp(void *priv, struct list_head *a, struct list_head *b)
static int __init list_sort_test(void)
{
- int i, count = 1, err = -EINVAL;
+ int i, count = 1, err = -ENOMEM;
struct debug_el *el;
- struct list_head *cur, *tmp;
+ struct list_head *cur;
LIST_HEAD(head);
- printk(KERN_DEBUG "list_sort_test: start testing list_sort()\n");
+ pr_debug("start testing list_sort()\n");
- elts = kmalloc(sizeof(void *) * TEST_LIST_LEN, GFP_KERNEL);
+ elts = kcalloc(TEST_LIST_LEN, sizeof(*elts), GFP_KERNEL);
if (!elts) {
- printk(KERN_ERR "list_sort_test: error: cannot allocate "
- "memory\n");
- goto exit;
+ pr_err("error: cannot allocate memory\n");
+ return err;
}
for (i = 0; i < TEST_LIST_LEN; i++) {
el = kmalloc(sizeof(*el), GFP_KERNEL);
if (!el) {
- printk(KERN_ERR "list_sort_test: error: cannot "
- "allocate memory\n");
+ pr_err("error: cannot allocate memory\n");
goto exit;
}
/* force some equivalencies */
@@ -239,52 +238,52 @@ static int __init list_sort_test(void)
list_sort(NULL, &head, cmp);
+ err = -EINVAL;
for (cur = head.next; cur->next != &head; cur = cur->next) {
struct debug_el *el1;
int cmp_result;
if (cur->next->prev != cur) {
- printk(KERN_ERR "list_sort_test: error: list is "
- "corrupted\n");
+ pr_err("error: list is corrupted\n");
goto exit;
}
cmp_result = cmp(NULL, cur, cur->next);
if (cmp_result > 0) {
- printk(KERN_ERR "list_sort_test: error: list is not "
- "sorted\n");
+ pr_err("error: list is not sorted\n");
goto exit;
}
el = container_of(cur, struct debug_el, list);
el1 = container_of(cur->next, struct debug_el, list);
if (cmp_result == 0 && el->serial >= el1->serial) {
- printk(KERN_ERR "list_sort_test: error: order of "
- "equivalent elements not preserved\n");
+ pr_err("error: order of equivalent elements not "
+ "preserved\n");
goto exit;
}
if (check(el, el1)) {
- printk(KERN_ERR "list_sort_test: error: element check "
- "failed\n");
+ pr_err("error: element check failed\n");
goto exit;
}
count++;
}
+ if (head.prev != cur) {
+ pr_err("error: list is corrupted\n");
+ goto exit;
+ }
+
if (count != TEST_LIST_LEN) {
- printk(KERN_ERR "list_sort_test: error: bad list length %d",
- count);
+ pr_err("error: bad list length %d", count);
goto exit;
}
err = 0;
exit:
+ for (i = 0; i < TEST_LIST_LEN; i++)
+ kfree(elts[i]);
kfree(elts);
- list_for_each_safe(cur, tmp, &head) {
- list_del(cur);
- kfree(container_of(cur, struct debug_el, list));
- }
return err;
}
module_init(list_sort_test);
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 65f4effd117f..c16c81a3d430 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -101,7 +101,7 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
* / \ / \
* p u --> P U
* / /
- * n N
+ * n n
*
* However, since g's parent might be red, and
* 4) does not allow this, we need to recurse
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index b4415fceb7e7..9cdf62f8accd 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -73,7 +73,7 @@ EXPORT_SYMBOL(sg_nents);
**/
struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
{
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
struct scatterlist *ret = &sgl[nents - 1];
#else
struct scatterlist *sg, *ret = NULL;
@@ -255,7 +255,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
if (nents == 0)
return -EINVAL;
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
if (WARN_ON_ONCE(nents > max_ents))
return -EINVAL;
#endif
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index ed5c1454dd62..29033f319aea 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -25,12 +25,15 @@
int string_get_size(u64 size, const enum string_size_units units,
char *buf, int len)
{
- static const char *units_10[] = { "B", "kB", "MB", "GB", "TB", "PB",
- "EB", "ZB", "YB", NULL};
- static const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB",
- "EiB", "ZiB", "YiB", NULL };
- static const char **units_str[] = {
- [STRING_UNITS_10] = units_10,
+ static const char *const units_10[] = {
+ "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", NULL
+ };
+ static const char *const units_2[] = {
+ "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB",
+ NULL
+ };
+ static const char *const *const units_str[] = {
+ [STRING_UNITS_10] = units_10,
[STRING_UNITS_2] = units_2,
};
static const unsigned int divisor[] = {
diff --git a/lib/test-kstrtox.c b/lib/test-kstrtox.c
index bea3f3fa3f02..4137bca5f8e8 100644
--- a/lib/test-kstrtox.c
+++ b/lib/test-kstrtox.c
@@ -3,7 +3,7 @@
#include <linux/module.h>
#define for_each_test(i, test) \
- for (i = 0; i < sizeof(test) / sizeof(test[0]); i++)
+ for (i = 0; i < ARRAY_SIZE(test); i++)
struct test_fail {
const char *str;
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 6fe2c84eb055..0eced40344dd 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1183,6 +1183,21 @@ char *address_val(char *buf, char *end, const void *addr,
return number(buf, end, num, spec);
}
+static noinline_for_stack
+char *comm_name(char *buf, char *end, struct task_struct *tsk,
+ struct printf_spec spec, const char *fmt)
+{
+ char name[TASK_COMM_LEN];
+
+ /* Caller can pass NULL instead of current. */
+ if (!tsk)
+ tsk = current;
+ /* Not using get_task_comm() in case I'm in IRQ context. */
+ memcpy(name, tsk->comm, TASK_COMM_LEN);
+ name[sizeof(name) - 1] = '\0';
+ return string(buf, end, name, spec);
+}
+
int kptr_restrict __read_mostly;
/*
@@ -1250,6 +1265,7 @@ int kptr_restrict __read_mostly;
* (default assumed to be phys_addr_t, passed by reference)
* - 'd[234]' For a dentry name (optionally 2-4 last components)
* - 'D[234]' Same as 'd' but for a struct file
+ * - 'T' task_struct->comm (a NULL pointer prints current's comm)
*
* Note: The difference between 'S' and 'F' is that on ia64 and ppc64
* function pointers are really function descriptors, which contain a
@@ -1261,7 +1277,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
{
int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0);
- if (!ptr && *fmt != 'K') {
+ if (!ptr && *fmt != 'K' && *fmt != 'T') {
/*
* Print (null) with the same width as a pointer so it makes
* tabular output look nice.
@@ -1389,6 +1405,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
return dentry_name(buf, end,
((const struct file *)ptr)->f_path.dentry,
spec, fmt);
+ case 'T':
+ return comm_name(buf, end, ptr, spec, fmt);
}
spec.flags |= SMALL;
if (spec.field_width == -1) {
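
A usage sketch for the new 'T' specifier (tsk is a hypothetical struct task_struct pointer; as comm_name() above shows, a NULL argument falls back to current):

#include <linux/printk.h>
#include <linux/sched.h>

static void example_report(struct task_struct *tsk)
{
	/* Print the comm of an explicit task, then of current via NULL. */
	pr_info("faulting task comm: %pT (pid %d)\n", tsk, task_pid_nr(tsk));
	pr_info("current comm: %pT\n", NULL);
}
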
diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c
index d63381e8e333..d20ef458f137 100644
--- a/lib/zlib_deflate/deflate.c
+++ b/lib/zlib_deflate/deflate.c
@@ -250,52 +250,6 @@ int zlib_deflateInit2(
}
/* ========================================================================= */
-#if 0
-int zlib_deflateSetDictionary(
- z_streamp strm,
- const Byte *dictionary,
- uInt dictLength
-)
-{
- deflate_state *s;
- uInt length = dictLength;
- uInt n;
- IPos hash_head = 0;
-
- if (strm == NULL || strm->state == NULL || dictionary == NULL)
- return Z_STREAM_ERROR;
-
- s = (deflate_state *) strm->state;
- if (s->status != INIT_STATE) return Z_STREAM_ERROR;
-
- strm->adler = zlib_adler32(strm->adler, dictionary, dictLength);
-
- if (length < MIN_MATCH) return Z_OK;
- if (length > MAX_DIST(s)) {
- length = MAX_DIST(s);
-#ifndef USE_DICT_HEAD
- dictionary += dictLength - length; /* use the tail of the dictionary */
-#endif
- }
- memcpy((char *)s->window, dictionary, length);
- s->strstart = length;
- s->block_start = (long)length;
-
- /* Insert all strings in the hash table (except for the last two bytes).
- * s->lookahead stays null, so s->ins_h will be recomputed at the next
- * call of fill_window.
- */
- s->ins_h = s->window[0];
- UPDATE_HASH(s, s->ins_h, s->window[1]);
- for (n = 0; n <= length - MIN_MATCH; n++) {
- INSERT_STRING(s, n, hash_head);
- }
- if (hash_head) hash_head = 0; /* to make compiler happy */
- return Z_OK;
-}
-#endif /* 0 */
-
-/* ========================================================================= */
int zlib_deflateReset(
z_streamp strm
)
@@ -326,45 +280,6 @@ int zlib_deflateReset(
return Z_OK;
}
-/* ========================================================================= */
-#if 0
-int zlib_deflateParams(
- z_streamp strm,
- int level,
- int strategy
-)
-{
- deflate_state *s;
- compress_func func;
- int err = Z_OK;
-
- if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
- s = (deflate_state *) strm->state;
-
- if (level == Z_DEFAULT_COMPRESSION) {
- level = 6;
- }
- if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) {
- return Z_STREAM_ERROR;
- }
- func = configuration_table[s->level].func;
-
- if (func != configuration_table[level].func && strm->total_in != 0) {
- /* Flush the last buffer: */
- err = zlib_deflate(strm, Z_PARTIAL_FLUSH);
- }
- if (s->level != level) {
- s->level = level;
- s->max_lazy_match = configuration_table[level].max_lazy;
- s->good_match = configuration_table[level].good_length;
- s->nice_match = configuration_table[level].nice_length;
- s->max_chain_length = configuration_table[level].max_chain;
- }
- s->strategy = strategy;
- return err;
-}
-#endif /* 0 */
-
/* =========================================================================
* Put a short in the pending buffer. The 16-bit value is put in MSB order.
* IN assertion: the stream state is correct and there is enough room in
@@ -568,64 +483,6 @@ int zlib_deflateEnd(
return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
}
-/* =========================================================================
- * Copy the source state to the destination state.
- */
-#if 0
-int zlib_deflateCopy (
- z_streamp dest,
- z_streamp source
-)
-{
-#ifdef MAXSEG_64K
- return Z_STREAM_ERROR;
-#else
- deflate_state *ds;
- deflate_state *ss;
- ush *overlay;
- deflate_workspace *mem;
-
-
- if (source == NULL || dest == NULL || source->state == NULL) {
- return Z_STREAM_ERROR;
- }
-
- ss = (deflate_state *) source->state;
-
- *dest = *source;
-
- mem = (deflate_workspace *) dest->workspace;
-
- ds = &(mem->deflate_memory);
-
- dest->state = (struct internal_state *) ds;
- *ds = *ss;
- ds->strm = dest;
-
- ds->window = (Byte *) mem->window_memory;
- ds->prev = (Pos *) mem->prev_memory;
- ds->head = (Pos *) mem->head_memory;
- overlay = (ush *) mem->overlay_memory;
- ds->pending_buf = (uch *) overlay;
-
- memcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
- memcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos));
- memcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos));
- memcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
-
- ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
- ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
- ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
-
- ds->l_desc.dyn_tree = ds->dyn_ltree;
- ds->d_desc.dyn_tree = ds->dyn_dtree;
- ds->bl_desc.dyn_tree = ds->bl_tree;
-
- return Z_OK;
-#endif
-}
-#endif /* 0 */
-
/* ===========================================================================
* Read a new buffer from the current input stream, update the adler32
* and total number of bytes read. All deflate() input goes through
diff --git a/lib/zlib_inflate/inflate.c b/lib/zlib_inflate/inflate.c
index f5ce87b0800e..58a733b10387 100644
--- a/lib/zlib_inflate/inflate.c
+++ b/lib/zlib_inflate/inflate.c
@@ -45,21 +45,6 @@ int zlib_inflateReset(z_streamp strm)
return Z_OK;
}
-#if 0
-int zlib_inflatePrime(z_streamp strm, int bits, int value)
-{
- struct inflate_state *state;
-
- if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
- state = (struct inflate_state *)strm->state;
- if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR;
- value &= (1L << bits) - 1;
- state->hold += value << state->bits;
- state->bits += bits;
- return Z_OK;
-}
-#endif
-
int zlib_inflateInit2(z_streamp strm, int windowBits)
{
struct inflate_state *state;
@@ -761,123 +746,6 @@ int zlib_inflateEnd(z_streamp strm)
return Z_OK;
}
-#if 0
-int zlib_inflateSetDictionary(z_streamp strm, const Byte *dictionary,
- uInt dictLength)
-{
- struct inflate_state *state;
- unsigned long id;
-
- /* check state */
- if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
- state = (struct inflate_state *)strm->state;
- if (state->wrap != 0 && state->mode != DICT)
- return Z_STREAM_ERROR;
-
- /* check for correct dictionary id */
- if (state->mode == DICT) {
- id = zlib_adler32(0L, NULL, 0);
- id = zlib_adler32(id, dictionary, dictLength);
- if (id != state->check)
- return Z_DATA_ERROR;
- }
-
- /* copy dictionary to window */
- zlib_updatewindow(strm, strm->avail_out);
-
- if (dictLength > state->wsize) {
- memcpy(state->window, dictionary + dictLength - state->wsize,
- state->wsize);
- state->whave = state->wsize;
- }
- else {
- memcpy(state->window + state->wsize - dictLength, dictionary,
- dictLength);
- state->whave = dictLength;
- }
- state->havedict = 1;
- return Z_OK;
-}
-#endif
-
-#if 0
-/*
- Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found
- or when out of input. When called, *have is the number of pattern bytes
- found in order so far, in 0..3. On return *have is updated to the new
- state. If on return *have equals four, then the pattern was found and the
- return value is how many bytes were read including the last byte of the
- pattern. If *have is less than four, then the pattern has not been found
- yet and the return value is len. In the latter case, zlib_syncsearch() can be
- called again with more data and the *have state. *have is initialized to
- zero for the first call.
- */
-static unsigned zlib_syncsearch(unsigned *have, unsigned char *buf,
- unsigned len)
-{
- unsigned got;
- unsigned next;
-
- got = *have;
- next = 0;
- while (next < len && got < 4) {
- if ((int)(buf[next]) == (got < 2 ? 0 : 0xff))
- got++;
- else if (buf[next])
- got = 0;
- else
- got = 4 - got;
- next++;
- }
- *have = got;
- return next;
-}
-#endif
-
-#if 0
-int zlib_inflateSync(z_streamp strm)
-{
- unsigned len; /* number of bytes to look at or looked at */
- unsigned long in, out; /* temporary to save total_in and total_out */
- unsigned char buf[4]; /* to restore bit buffer to byte string */
- struct inflate_state *state;
-
- /* check parameters */
- if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
- state = (struct inflate_state *)strm->state;
- if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR;
-
- /* if first time, start search in bit buffer */
- if (state->mode != SYNC) {
- state->mode = SYNC;
- state->hold <<= state->bits & 7;
- state->bits -= state->bits & 7;
- len = 0;
- while (state->bits >= 8) {
- buf[len++] = (unsigned char)(state->hold);
- state->hold >>= 8;
- state->bits -= 8;
- }
- state->have = 0;
- zlib_syncsearch(&(state->have), buf, len);
- }
-
- /* search available input */
- len = zlib_syncsearch(&(state->have), strm->next_in, strm->avail_in);
- strm->avail_in -= len;
- strm->next_in += len;
- strm->total_in += len;
-
- /* return no joy or set up to restart inflate() on a new block */
- if (state->have != 4) return Z_DATA_ERROR;
- in = strm->total_in; out = strm->total_out;
- zlib_inflateReset(strm);
- strm->total_in = in; strm->total_out = out;
- state->mode = TYPE;
- return Z_OK;
-}
-#endif
-
/*
* This subroutine adds the data at next_in/avail_in to the output history
* without performing any output. The output buffer must be "caught up";
diff --git a/mm/Kconfig b/mm/Kconfig
index 3e9977a9d657..886db2158538 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -508,21 +508,34 @@ config CMA_DEBUG
processing calls such as dma_alloc_from_contiguous().
This option does not affect warning and error messages.
-config ZBUD
- tristate
- default n
+config CMA_AREAS
+ int "Maximum count of the CMA areas"
+ depends on CMA
+ default 7
help
- A special purpose allocator for storing compressed pages.
- It is designed to store up to two compressed pages per physical
- page. While this design limits storage density, it has simple and
- deterministic reclaim properties that make it preferable to a higher
- density approach when reclaim will be used.
+ CMA allows creating CMA areas for a particular purpose, mainly
+ for use as device-private memory. This parameter sets the maximum
+ number of CMA areas in the system.
+
+ If unsure, leave the default value "7".
+
+config MEM_SOFT_DIRTY
+ bool "Track memory changes"
+ depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
+ select PROC_PAGE_MONITOR
+ help
+ This option enables tracking of memory changes by introducing a
+ soft-dirty bit on PTEs. This bit is set when someone writes
+ into a page, just like the regular dirty bit, but unlike the
+ latter it can be cleared by hand.
+
+ See Documentation/vm/soft-dirty.txt for more details.
config ZSWAP
bool "Compressed cache for swap pages (EXPERIMENTAL)"
depends on FRONTSWAP && CRYPTO=y
select CRYPTO_LZO
- select ZBUD
+ select ZPOOL
default n
help
A lightweight compressed cache for swap pages. It takes
@@ -538,17 +551,22 @@ config ZSWAP
they have not be fully explored on the large set of potential
configurations and workloads that exist.
-config MEM_SOFT_DIRTY
- bool "Track memory changes"
- depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
- select PROC_PAGE_MONITOR
+config ZPOOL
+ tristate "Common API for compressed memory storage"
+ default n
help
- This option enables memory changes tracking by introducing a
- soft-dirty bit on pte-s. This bit it set when someone writes
- into a page just as regular dirty bit, but unlike the latter
- it can be cleared by hands.
+ Compressed memory storage API. This allows using either zbud or
+ zsmalloc.
- See Documentation/vm/soft-dirty.txt for more details.
+config ZBUD
+ tristate "Low density storage for compressed pages"
+ default n
+ help
+ A special purpose allocator for storing compressed pages.
+ It is designed to store up to two compressed pages per physical
+ page. While this design limits storage density, it has simple and
+ deterministic reclaim properties that make it preferable to a higher
+ density approach when reclaim will be used.
config ZSMALLOC
tristate "Memory allocator for compressed pages"
diff --git a/mm/Makefile b/mm/Makefile
index 4064f3ec145e..632ae77e6070 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -59,6 +59,8 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
obj-$(CONFIG_CLEANCACHE) += cleancache.o
obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
+obj-$(CONFIG_ZPOOL) += zpool.o
obj-$(CONFIG_ZBUD) += zbud.o
obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
+obj-$(CONFIG_CMA) += cma.o
diff --git a/mm/cma.c b/mm/cma.c
new file mode 100644
index 000000000000..c17751c0dcaf
--- /dev/null
+++ b/mm/cma.c
@@ -0,0 +1,335 @@
+/*
+ * Contiguous Memory Allocator
+ *
+ * Copyright (c) 2010-2011 by Samsung Electronics.
+ * Copyright IBM Corporation, 2013
+ * Copyright LG Electronics Inc., 2014
+ * Written by:
+ * Marek Szyprowski <m.szyprowski@samsung.com>
+ * Michal Nazarewicz <mina86@mina86.com>
+ * Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ * Joonsoo Kim <iamjoonsoo.kim@lge.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your option) any later version of the license.
+ */
+
+#define pr_fmt(fmt) "cma: " fmt
+
+#ifdef CONFIG_CMA_DEBUG
+#ifndef DEBUG
+# define DEBUG
+#endif
+#endif
+
+#include <linux/memblock.h>
+#include <linux/err.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/log2.h>
+#include <linux/cma.h>
+
+struct cma {
+ unsigned long base_pfn;
+ unsigned long count;
+ unsigned long *bitmap;
+ unsigned int order_per_bit; /* Order of pages represented by one bit */
+ struct mutex lock;
+};
+
+static struct cma cma_areas[MAX_CMA_AREAS];
+static unsigned cma_area_count;
+static DEFINE_MUTEX(cma_mutex);
+
+phys_addr_t cma_get_base(struct cma *cma)
+{
+ return PFN_PHYS(cma->base_pfn);
+}
+
+unsigned long cma_get_size(struct cma *cma)
+{
+ return cma->count << PAGE_SHIFT;
+}
+
+static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order)
+{
+ return (1UL << (align_order >> cma->order_per_bit)) - 1;
+}
+
+static unsigned long cma_bitmap_maxno(struct cma *cma)
+{
+ return cma->count >> cma->order_per_bit;
+}
+
+static unsigned long cma_bitmap_pages_to_bits(struct cma *cma,
+ unsigned long pages)
+{
+ return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit;
+}
+
+static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, int count)
+{
+ unsigned long bitmap_no, bitmap_count;
+
+ bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit;
+ bitmap_count = cma_bitmap_pages_to_bits(cma, count);
+
+ mutex_lock(&cma->lock);
+ bitmap_clear(cma->bitmap, bitmap_no, bitmap_count);
+ mutex_unlock(&cma->lock);
+}
+
+static int __init cma_activate_area(struct cma *cma)
+{
+ int bitmap_size = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long);
+ unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
+ unsigned i = cma->count >> pageblock_order;
+ struct zone *zone;
+
+ cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+
+ if (!cma->bitmap)
+ return -ENOMEM;
+
+ WARN_ON_ONCE(!pfn_valid(pfn));
+ zone = page_zone(pfn_to_page(pfn));
+
+ do {
+ unsigned j;
+
+ base_pfn = pfn;
+ for (j = pageblock_nr_pages; j; --j, pfn++) {
+ WARN_ON_ONCE(!pfn_valid(pfn));
+ /*
+ * alloc_contig_range requires the pfn range
+ * specified to be in the same zone. Make this
+ * simple by forcing the entire CMA resv range
+ * to be in the same zone.
+ */
+ if (page_zone(pfn_to_page(pfn)) != zone)
+ goto err;
+ }
+ init_cma_reserved_pageblock(pfn_to_page(base_pfn));
+ } while (--i);
+
+ mutex_init(&cma->lock);
+ return 0;
+
+err:
+ kfree(cma->bitmap);
+ return -EINVAL;
+}
+
+static int __init cma_init_reserved_areas(void)
+{
+ int i;
+
+ for (i = 0; i < cma_area_count; i++) {
+ int ret = cma_activate_area(&cma_areas[i]);
+
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+core_initcall(cma_init_reserved_areas);
+
+/**
+ * cma_declare_contiguous() - reserve custom contiguous area
+ * @base: Base address of the reserved area (optional, use 0 for any).
+ * @size: Size of the reserved area (in bytes).
+ * @limit: End address of the reserved memory (optional, 0 for any).
+ * @alignment: Alignment for the CMA area, should be power of 2 or zero
+ * @order_per_bit: Order of pages represented by one bit on bitmap.
+ * @fixed: if true, reserve the area at exactly @base (see below).
+ * @res_cma: Pointer to store the created cma region.
+ *
+ * This function reserves memory from early allocator. It should be
+ * called by arch specific code once the early allocator (memblock or bootmem)
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory. This function allows creating custom reserved areas.
+ *
+ * If @fixed is true, reserve contiguous area at exactly @base. If false,
+ * reserve in range from @base to @limit.
+ */
+int __init cma_declare_contiguous(phys_addr_t base,
+ phys_addr_t size, phys_addr_t limit,
+ phys_addr_t alignment, unsigned int order_per_bit,
+ bool fixed, struct cma **res_cma)
+{
+ struct cma *cma;
+ int ret = 0;
+
+ pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n",
+ __func__, (unsigned long)size, (unsigned long)base,
+ (unsigned long)limit, (unsigned long)alignment);
+
+ if (cma_area_count == ARRAY_SIZE(cma_areas)) {
+ pr_err("Not enough slots for CMA reserved regions!\n");
+ return -ENOSPC;
+ }
+
+ if (!size)
+ return -EINVAL;
+
+ if (alignment && !is_power_of_2(alignment))
+ return -EINVAL;
+
+ /*
+ * Sanitise input arguments.
+ * Pages at both ends of the CMA area could be merged into adjacent
+ * unmovable migratetype pages by the page allocator's buddy algorithm.
+ * In that case, you couldn't get contiguous memory, which is not what
+ * we want.
+ */
+ alignment = max(alignment,
+ (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order));
+ base = ALIGN(base, alignment);
+ size = ALIGN(size, alignment);
+ limit &= ~(alignment - 1);
+
+ /* size should be aligned with order_per_bit */
+ if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit))
+ return -EINVAL;
+
+ /* Reserve memory */
+ if (base && fixed) {
+ if (memblock_is_region_reserved(base, size) ||
+ memblock_reserve(base, size) < 0) {
+ ret = -EBUSY;
+ goto err;
+ }
+ } else {
+ phys_addr_t addr = memblock_alloc_range(size, alignment, base,
+ limit);
+ if (!addr) {
+ ret = -ENOMEM;
+ goto err;
+ } else {
+ base = addr;
+ }
+ }
+
+ /*
+ * Each reserved area must be initialised later, when more kernel
+ * subsystems (like slab allocator) are available.
+ */
+ cma = &cma_areas[cma_area_count];
+ cma->base_pfn = PFN_DOWN(base);
+ cma->count = size >> PAGE_SHIFT;
+ cma->order_per_bit = order_per_bit;
+ *res_cma = cma;
+ cma_area_count++;
+
+ pr_info("Reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
+ (unsigned long)base);
+ return 0;
+
+err:
+ pr_err("Failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
+ return ret;
+}
+
+/**
+ * cma_alloc() - allocate pages from contiguous area
+ * @cma: Contiguous memory region for which the allocation is performed.
+ * @count: Requested number of pages.
+ * @align: Requested alignment of pages (in PAGE_SIZE order).
+ *
+ * This function allocates pages from a specific contiguous
+ * memory area.
+ */
+struct page *cma_alloc(struct cma *cma, int count, unsigned int align)
+{
+ unsigned long mask, pfn, start = 0;
+ unsigned long bitmap_maxno, bitmap_no, bitmap_count;
+ struct page *page = NULL;
+ int ret;
+
+ if (!cma || !cma->count)
+ return NULL;
+
+ pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
+ count, align);
+
+ if (!count)
+ return NULL;
+
+ mask = cma_bitmap_aligned_mask(cma, align);
+ bitmap_maxno = cma_bitmap_maxno(cma);
+ bitmap_count = cma_bitmap_pages_to_bits(cma, count);
+
+ for (;;) {
+ mutex_lock(&cma->lock);
+ bitmap_no = bitmap_find_next_zero_area(cma->bitmap,
+ bitmap_maxno, start, bitmap_count, mask);
+ if (bitmap_no >= bitmap_maxno) {
+ mutex_unlock(&cma->lock);
+ break;
+ }
+ bitmap_set(cma->bitmap, bitmap_no, bitmap_count);
+ /*
+ * It's safe to drop the lock here. We've marked this region for
+ * our exclusive use. If the migration fails we will take the
+ * lock again and unmark it.
+ */
+ mutex_unlock(&cma->lock);
+
+ pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
+ mutex_lock(&cma_mutex);
+ ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
+ mutex_unlock(&cma_mutex);
+ if (ret == 0) {
+ page = pfn_to_page(pfn);
+ break;
+ }
+
+ cma_clear_bitmap(cma, pfn, count);
+ if (ret != -EBUSY)
+ break;
+
+ pr_debug("%s(): memory range at %p is busy, retrying\n",
+ __func__, pfn_to_page(pfn));
+ /* try again with a bit different memory target */
+ start = bitmap_no + mask + 1;
+ }
+
+ pr_debug("%s(): returned %p\n", __func__, page);
+ return page;
+}
+
+/**
+ * cma_release() - release allocated pages
+ * @cma: Contiguous memory region for which the allocation is performed.
+ * @pages: Allocated pages.
+ * @count: Number of allocated pages.
+ *
+ * This function releases memory allocated by cma_alloc().
+ * It returns false when the provided pages do not belong to the
+ * contiguous area, and true otherwise.
+ */
+bool cma_release(struct cma *cma, struct page *pages, int count)
+{
+ unsigned long pfn;
+
+ if (!cma || !pages)
+ return false;
+
+ pr_debug("%s(page %p)\n", __func__, (void *)pages);
+
+ pfn = page_to_pfn(pages);
+
+ if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
+ return false;
+
+ VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
+
+ free_contig_range(pfn, count);
+ cma_clear_bitmap(cma, pfn, count);
+
+ return true;
+}
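
A sketch of how platform code might use the API introduced above, following its kerneldoc comments (example_cma and the helper names are hypothetical; cma_declare_contiguous() must run early, while memblock is still available):

#include <linux/cma.h>
#include <linux/mm.h>
#include <linux/sizes.h>

static struct cma *example_cma;

static int __init example_cma_reserve(void)
{
	/* 16 MiB anywhere (base 0, limit 0), order_per_bit 0, not fixed. */
	return cma_declare_contiguous(0, SZ_16M, 0, 0, 0, false, &example_cma);
}

static struct page *example_cma_alloc_1m(void)
{
	/* 256 pages, naturally aligned to 1 MiB (order 8 with 4 KiB pages). */
	return cma_alloc(example_cma, SZ_1M >> PAGE_SHIFT, get_order(SZ_1M));
}

static void example_cma_free_1m(struct page *pages)
{
	cma_release(example_cma, pages, SZ_1M >> PAGE_SHIFT);
}
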
diff --git a/mm/compaction.c b/mm/compaction.c
index 21bf292b642a..51750197db11 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -325,14 +325,14 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
/* Found a free page, break it into order-0 pages */
isolated = split_free_page(page);
- total_isolated += isolated;
- for (i = 0; i < isolated; i++) {
- list_add(&page->lru, freelist);
- page++;
- }
-
- /* If a page was split, advance to the end of it */
if (isolated) {
+ total_isolated += isolated;
+ for (i = 0; i < isolated; i++) {
+ list_add(&page->lru, freelist);
+ page++;
+ }
+
+ /* If a page was split, advance to the end of it */
blockpfn += isolated - 1;
cursor += isolated - 1;
continue;
@@ -341,9 +341,6 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
isolate_fail:
if (strict)
break;
- else
- continue;
-
}
trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated);
diff --git a/mm/filemap.c b/mm/filemap.c
index 65d44fd88c78..f501b56ec2c6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -31,6 +31,7 @@
#include <linux/security.h>
#include <linux/cpuset.h>
#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
+#include <linux/hugetlb.h>
#include <linux/memcontrol.h>
#include <linux/cleancache.h>
#include <linux/rmap.h>
@@ -233,7 +234,6 @@ void delete_from_page_cache(struct page *page)
spin_lock_irq(&mapping->tree_lock);
__delete_from_page_cache(page, NULL);
spin_unlock_irq(&mapping->tree_lock);
- mem_cgroup_uncharge_cache_page(page);
if (freepage)
freepage(page);
@@ -489,8 +489,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
if (PageSwapBacked(new))
__inc_zone_page_state(new, NR_SHMEM);
spin_unlock_irq(&mapping->tree_lock);
- /* mem_cgroup codes must not be called under tree_lock */
- mem_cgroup_replace_page_cache(old, new);
+ mem_cgroup_migrate(old, new, true);
radix_tree_preload_end();
if (freepage)
freepage(old);
@@ -548,19 +547,24 @@ static int __add_to_page_cache_locked(struct page *page,
pgoff_t offset, gfp_t gfp_mask,
void **shadowp)
{
+ int huge = PageHuge(page);
+ struct mem_cgroup *memcg;
int error;
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageSwapBacked(page), page);
- error = mem_cgroup_charge_file(page, current->mm,
- gfp_mask & GFP_RECLAIM_MASK);
- if (error)
- return error;
+ if (!huge) {
+ error = mem_cgroup_try_charge(page, current->mm,
+ gfp_mask, &memcg);
+ if (error)
+ return error;
+ }
error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
if (error) {
- mem_cgroup_uncharge_cache_page(page);
+ if (!huge)
+ mem_cgroup_cancel_charge(page, memcg);
return error;
}
@@ -575,13 +579,16 @@ static int __add_to_page_cache_locked(struct page *page,
goto err_insert;
__inc_zone_page_state(page, NR_FILE_PAGES);
spin_unlock_irq(&mapping->tree_lock);
+ if (!huge)
+ mem_cgroup_commit_charge(page, memcg, false);
trace_mm_filemap_add_to_page_cache(page);
return 0;
err_insert:
page->mapping = NULL;
/* Leave page->index set: truncation relies upon it */
spin_unlock_irq(&mapping->tree_lock);
- mem_cgroup_uncharge_cache_page(page);
+ if (!huge)
+ mem_cgroup_cancel_charge(page, memcg);
page_cache_release(page);
return error;
}
@@ -808,6 +815,17 @@ int __lock_page_killable(struct page *page)
}
EXPORT_SYMBOL_GPL(__lock_page_killable);
+/*
+ * Return values:
+ * 1 - page is locked; mmap_sem is still held.
+ * 0 - page is not locked.
+ * mmap_sem has been released (up_read()), unless flags had both
+ * FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
+ * which case mmap_sem is still held.
+ *
+ * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1
+ * with the page locked and the mmap_sem unperturbed.
+ */
int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags)
{
@@ -1091,9 +1109,9 @@ no_page:
if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
fgp_flags |= FGP_LOCK;
- /* Init accessed so avoit atomic mark_page_accessed later */
+ /* Init accessed so avoid atomic mark_page_accessed later */
if (fgp_flags & FGP_ACCESSED)
- init_page_accessed(page);
+ __SetPageReferenced(page);
err = add_to_page_cache_lru(page, mapping, offset, radix_gfp_mask);
if (unlikely(err)) {
@@ -1827,6 +1845,18 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
* The goto's are kind of ugly, but this streamlines the normal case of having
* it in the page cache, and handles the special cases reasonably without
* having a lot of duplicated code.
+ *
+ * vma->vm_mm->mmap_sem must be held on entry.
+ *
+ * If our return value has VM_FAULT_RETRY set, it's because
+ * lock_page_or_retry() returned 0.
+ * The mmap_sem has usually been released in this case.
+ * See __lock_page_or_retry() for the exception.
+ *
+ * If our return value does not have VM_FAULT_RETRY set, the mmap_sem
+ * has not been released.
+ *
+ * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
*/
int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
diff --git a/mm/gup.c b/mm/gup.c
index cc5a9e7adea7..91d044b1600d 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -258,6 +258,11 @@ unmap:
return ret;
}
+/*
+ * mmap_sem must be held on entry. If @nonblocking != NULL and
+ * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
+ * If it is, *@nonblocking will be set to 0 and -EBUSY returned.
+ */
static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
unsigned long address, unsigned int *flags, int *nonblocking)
{
@@ -373,7 +378,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
* with a put_page() call when it is finished with. vmas will only
* remain valid while mmap_sem is held.
*
- * Must be called with mmap_sem held for read or write.
+ * Must be called with mmap_sem held. It may be released. See below.
*
* __get_user_pages walks a process's page tables and takes a reference to
* each struct page that each user address corresponds to at a given
@@ -396,7 +401,14 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
*
* If @nonblocking != NULL, __get_user_pages will not wait for disk IO
* or mmap_sem contention, and if waiting is needed to pin all pages,
- * *@nonblocking will be set to 0.
+ * *@nonblocking will be set to 0. Further, if @gup_flags does not
+ * include FOLL_NOWAIT, the mmap_sem will be released via up_read() in
+ * this case.
+ *
+ * A caller using such a combination of @nonblocking and @gup_flags
+ * must therefore hold the mmap_sem for reading only, and recognize
+ * when it's been released. Otherwise, it must be held for either
+ * reading or writing and will not be released.
*
* In most cases, get_user_pages or get_user_pages_fast should be used
* instead of __get_user_pages. __get_user_pages should be used only if
@@ -528,7 +540,7 @@ EXPORT_SYMBOL(__get_user_pages);
* such architectures, gup() will not be enough to make a subsequent access
* succeed.
*
- * This should be called with the mm_sem held for read.
+ * This has the same semantics wrt the @mm->mmap_sem as does filemap_fault().
*/
int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
unsigned long address, unsigned int fault_flags)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 33514d88fef9..d9a21d06b862 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -715,13 +715,20 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
unsigned long haddr, pmd_t *pmd,
struct page *page)
{
+ struct mem_cgroup *memcg;
pgtable_t pgtable;
spinlock_t *ptl;
VM_BUG_ON_PAGE(!PageCompound(page), page);
+
+ if (mem_cgroup_try_charge(page, mm, GFP_TRANSHUGE, &memcg))
+ return VM_FAULT_OOM;
+
pgtable = pte_alloc_one(mm, haddr);
- if (unlikely(!pgtable))
+ if (unlikely(!pgtable)) {
+ mem_cgroup_cancel_charge(page, memcg);
return VM_FAULT_OOM;
+ }
clear_huge_page(page, haddr, HPAGE_PMD_NR);
/*
@@ -734,7 +741,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
ptl = pmd_lock(mm, pmd);
if (unlikely(!pmd_none(*pmd))) {
spin_unlock(ptl);
- mem_cgroup_uncharge_page(page);
+ mem_cgroup_cancel_charge(page, memcg);
put_page(page);
pte_free(mm, pgtable);
} else {
@@ -742,6 +749,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
entry = mk_huge_pmd(page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
page_add_new_anon_rmap(page, vma, haddr);
+ mem_cgroup_commit_charge(page, memcg, false);
+ lru_cache_add_active_or_unevictable(page, vma);
pgtable_trans_huge_deposit(mm, pmd, pgtable);
set_pmd_at(mm, haddr, pmd, entry);
add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
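
Distilled, the conversion above follows a try/commit/cancel charging pattern; a hedged sketch of that shape (example_charge_and_map and example_map_page are hypothetical stand-ins for the real page-table work):

static int example_charge_and_map(struct mm_struct *mm,
				  struct vm_area_struct *vma,
				  struct page *page, unsigned long haddr)
{
	struct mem_cgroup *memcg;

	if (mem_cgroup_try_charge(page, mm, GFP_TRANSHUGE, &memcg))
		return VM_FAULT_OOM;			/* charge refused */

	if (example_map_page(mm, vma, page, haddr)) {	/* hypothetical step */
		mem_cgroup_cancel_charge(page, memcg);	/* undo the charge */
		return VM_FAULT_OOM;
	}

	page_add_new_anon_rmap(page, vma, haddr);
	mem_cgroup_commit_charge(page, memcg, false);	/* false: not lrucare */
	lru_cache_add_active_or_unevictable(page, vma);
	return 0;
}
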
@@ -827,13 +836,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
}
- if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_KERNEL))) {
- put_page(page);
- count_vm_event(THP_FAULT_FALLBACK);
- return VM_FAULT_FALLBACK;
- }
if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
- mem_cgroup_uncharge_page(page);
put_page(page);
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
@@ -979,6 +982,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
struct page *page,
unsigned long haddr)
{
+ struct mem_cgroup *memcg;
spinlock_t *ptl;
pgtable_t pgtable;
pmd_t _pmd;
@@ -999,20 +1003,21 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
__GFP_OTHER_NODE,
vma, address, page_to_nid(page));
if (unlikely(!pages[i] ||
- mem_cgroup_charge_anon(pages[i], mm,
- GFP_KERNEL))) {
+ mem_cgroup_try_charge(pages[i], mm, GFP_KERNEL,
+ &memcg))) {
if (pages[i])
put_page(pages[i]);
- mem_cgroup_uncharge_start();
while (--i >= 0) {
- mem_cgroup_uncharge_page(pages[i]);
+ memcg = (void *)page_private(pages[i]);
+ set_page_private(pages[i], 0);
+ mem_cgroup_cancel_charge(pages[i], memcg);
put_page(pages[i]);
}
- mem_cgroup_uncharge_end();
kfree(pages);
ret |= VM_FAULT_OOM;
goto out;
}
+ set_page_private(pages[i], (unsigned long)memcg);
}
for (i = 0; i < HPAGE_PMD_NR; i++) {
@@ -1041,7 +1046,11 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
pte_t *pte, entry;
entry = mk_pte(pages[i], vma->vm_page_prot);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+ memcg = (void *)page_private(pages[i]);
+ set_page_private(pages[i], 0);
page_add_new_anon_rmap(pages[i], vma, haddr);
+ mem_cgroup_commit_charge(pages[i], memcg, false);
+ lru_cache_add_active_or_unevictable(pages[i], vma);
pte = pte_offset_map(&_pmd, haddr);
VM_BUG_ON(!pte_none(*pte));
set_pte_at(mm, haddr, pte, entry);
@@ -1065,12 +1074,12 @@ out:
out_free_pages:
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
- mem_cgroup_uncharge_start();
for (i = 0; i < HPAGE_PMD_NR; i++) {
- mem_cgroup_uncharge_page(pages[i]);
+ memcg = (void *)page_private(pages[i]);
+ set_page_private(pages[i], 0);
+ mem_cgroup_cancel_charge(pages[i], memcg);
put_page(pages[i]);
}
- mem_cgroup_uncharge_end();
kfree(pages);
goto out;
}
@@ -1081,6 +1090,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
spinlock_t *ptl;
int ret = 0;
struct page *page = NULL, *new_page;
+ struct mem_cgroup *memcg;
unsigned long haddr;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
@@ -1132,7 +1142,8 @@ alloc:
goto out;
}
- if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) {
+ if (unlikely(mem_cgroup_try_charge(new_page, mm,
+ GFP_TRANSHUGE, &memcg))) {
put_page(new_page);
if (page) {
split_huge_page(page);
@@ -1161,7 +1172,7 @@ alloc:
put_user_huge_page(page);
if (unlikely(!pmd_same(*pmd, orig_pmd))) {
spin_unlock(ptl);
- mem_cgroup_uncharge_page(new_page);
+ mem_cgroup_cancel_charge(new_page, memcg);
put_page(new_page);
goto out_mn;
} else {
@@ -1170,6 +1181,8 @@ alloc:
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
pmdp_clear_flush(vma, haddr, pmd);
page_add_new_anon_rmap(new_page, vma, haddr);
+ mem_cgroup_commit_charge(new_page, memcg, false);
+ lru_cache_add_active_or_unevictable(new_page, vma);
set_pmd_at(mm, haddr, pmd, entry);
update_mmu_cache_pmd(vma, address, pmd);
if (!page) {
@@ -1681,7 +1694,7 @@ static void __split_huge_page_refcount(struct page *page,
&page_tail->_count);
/* after clearing PageTail the gup refcount can be released */
- smp_mb();
+ smp_mb__after_atomic();
/*
* retain hwpoison flag of the poisoned tail page:
@@ -1775,6 +1788,8 @@ static int __split_huge_page_map(struct page *page,
if (pmd) {
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
pmd_populate(mm, &_pmd, pgtable);
+ if (pmd_write(*pmd))
+ BUG_ON(page_mapcount(page) != 1);
haddr = address;
for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
@@ -1784,8 +1799,6 @@ static int __split_huge_page_map(struct page *page,
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
if (!pmd_write(*pmd))
entry = pte_wrprotect(entry);
- else
- BUG_ON(page_mapcount(page) != 1);
if (!pmd_young(*pmd))
entry = pte_mkold(entry);
if (pmd_numa(*pmd))
@@ -2233,6 +2246,30 @@ static void khugepaged_alloc_sleep(void)
static int khugepaged_node_load[MAX_NUMNODES];
+static bool khugepaged_scan_abort(int nid)
+{
+ int i;
+
+ /*
+ * If zone_reclaim_mode is disabled, then no extra effort is made to
+ * allocate memory locally.
+ */
+ if (!zone_reclaim_mode)
+ return false;
+
+ /* If there is a count for this node already, it must be acceptable */
+ if (khugepaged_node_load[nid])
+ return false;
+
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ if (!khugepaged_node_load[i])
+ continue;
+ if (node_distance(nid, i) > RECLAIM_DISTANCE)
+ return true;
+ }
+ return false;
+}
+
#ifdef CONFIG_NUMA
static int khugepaged_find_target_node(void)
{
@@ -2389,6 +2426,7 @@ static void collapse_huge_page(struct mm_struct *mm,
spinlock_t *pmd_ptl, *pte_ptl;
int isolated;
unsigned long hstart, hend;
+ struct mem_cgroup *memcg;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
@@ -2399,7 +2437,8 @@ static void collapse_huge_page(struct mm_struct *mm,
if (!new_page)
return;
- if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)))
+ if (unlikely(mem_cgroup_try_charge(new_page, mm,
+ GFP_TRANSHUGE, &memcg)))
return;
/*
@@ -2486,6 +2525,8 @@ static void collapse_huge_page(struct mm_struct *mm,
spin_lock(pmd_ptl);
BUG_ON(!pmd_none(*pmd));
page_add_new_anon_rmap(new_page, vma, address);
+ mem_cgroup_commit_charge(new_page, memcg, false);
+ lru_cache_add_active_or_unevictable(new_page, vma);
pgtable_trans_huge_deposit(mm, pmd, pgtable);
set_pmd_at(mm, address, pmd, _pmd);
update_mmu_cache_pmd(vma, address, pmd);
@@ -2499,7 +2540,7 @@ out_up_write:
return;
out:
- mem_cgroup_uncharge_page(new_page);
+ mem_cgroup_cancel_charge(new_page, memcg);
goto out_up_write;
}
@@ -2545,6 +2586,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
* hit record.
*/
node = page_to_nid(page);
+ if (khugepaged_scan_abort(node))
+ goto out_unmap;
khugepaged_node_load[node]++;
VM_BUG_ON_PAGE(PageCompound(page), page);
if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
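khugepaged_scan_pmd() now consults khugepaged_scan_abort() above: with zone_reclaim_mode set, a collapse is abandoned as soon as the scanned pages span nodes farther apart than RECLAIM_DISTANCE. A standalone illustration of that distance rule (hypothetical helper mirroring the logic added above):

static bool nodes_too_far(int nid, const int *node_load, int nr_nodes)
{
	int i;

	for (i = 0; i < nr_nodes; i++) {
		if (!node_load[i])
			continue;	/* node not seen during this scan */
		if (node_distance(nid, i) > RECLAIM_DISTANCE)
			return true;	/* remote enough to abort the collapse */
	}
	return false;
}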
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7a0a73d2fcff..eeceeeb09019 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -35,7 +35,6 @@
#include <linux/node.h>
#include "internal.h"
-const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
unsigned long hugepages_treat_as_movable;
int hugetlb_max_hstate __read_mostly;
@@ -1089,6 +1088,9 @@ void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
unsigned long pfn;
struct hstate *h;
+ if (!hugepages_supported())
+ return;
+
/* Set scan step to minimum hugepage size */
for_each_hstate(h)
if (order > huge_page_order(h))
@@ -1734,21 +1736,13 @@ static ssize_t nr_hugepages_show_common(struct kobject *kobj,
return sprintf(buf, "%lu\n", nr_huge_pages);
}
-static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
- struct kobject *kobj, struct kobj_attribute *attr,
- const char *buf, size_t len)
+static ssize_t __nr_hugepages_store_common(bool obey_mempolicy,
+ struct hstate *h, int nid,
+ unsigned long count, size_t len)
{
int err;
- int nid;
- unsigned long count;
- struct hstate *h;
NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
- err = kstrtoul(buf, 10, &count);
- if (err)
- goto out;
-
- h = kobj_to_hstate(kobj, &nid);
if (hstate_is_gigantic(h) && !gigantic_page_supported()) {
err = -EINVAL;
goto out;
@@ -1784,6 +1778,23 @@ out:
return err;
}
+static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
+ struct kobject *kobj, const char *buf,
+ size_t len)
+{
+ struct hstate *h;
+ unsigned long count;
+ int nid;
+ int err;
+
+ err = kstrtoul(buf, 10, &count);
+ if (err)
+ return err;
+
+ h = kobj_to_hstate(kobj, &nid);
+ return __nr_hugepages_store_common(obey_mempolicy, h, nid, count, len);
+}
+
static ssize_t nr_hugepages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -1793,7 +1804,7 @@ static ssize_t nr_hugepages_show(struct kobject *kobj,
static ssize_t nr_hugepages_store(struct kobject *kobj,
struct kobj_attribute *attr, const char *buf, size_t len)
{
- return nr_hugepages_store_common(false, kobj, attr, buf, len);
+ return nr_hugepages_store_common(false, kobj, buf, len);
}
HSTATE_ATTR(nr_hugepages);
@@ -1812,7 +1823,7 @@ static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj,
static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj,
struct kobj_attribute *attr, const char *buf, size_t len)
{
- return nr_hugepages_store_common(true, kobj, attr, buf, len);
+ return nr_hugepages_store_common(true, kobj, buf, len);
}
HSTATE_ATTR(nr_hugepages_mempolicy);
#endif
@@ -2248,36 +2259,21 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
void __user *buffer, size_t *length, loff_t *ppos)
{
struct hstate *h = &default_hstate;
- unsigned long tmp;
+ unsigned long tmp = h->max_huge_pages;
int ret;
if (!hugepages_supported())
return -ENOTSUPP;
- tmp = h->max_huge_pages;
-
- if (write && hstate_is_gigantic(h) && !gigantic_page_supported())
- return -EINVAL;
-
table->data = &tmp;
table->maxlen = sizeof(unsigned long);
ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
if (ret)
goto out;
- if (write) {
- NODEMASK_ALLOC(nodemask_t, nodes_allowed,
- GFP_KERNEL | __GFP_NORETRY);
- if (!(obey_mempolicy &&
- init_nodemask_of_mempolicy(nodes_allowed))) {
- NODEMASK_FREE(nodes_allowed);
- nodes_allowed = &node_states[N_MEMORY];
- }
- h->max_huge_pages = set_max_huge_pages(h, tmp, nodes_allowed);
-
- if (nodes_allowed != &node_states[N_MEMORY])
- NODEMASK_FREE(nodes_allowed);
- }
+ if (write)
+ ret = __nr_hugepages_store_common(obey_mempolicy, h,
+ NUMA_NO_NODE, tmp, *length);
out:
return ret;
}
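Both the sysfs store callbacks and the sysctl handler now funnel into __nr_hugepages_store_common(); the sysctl path passes NUMA_NO_NODE since it is never per-node. A sketch of the resulting parse-then-delegate shape (hypothetical wrapper, not one of the real entry points):

static ssize_t store_nr_hugepages_sketch(struct hstate *h, int nid,
					 const char *buf, size_t len)
{
	unsigned long count;
	int err;

	err = kstrtoul(buf, 10, &count);	/* parse once, up front */
	if (err)
		return err;

	/* all nodemask/gigantic-page policy lives in the shared helper */
	return __nr_hugepages_store_common(false, h, nid, count, len);
}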
@@ -2754,8 +2750,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
* from other VMAs and let the children be SIGKILLed if they are faulting the
* same region.
*/
-static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
- struct page *page, unsigned long address)
+static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
+ struct page *page, unsigned long address)
{
struct hstate *h = hstate_vma(vma);
struct vm_area_struct *iter_vma;
@@ -2794,8 +2790,6 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
address + huge_page_size(h), page);
}
mutex_unlock(&mapping->i_mmap_mutex);
-
- return 1;
}
/*
@@ -2810,7 +2804,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
{
struct hstate *h = hstate_vma(vma);
struct page *old_page, *new_page;
- int outside_reserve = 0;
+ int ret = 0, outside_reserve = 0;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
@@ -2840,14 +2834,14 @@ retry_avoidcopy:
page_cache_get(old_page);
- /* Drop page table lock as buddy allocator may be called */
+ /*
+ * Drop page table lock as buddy allocator may be called. It will
+ * be acquired again before returning to the caller, as expected.
+ */
spin_unlock(ptl);
new_page = alloc_huge_page(vma, address, outside_reserve);
if (IS_ERR(new_page)) {
- long err = PTR_ERR(new_page);
- page_cache_release(old_page);
-
/*
* If a process owning a MAP_PRIVATE mapping fails to COW,
* it is due to references held by a child and an insufficient
@@ -2856,29 +2850,25 @@ retry_avoidcopy:
* may get SIGKILLed if it later faults.
*/
if (outside_reserve) {
+ page_cache_release(old_page);
BUG_ON(huge_pte_none(pte));
- if (unmap_ref_private(mm, vma, old_page, address)) {
- BUG_ON(huge_pte_none(pte));
- spin_lock(ptl);
- ptep = huge_pte_offset(mm, address & huge_page_mask(h));
- if (likely(ptep &&
- pte_same(huge_ptep_get(ptep), pte)))
- goto retry_avoidcopy;
- /*
- * race occurs while re-acquiring page table
- * lock, and our job is done.
- */
- return 0;
- }
- WARN_ON_ONCE(1);
+ unmap_ref_private(mm, vma, old_page, address);
+ BUG_ON(huge_pte_none(pte));
+ spin_lock(ptl);
+ ptep = huge_pte_offset(mm, address & huge_page_mask(h));
+ if (likely(ptep &&
+ pte_same(huge_ptep_get(ptep), pte)))
+ goto retry_avoidcopy;
+ /*
+ * A race occurred while re-acquiring the page table
+ * lock, and our job is done.
+ */
+ return 0;
}
- /* Caller expects lock to be held */
- spin_lock(ptl);
- if (err == -ENOMEM)
- return VM_FAULT_OOM;
- else
- return VM_FAULT_SIGBUS;
+ ret = (PTR_ERR(new_page) == -ENOMEM) ?
+ VM_FAULT_OOM : VM_FAULT_SIGBUS;
+ goto out_release_old;
}
/*
@@ -2886,11 +2876,8 @@ retry_avoidcopy:
* anon_vma prepared.
*/
if (unlikely(anon_vma_prepare(vma))) {
- page_cache_release(new_page);
- page_cache_release(old_page);
- /* Caller expects lock to be held */
- spin_lock(ptl);
- return VM_FAULT_OOM;
+ ret = VM_FAULT_OOM;
+ goto out_release_all;
}
copy_user_huge_page(new_page, old_page, address, vma,
@@ -2900,6 +2887,7 @@ retry_avoidcopy:
mmun_start = address & huge_page_mask(h);
mmun_end = mmun_start + huge_page_size(h);
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
/*
* Retake the page table lock to check for racing updates
* before the page tables are altered
@@ -2920,12 +2908,13 @@ retry_avoidcopy:
}
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+out_release_all:
page_cache_release(new_page);
+out_release_old:
page_cache_release(old_page);
- /* Caller expects lock to be held */
- spin_lock(ptl);
- return 0;
+ spin_lock(ptl); /* Caller expects lock to be held */
+ return ret;
}
/* Return the pagecache page at a given address within a VMA */
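The hugetlb_cow() rework above replaces scattered early returns (each re-taking ptl and releasing pages by hand) with one exit path whose labels unwind in reverse order of acquisition. The shape, reduced to a hedged generic sketch with hypothetical resources and helpers:

static int cow_like_operation(void)
{
	void *a, *b;		/* hypothetical resources */
	int ret = 0;

	a = acquire_a();
	if (!a)
		return -ENOMEM;

	b = acquire_b();
	if (!b) {
		ret = -ENOMEM;
		goto out_release_a;	/* only 'a' is held here */
	}

	if (do_work(a, b))
		ret = -EIO;	/* success and failure share the unwind */

	release_b(b);
out_release_a:
	release_a(a);
	return ret;
}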
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index 95487c71cad5..329caf56df22 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -72,8 +72,7 @@ DEFINE_SIMPLE_ATTRIBUTE(unpoison_fops, NULL, hwpoison_unpoison, "%lli\n");
static void pfn_inject_exit(void)
{
- if (hwpoison_dir)
- debugfs_remove_recursive(hwpoison_dir);
+ debugfs_remove_recursive(hwpoison_dir);
}
static int pfn_inject_init(void)
diff --git a/mm/internal.h b/mm/internal.h
index 7f22a11fcc66..a1b651b11c5f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -247,7 +247,7 @@ static inline void mlock_migrate_page(struct page *new, struct page *old) { }
static inline struct page *mem_map_offset(struct page *base, int offset)
{
if (unlikely(offset >= MAX_ORDER_NR_PAGES))
- return pfn_to_page(page_to_pfn(base) + offset);
+ return nth_page(base, offset);
return base + offset;
}
diff --git a/mm/madvise.c b/mm/madvise.c
index a402f8fdc68e..0938b30da4ab 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -292,9 +292,6 @@ static long madvise_dontneed(struct vm_area_struct *vma,
/*
* Application wants to free up the pages and associated backing store.
* This is effectively punching a hole into the middle of a file.
- *
- * NOTE: Currently, only shmfs/tmpfs is supported for this operation.
- * Other filesystems return -ENOSYS.
*/
static long madvise_remove(struct vm_area_struct *vma,
struct vm_area_struct **prev,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f009a14918d2..475ecadd9646 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -754,9 +754,11 @@ static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
struct mem_cgroup_tree_per_zone *mctz)
{
- spin_lock(&mctz->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&mctz->lock, flags);
__mem_cgroup_remove_exceeded(mz, mctz);
- spin_unlock(&mctz->lock);
+ spin_unlock_irqrestore(&mctz->lock, flags);
}
@@ -779,7 +781,9 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
* mem is over its softlimit.
*/
if (excess || mz->on_tree) {
- spin_lock(&mctz->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&mctz->lock, flags);
/* if on-tree, remove it */
if (mz->on_tree)
__mem_cgroup_remove_exceeded(mz, mctz);
@@ -788,7 +792,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
* If excess is 0, no tree ops.
*/
__mem_cgroup_insert_exceeded(mz, mctz, excess);
- spin_unlock(&mctz->lock);
+ spin_unlock_irqrestore(&mctz->lock, flags);
}
}
}
@@ -839,9 +843,9 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
{
struct mem_cgroup_per_zone *mz;
- spin_lock(&mctz->lock);
+ spin_lock_irq(&mctz->lock);
mz = __mem_cgroup_largest_soft_limit_node(mctz);
- spin_unlock(&mctz->lock);
+ spin_unlock_irq(&mctz->lock);
return mz;
}
@@ -882,13 +886,6 @@ static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
return val;
}
-static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
- bool charge)
-{
- int val = (charge) ? 1 : -1;
- this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
-}
-
static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
enum mem_cgroup_events_index idx)
{
@@ -909,13 +906,13 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
struct page *page,
- bool anon, int nr_pages)
+ int nr_pages)
{
/*
* Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is
* counted as CACHE even if it's on ANON LRU.
*/
- if (anon)
+ if (PageAnon(page))
__this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS],
nr_pages);
else
@@ -1013,7 +1010,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
*/
static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
{
- preempt_disable();
/* threshold event is triggered in finer grain than soft limit */
if (unlikely(mem_cgroup_event_ratelimit(memcg,
MEM_CGROUP_TARGET_THRESH))) {
@@ -1026,8 +1022,6 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
do_numainfo = mem_cgroup_event_ratelimit(memcg,
MEM_CGROUP_TARGET_NUMAINFO);
#endif
- preempt_enable();
-
mem_cgroup_threshold(memcg);
if (unlikely(do_softlimit))
mem_cgroup_update_tree(memcg, page);
@@ -1035,8 +1029,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
if (unlikely(do_numainfo))
atomic_inc(&memcg->numainfo_events);
#endif
- } else
- preempt_enable();
+ }
}
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
@@ -1347,20 +1340,6 @@ out:
return lruvec;
}
-/*
- * Following LRU functions are allowed to be used without PCG_LOCK.
- * Operations are called by routine of global LRU independently from memcg.
- * What we have to take care of here is validness of pc->mem_cgroup.
- *
- * Changes to pc->mem_cgroup happens when
- * 1. charge
- * 2. moving account
- * In typical case, "charge" is done before add-to-lru. Exception is SwapCache.
- * It is added to LRU before charge.
- * If PCG_USED bit is not set, page_cgroup is not added to this private LRU.
- * When moving account, the page is not on LRU. It's isolated.
- */
-
/**
* mem_cgroup_page_lruvec - return lruvec for adding an lru page
* @page: the page
@@ -2261,22 +2240,14 @@ cleanup:
*
* Notes: Race condition
*
- * We usually use lock_page_cgroup() for accessing page_cgroup member but
- * it tends to be costly. But considering some conditions, we doesn't need
- * to do so _always_.
+ * Charging occurs during page instantiation, while the page is
+ * unmapped and locked in page migration, or while the page table is
+ * locked in THP migration. No race is possible.
*
- * Considering "charge", lock_page_cgroup() is not required because all
- * file-stat operations happen after a page is attached to radix-tree. There
- * are no race with "charge".
+ * Uncharge happens to pages with zero references, no race possible.
*
- * Considering "uncharge", we know that memcg doesn't clear pc->mem_cgroup
- * at "uncharge" intentionally. So, we always see valid pc->mem_cgroup even
- * if there are race with "uncharge". Statistics itself is properly handled
- * by flags.
- *
- * Considering "move", this is an only case we see a race. To make the race
- * small, we check memcg->moving_account and detect there are possibility
- * of race or not. If there is, we take a lock.
+ * Charge moving between groups is protected by checking mm->moving
+ * account and taking the move_lock in the slowpath.
*/
void __mem_cgroup_begin_update_page_stat(struct page *page,
@@ -2551,55 +2522,63 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
return NOTIFY_OK;
}
-
-/* See mem_cgroup_try_charge() for details */
-enum {
- CHARGE_OK, /* success */
- CHARGE_RETRY, /* need to retry but retry is not bad */
- CHARGE_NOMEM, /* we can't do more. return -ENOMEM */
- CHARGE_WOULDBLOCK, /* GFP_WAIT wasn't set and no enough res. */
-};
-
-static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
- unsigned int nr_pages, unsigned int min_pages,
- bool invoke_oom)
+static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
+ unsigned int nr_pages)
{
- unsigned long csize = nr_pages * PAGE_SIZE;
+ unsigned int batch = max(CHARGE_BATCH, nr_pages);
+ int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
struct mem_cgroup *mem_over_limit;
struct res_counter *fail_res;
+ unsigned long nr_reclaimed;
unsigned long flags = 0;
- int ret;
+ unsigned long long size;
+ int ret = 0;
- ret = res_counter_charge(&memcg->res, csize, &fail_res);
+retry:
+ if (consume_stock(memcg, nr_pages))
+ goto done;
- if (likely(!ret)) {
+ size = batch * PAGE_SIZE;
+ if (!res_counter_charge(&memcg->res, size, &fail_res)) {
if (!do_swap_account)
- return CHARGE_OK;
- ret = res_counter_charge(&memcg->memsw, csize, &fail_res);
- if (likely(!ret))
- return CHARGE_OK;
-
- res_counter_uncharge(&memcg->res, csize);
+ goto done_restock;
+ if (!res_counter_charge(&memcg->memsw, size, &fail_res))
+ goto done_restock;
+ res_counter_uncharge(&memcg->res, size);
mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
flags |= MEM_CGROUP_RECLAIM_NOSWAP;
} else
mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
+
+ if (batch > nr_pages) {
+ batch = nr_pages;
+ goto retry;
+ }
+
/*
- * Never reclaim on behalf of optional batching, retry with a
- * single page instead.
+ * Unlike in global OOM situations, memcg is not in a physical
+ * memory shortage. Allow dying and OOM-killed tasks to
+ * bypass the last charges so that they can exit quickly and
+ * free their memory.
*/
- if (nr_pages > min_pages)
- return CHARGE_RETRY;
+ if (unlikely(test_thread_flag(TIF_MEMDIE) ||
+ fatal_signal_pending(current) ||
+ current->flags & PF_EXITING))
+ goto bypass;
+
+ if (unlikely(task_in_memcg_oom(current)))
+ goto nomem;
if (!(gfp_mask & __GFP_WAIT))
- return CHARGE_WOULDBLOCK;
+ goto nomem;
- if (gfp_mask & __GFP_NORETRY)
- return CHARGE_NOMEM;
+ nr_reclaimed = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
- ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
- if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
- return CHARGE_RETRY;
+ if (mem_cgroup_margin(mem_over_limit) >= batch)
+ goto retry;
+
+ if (gfp_mask & __GFP_NORETRY)
+ goto nomem;
/*
* Even though the limit is exceeded at this point, reclaim
* may have been able to free some pages. Retry the charge
@@ -2609,142 +2588,47 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
* unlikely to succeed so close to the limit, and we fall back
* to regular pages anyway in case of failure.
*/
- if (nr_pages <= (1 << PAGE_ALLOC_COSTLY_ORDER) && ret)
- return CHARGE_RETRY;
-
+ if (nr_reclaimed && batch <= (1 << PAGE_ALLOC_COSTLY_ORDER))
+ goto retry;
/*
* At task move, charge accounts can be doubly counted. So, it's
* better to wait until the end of task_move if something is going on.
*/
if (mem_cgroup_wait_acct_move(mem_over_limit))
- return CHARGE_RETRY;
-
- if (invoke_oom)
- mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(csize));
-
- return CHARGE_NOMEM;
-}
-
-/**
- * mem_cgroup_try_charge - try charging a memcg
- * @memcg: memcg to charge
- * @nr_pages: number of pages to charge
- * @oom: trigger OOM if reclaim fails
- *
- * Returns 0 if @memcg was charged successfully, -EINTR if the charge
- * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
- */
-static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
- gfp_t gfp_mask,
- unsigned int nr_pages,
- bool oom)
-{
- unsigned int batch = max(CHARGE_BATCH, nr_pages);
- int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
- int ret;
-
- if (mem_cgroup_is_root(memcg))
- goto done;
- /*
- * Unlike in global OOM situations, memcg is not in a physical
- * memory shortage. Allow dying and OOM-killed tasks to
- * bypass the last charges so that they can exit quickly and
- * free their memory.
- */
- if (unlikely(test_thread_flag(TIF_MEMDIE) ||
- fatal_signal_pending(current) ||
- current->flags & PF_EXITING))
- goto bypass;
+ goto retry;
- if (unlikely(task_in_memcg_oom(current)))
- goto nomem;
+ if (nr_retries--)
+ goto retry;
if (gfp_mask & __GFP_NOFAIL)
- oom = false;
-again:
- if (consume_stock(memcg, nr_pages))
- goto done;
-
- do {
- bool invoke_oom = oom && !nr_oom_retries;
-
- /* If killed, bypass charge */
- if (fatal_signal_pending(current))
- goto bypass;
+ goto bypass;
- ret = mem_cgroup_do_charge(memcg, gfp_mask, batch,
- nr_pages, invoke_oom);
- switch (ret) {
- case CHARGE_OK:
- break;
- case CHARGE_RETRY: /* not in OOM situation but retry */
- batch = nr_pages;
- goto again;
- case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
- goto nomem;
- case CHARGE_NOMEM: /* OOM routine works */
- if (!oom || invoke_oom)
- goto nomem;
- nr_oom_retries--;
- break;
- }
- } while (ret != CHARGE_OK);
+ if (fatal_signal_pending(current))
+ goto bypass;
- if (batch > nr_pages)
- refill_stock(memcg, batch - nr_pages);
-done:
- return 0;
+ mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(batch));
nomem:
if (!(gfp_mask & __GFP_NOFAIL))
return -ENOMEM;
bypass:
- return -EINTR;
-}
-
-/**
- * mem_cgroup_try_charge_mm - try charging a mm
- * @mm: mm_struct to charge
- * @nr_pages: number of pages to charge
- * @oom: trigger OOM if reclaim fails
- *
- * Returns the charged mem_cgroup associated with the given mm_struct or
- * NULL the charge failed.
- */
-static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
- gfp_t gfp_mask,
- unsigned int nr_pages,
- bool oom)
-
-{
- struct mem_cgroup *memcg;
- int ret;
-
- memcg = get_mem_cgroup_from_mm(mm);
- ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom);
- css_put(&memcg->css);
- if (ret == -EINTR)
- memcg = root_mem_cgroup;
- else if (ret)
- memcg = NULL;
+ memcg = root_mem_cgroup;
+ ret = -EINTR;
+ goto retry;
- return memcg;
+done_restock:
+ if (batch > nr_pages)
+ refill_stock(memcg, batch - nr_pages);
+done:
+ return ret;
}
-/*
- * Sometimes we have to undo a charge we got by try_charge().
- * This function does that uncharge and puts the css refcount
- * obtained by try_charge().
- */
-static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
- unsigned int nr_pages)
+static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
{
- if (!mem_cgroup_is_root(memcg)) {
- unsigned long bytes = nr_pages * PAGE_SIZE;
+ unsigned long bytes = nr_pages * PAGE_SIZE;
- res_counter_uncharge(&memcg->res, bytes);
- if (do_swap_account)
- res_counter_uncharge(&memcg->memsw, bytes);
- }
+ res_counter_uncharge(&memcg->res, bytes);
+ if (do_swap_account)
+ res_counter_uncharge(&memcg->memsw, bytes);
}
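try_charge() above always tries the per-cpu stock first, then charges a full CHARGE_BATCH worth against the res_counter and parks the surplus back in the stock via refill_stock(), so most charges never touch the shared counter. The batching idea in isolation, as a hedged toy model (plain counters instead of res_counter/memcg_stock):

struct toy_stock {
	unsigned long cached;		/* pages pre-charged for this CPU */
};

static int toy_charge(struct toy_stock *stock, unsigned long *usage,
		      unsigned long limit, unsigned int nr_pages,
		      unsigned int batch)
{
	if (stock->cached >= nr_pages) {	/* fast path: consume stock */
		stock->cached -= nr_pages;
		return 0;
	}
	if (batch < nr_pages)
		batch = nr_pages;
	if (*usage + batch > limit)
		return -ENOMEM;			/* caller would reclaim/retry */
	*usage += batch;			/* one update to the shared counter */
	stock->cached += batch - nr_pages;	/* keep the surplus local */
	return 0;
}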
/*
@@ -2756,9 +2640,6 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
{
unsigned long bytes = nr_pages * PAGE_SIZE;
- if (mem_cgroup_is_root(memcg))
- return;
-
res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes);
if (do_swap_account)
res_counter_uncharge_until(&memcg->memsw,
@@ -2779,6 +2660,16 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
return mem_cgroup_from_id(id);
}
+/*
+ * try_get_mem_cgroup_from_page - look up page's memcg association
+ * @page: the page
+ *
+ * Look up, get a css reference, and return the memcg that owns @page.
+ *
+ * The page must be locked to prevent racing with swap-in and page
+ * cache charges. If coming from an unlocked page table, the caller
+ * must ensure the page is on the LRU or this can race with charging.
+ */
struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
{
struct mem_cgroup *memcg = NULL;
@@ -2789,7 +2680,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
VM_BUG_ON_PAGE(!PageLocked(page), page);
pc = lookup_page_cgroup(page);
- lock_page_cgroup(pc);
if (PageCgroupUsed(pc)) {
memcg = pc->mem_cgroup;
if (memcg && !css_tryget_online(&memcg->css))
@@ -2803,23 +2693,46 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
memcg = NULL;
rcu_read_unlock();
}
- unlock_page_cgroup(pc);
return memcg;
}
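The kerneldoc added above states the contract for try_get_mem_cgroup_from_page(): the page must be locked (or known to be on the LRU) so the association cannot change while it is looked up. A minimal caller sketch honouring that contract (hypothetical wrapper):

static struct mem_cgroup *page_memcg_get(struct page *page)
{
	struct mem_cgroup *memcg;

	lock_page(page);		/* serialize against charge/uncharge */
	memcg = try_get_mem_cgroup_from_page(page);
	unlock_page(page);

	return memcg;			/* css reference held, or NULL */
}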
-static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
- struct page *page,
- unsigned int nr_pages,
- enum charge_type ctype,
- bool lrucare)
+static void lock_page_lru(struct page *page, int *isolated)
+{
+ struct zone *zone = page_zone(page);
+
+ spin_lock_irq(&zone->lru_lock);
+ if (PageLRU(page)) {
+ struct lruvec *lruvec;
+
+ lruvec = mem_cgroup_page_lruvec(page, zone);
+ ClearPageLRU(page);
+ del_page_from_lru_list(page, lruvec, page_lru(page));
+ *isolated = 1;
+ } else
+ *isolated = 0;
+}
+
+static void unlock_page_lru(struct page *page, int isolated)
+{
+ struct zone *zone = page_zone(page);
+
+ if (isolated) {
+ struct lruvec *lruvec;
+
+ lruvec = mem_cgroup_page_lruvec(page, zone);
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+ SetPageLRU(page);
+ add_page_to_lru_list(page, lruvec, page_lru(page));
+ }
+ spin_unlock_irq(&zone->lru_lock);
+}
+
+static void commit_charge(struct page *page, struct mem_cgroup *memcg,
+ unsigned int nr_pages, bool lrucare)
{
struct page_cgroup *pc = lookup_page_cgroup(page);
- struct zone *uninitialized_var(zone);
- struct lruvec *lruvec;
- bool was_on_lru = false;
- bool anon;
+ int isolated;
- lock_page_cgroup(pc);
VM_BUG_ON_PAGE(PageCgroupUsed(pc), page);
/*
 * we don't need page_cgroup_lock about tail pages, because they are not
@@ -2830,52 +2743,38 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
* In some cases, SwapCache and FUSE(splice_buf->radixtree), the page
* may already be on some other mem_cgroup's LRU. Take care of it.
*/
- if (lrucare) {
- zone = page_zone(page);
- spin_lock_irq(&zone->lru_lock);
- if (PageLRU(page)) {
- lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
- ClearPageLRU(page);
- del_page_from_lru_list(page, lruvec, page_lru(page));
- was_on_lru = true;
- }
- }
+ if (lrucare)
+ lock_page_lru(page, &isolated);
- pc->mem_cgroup = memcg;
/*
- * We access a page_cgroup asynchronously without lock_page_cgroup().
- * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
- * is accessed after testing USED bit. To make pc->mem_cgroup visible
- * before USED bit, we need memory barrier here.
- * See mem_cgroup_add_lru_list(), etc.
+ * Nobody should be changing or seriously looking at
+ * pc->mem_cgroup and pc->flags at this point:
+ *
+ * - the page is uncharged
+ *
+ * - the page is off-LRU
+ *
+ * - an anonymous fault has exclusive page access, except for
+ * a locked page table
+ *
+ * - a page cache insertion, a swapin fault, or a migration
+ * have the page locked
*/
- smp_wmb();
- SetPageCgroupUsed(pc);
-
- if (lrucare) {
- if (was_on_lru) {
- lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
- VM_BUG_ON_PAGE(PageLRU(page), page);
- SetPageLRU(page);
- add_page_to_lru_list(page, lruvec, page_lru(page));
- }
- spin_unlock_irq(&zone->lru_lock);
- }
-
- if (ctype == MEM_CGROUP_CHARGE_TYPE_ANON)
- anon = true;
- else
- anon = false;
+ pc->mem_cgroup = memcg;
+ pc->flags = PCG_USED | PCG_MEM | (do_swap_account ? PCG_MEMSW : 0);
- mem_cgroup_charge_statistics(memcg, page, anon, nr_pages);
- unlock_page_cgroup(pc);
+ if (lrucare)
+ unlock_page_lru(page, isolated);
+ local_irq_disable();
+ mem_cgroup_charge_statistics(memcg, page, nr_pages);
/*
* "charge_statistics" updated event counter. Then, check it.
* Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
* if they exceeds softlimit.
*/
memcg_check_events(memcg, page);
+ local_irq_enable();
}
static DEFINE_MUTEX(set_limit_mutex);
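For pages that may already sit on an LRU (swap cache, shmem replacement), commit_charge() above isolates the page under zone->lru_lock before rewriting pc->mem_cgroup and re-links it afterwards. The same isolate/modify/putback shape folded into one function, as a hedged sketch (retag_page() is a hypothetical stand-in for the ownership update):

static void retag_page_lrucare(struct page *page)
{
	struct zone *zone = page_zone(page);
	struct lruvec *lruvec;
	bool isolated = false;

	spin_lock_irq(&zone->lru_lock);
	if (PageLRU(page)) {
		lruvec = mem_cgroup_page_lruvec(page, zone);
		ClearPageLRU(page);
		del_page_from_lru_list(page, lruvec, page_lru(page));
		isolated = true;
	}

	retag_page(page);	/* hypothetical: safe while off-LRU */

	if (isolated) {
		lruvec = mem_cgroup_page_lruvec(page, zone);
		SetPageLRU(page);
		add_page_to_lru_list(page, lruvec, page_lru(page));
	}
	spin_unlock_irq(&zone->lru_lock);
}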
@@ -2937,22 +2836,21 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
if (ret)
return ret;
- ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT,
- oom_gfp_allowed(gfp));
+ ret = try_charge(memcg, gfp, size >> PAGE_SHIFT);
if (ret == -EINTR) {
/*
- * mem_cgroup_try_charge() chosed to bypass to root due to
- * OOM kill or fatal signal. Since our only options are to
- * either fail the allocation or charge it to this cgroup, do
- * it as a temporary condition. But we can't fail. From a
- * kmem/slab perspective, the cache has already been selected,
- * by mem_cgroup_kmem_get_cache(), so it is too late to change
+ * try_charge() chose to bypass to root due to OOM kill or
+ * fatal signal. Since our only options are to either fail
+ * the allocation or charge it to this cgroup, do it as a
+ * temporary condition. But we can't fail. From a kmem/slab
+ * perspective, the cache has already been selected, by
+ * mem_cgroup_kmem_get_cache(), so it is too late to change
* our minds.
*
* This condition will only trigger if the task entered
- * memcg_charge_kmem in a sane state, but was OOM-killed during
- * mem_cgroup_try_charge() above. Tasks that were already
- * dying when the allocation triggers should have been already
+ * memcg_charge_kmem in a sane state, but was OOM-killed
+ * during try_charge() above. Tasks that were already dying
+ * when the allocation triggers should have been already
* directed to the root cgroup in memcontrol.h
*/
res_counter_charge_nofail(&memcg->res, size, &fail_res);
@@ -3463,12 +3361,13 @@ void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
return;
}
-
+ /*
+ * The page is freshly allocated and not visible to any
+ * outside callers yet. Set up pc non-atomically.
+ */
pc = lookup_page_cgroup(page);
- lock_page_cgroup(pc);
pc->mem_cgroup = memcg;
- SetPageCgroupUsed(pc);
- unlock_page_cgroup(pc);
+ pc->flags = PCG_USED;
}
void __memcg_kmem_uncharge_pages(struct page *page, int order)
@@ -3478,19 +3377,11 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
pc = lookup_page_cgroup(page);
- /*
- * Fast unlocked return. Theoretically might have changed, have to
- * check again after locking.
- */
if (!PageCgroupUsed(pc))
return;
- lock_page_cgroup(pc);
- if (PageCgroupUsed(pc)) {
- memcg = pc->mem_cgroup;
- ClearPageCgroupUsed(pc);
- }
- unlock_page_cgroup(pc);
+ memcg = pc->mem_cgroup;
+ pc->flags = 0;
/*
* We trust that only if there is a memcg associated with the page, it
@@ -3510,7 +3401,6 @@ static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION)
/*
* Because tail pages are not marked as "used", set it. We're under
* zone->lru_lock, 'splitting on pmd' and compound_lock.
@@ -3531,8 +3421,7 @@ void mem_cgroup_split_huge_fixup(struct page *head)
for (i = 1; i < HPAGE_PMD_NR; i++) {
pc = head_pc + i;
pc->mem_cgroup = memcg;
- smp_wmb();/* see __commit_charge() */
- pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+ pc->flags = head_pc->flags;
}
__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
HPAGE_PMD_NR);
@@ -3562,7 +3451,6 @@ static int mem_cgroup_move_account(struct page *page,
{
unsigned long flags;
int ret;
- bool anon = PageAnon(page);
VM_BUG_ON(from == to);
VM_BUG_ON_PAGE(PageLRU(page), page);
@@ -3576,15 +3464,21 @@ static int mem_cgroup_move_account(struct page *page,
if (nr_pages > 1 && !PageTransHuge(page))
goto out;
- lock_page_cgroup(pc);
+ /*
+ * Prevent mem_cgroup_migrate() from looking at pc->mem_cgroup
+ * of its source page while we change it: page migration takes
+ * both pages off the LRU, but page cache replacement doesn't.
+ */
+ if (!trylock_page(page))
+ goto out;
ret = -EINVAL;
if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
- goto unlock;
+ goto out_unlock;
move_lock_mem_cgroup(from, &flags);
- if (!anon && page_mapped(page)) {
+ if (!PageAnon(page) && page_mapped(page)) {
__this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
nr_pages);
__this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
@@ -3598,20 +3492,25 @@ static int mem_cgroup_move_account(struct page *page,
nr_pages);
}
- mem_cgroup_charge_statistics(from, page, anon, -nr_pages);
+ /*
+ * It is safe to change pc->mem_cgroup here because the page
+ * is referenced, charged, and isolated - we can't race with
+ * uncharging, charging, migration, or LRU putback.
+ */
/* caller should have done css_get */
pc->mem_cgroup = to;
- mem_cgroup_charge_statistics(to, page, anon, nr_pages);
move_unlock_mem_cgroup(from, &flags);
ret = 0;
-unlock:
- unlock_page_cgroup(pc);
- /*
- * check events
- */
+
+ local_irq_disable();
+ mem_cgroup_charge_statistics(to, page, nr_pages);
memcg_check_events(to, page);
+ mem_cgroup_charge_statistics(from, page, -nr_pages);
memcg_check_events(from, page);
+ local_irq_enable();
+out_unlock:
+ unlock_page(page);
out:
return ret;
}
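mem_cgroup_move_account() now pins the page with trylock_page() so it cannot be migrated or have its page cache slot replaced underneath the move, and simply skips the page if someone else holds the lock. The trylock-or-skip shape in a hedged sketch (do_move() is hypothetical):

static int move_one_page(struct page *page)
{
	int ret;

	if (!trylock_page(page))
		return -EBUSY;		/* contended: leave it for later */

	ret = do_move(page);		/* hypothetical account transfer */

	unlock_page(page);
	return ret;
}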
@@ -3682,456 +3581,12 @@ out:
return ret;
}
-int mem_cgroup_charge_anon(struct page *page,
- struct mm_struct *mm, gfp_t gfp_mask)
-{
- unsigned int nr_pages = 1;
- struct mem_cgroup *memcg;
- bool oom = true;
-
- if (mem_cgroup_disabled())
- return 0;
-
- VM_BUG_ON_PAGE(page_mapped(page), page);
- VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
- VM_BUG_ON(!mm);
-
- if (PageTransHuge(page)) {
- nr_pages <<= compound_order(page);
- VM_BUG_ON_PAGE(!PageTransHuge(page), page);
- /*
- * Never OOM-kill a process for a huge page. The
- * fault handler will fall back to regular pages.
- */
- oom = false;
- }
-
- memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom);
- if (!memcg)
- return -ENOMEM;
- __mem_cgroup_commit_charge(memcg, page, nr_pages,
- MEM_CGROUP_CHARGE_TYPE_ANON, false);
- return 0;
-}
-
-/*
- * While swap-in, try_charge -> commit or cancel, the page is locked.
- * And when try_charge() successfully returns, one refcnt to memcg without
- * struct page_cgroup is acquired. This refcnt will be consumed by
- * "commit()" or removed by "cancel()"
- */
-static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
- struct page *page,
- gfp_t mask,
- struct mem_cgroup **memcgp)
-{
- struct mem_cgroup *memcg = NULL;
- struct page_cgroup *pc;
- int ret;
-
- pc = lookup_page_cgroup(page);
- /*
- * Every swap fault against a single page tries to charge the
- * page, bail as early as possible. shmem_unuse() encounters
- * already charged pages, too. The USED bit is protected by
- * the page lock, which serializes swap cache removal, which
- * in turn serializes uncharging.
- */
- if (PageCgroupUsed(pc))
- goto out;
- if (do_swap_account)
- memcg = try_get_mem_cgroup_from_page(page);
- if (!memcg)
- memcg = get_mem_cgroup_from_mm(mm);
- ret = mem_cgroup_try_charge(memcg, mask, 1, true);
- css_put(&memcg->css);
- if (ret == -EINTR)
- memcg = root_mem_cgroup;
- else if (ret)
- return ret;
-out:
- *memcgp = memcg;
- return 0;
-}
-
-int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
- gfp_t gfp_mask, struct mem_cgroup **memcgp)
-{
- if (mem_cgroup_disabled()) {
- *memcgp = NULL;
- return 0;
- }
- /*
- * A racing thread's fault, or swapoff, may have already
- * updated the pte, and even removed page from swap cache: in
- * those cases unuse_pte()'s pte_same() test will fail; but
- * there's also a KSM case which does need to charge the page.
- */
- if (!PageSwapCache(page)) {
- struct mem_cgroup *memcg;
-
- memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
- if (!memcg)
- return -ENOMEM;
- *memcgp = memcg;
- return 0;
- }
- return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp);
-}
-
-void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
-{
- if (mem_cgroup_disabled())
- return;
- if (!memcg)
- return;
- __mem_cgroup_cancel_charge(memcg, 1);
-}
-
-static void
-__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
- enum charge_type ctype)
-{
- if (mem_cgroup_disabled())
- return;
- if (!memcg)
- return;
-
- __mem_cgroup_commit_charge(memcg, page, 1, ctype, true);
- /*
- * Now swap is on-memory. This means this page may be
- * counted both as mem and swap....double count.
- * Fix it by uncharging from memsw. Basically, this SwapCache is stable
- * under lock_page(). But in do_swap_page()::memory.c, reuse_swap_page()
- * may call delete_from_swap_cache() before we reach here.
- */
- if (do_swap_account && PageSwapCache(page)) {
- swp_entry_t ent = {.val = page_private(page)};
- mem_cgroup_uncharge_swap(ent);
- }
-}
-
-void mem_cgroup_commit_charge_swapin(struct page *page,
- struct mem_cgroup *memcg)
-{
- __mem_cgroup_commit_charge_swapin(page, memcg,
- MEM_CGROUP_CHARGE_TYPE_ANON);
-}
-
-int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask)
-{
- enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
- struct mem_cgroup *memcg;
- int ret;
-
- if (mem_cgroup_disabled())
- return 0;
- if (PageCompound(page))
- return 0;
-
- if (PageSwapCache(page)) { /* shmem */
- ret = __mem_cgroup_try_charge_swapin(mm, page,
- gfp_mask, &memcg);
- if (ret)
- return ret;
- __mem_cgroup_commit_charge_swapin(page, memcg, type);
- return 0;
- }
-
- memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
- if (!memcg)
- return -ENOMEM;
- __mem_cgroup_commit_charge(memcg, page, 1, type, false);
- return 0;
-}
-
-static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
- unsigned int nr_pages,
- const enum charge_type ctype)
-{
- struct memcg_batch_info *batch = NULL;
- bool uncharge_memsw = true;
-
- /* If swapout, usage of swap doesn't decrease */
- if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
- uncharge_memsw = false;
-
- batch = &current->memcg_batch;
- /*
- * Usually we do css_get() when we remember the memcg pointer.
- * But in this case, we keep res->usage until the end of a series of
- * uncharges. Then it's ok to ignore the memcg's refcount.
- */
- if (!batch->memcg)
- batch->memcg = memcg;
- /*
- * do_batch > 0 when unmapping pages or inode invalidate/truncate.
- * In those cases, all pages freed continuously can be expected to be in
- * the same cgroup and we have chance to coalesce uncharges.
- * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE)
- * because we want to do uncharge as soon as possible.
- */
-
- if (!batch->do_batch || test_thread_flag(TIF_MEMDIE))
- goto direct_uncharge;
-
- if (nr_pages > 1)
- goto direct_uncharge;
-
- /*
- * In the typical case, batch->memcg == mem. This means we can
- * merge a series of uncharges into one uncharge of the res_counter.
- * If not, we uncharge the res_counter one by one.
- */
- if (batch->memcg != memcg)
- goto direct_uncharge;
- /* remember freed charge and uncharge it later */
- batch->nr_pages++;
- if (uncharge_memsw)
- batch->memsw_nr_pages++;
- return;
-direct_uncharge:
- res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
- if (uncharge_memsw)
- res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
- if (unlikely(batch->memcg != memcg))
- memcg_oom_recover(memcg);
-}
-
-/*
- * uncharge if !page_mapped(page)
- */
-static struct mem_cgroup *
-__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
- bool end_migration)
-{
- struct mem_cgroup *memcg = NULL;
- unsigned int nr_pages = 1;
- struct page_cgroup *pc;
- bool anon;
-
- if (mem_cgroup_disabled())
- return NULL;
-
- if (PageTransHuge(page)) {
- nr_pages <<= compound_order(page);
- VM_BUG_ON_PAGE(!PageTransHuge(page), page);
- }
- /*
- * Check if our page_cgroup is valid
- */
- pc = lookup_page_cgroup(page);
- if (unlikely(!PageCgroupUsed(pc)))
- return NULL;
-
- lock_page_cgroup(pc);
-
- memcg = pc->mem_cgroup;
-
- if (!PageCgroupUsed(pc))
- goto unlock_out;
-
- anon = PageAnon(page);
-
- switch (ctype) {
- case MEM_CGROUP_CHARGE_TYPE_ANON:
- /*
- * Generally PageAnon tells if it's the anon statistics to be
- * updated; but sometimes e.g. mem_cgroup_uncharge_page() is
- * used before page reached the stage of being marked PageAnon.
- */
- anon = true;
- /* fallthrough */
- case MEM_CGROUP_CHARGE_TYPE_DROP:
- /* See mem_cgroup_prepare_migration() */
- if (page_mapped(page))
- goto unlock_out;
- /*
- * Pages under migration may not be uncharged. But
- * end_migration() /must/ be the one uncharging the
- * unused post-migration page and so it has to call
- * here with the migration bit still set. See the
- * res_counter handling below.
- */
- if (!end_migration && PageCgroupMigration(pc))
- goto unlock_out;
- break;
- case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
- if (!PageAnon(page)) { /* Shared memory */
- if (page->mapping && !page_is_file_cache(page))
- goto unlock_out;
- } else if (page_mapped(page)) /* Anon */
- goto unlock_out;
- break;
- default:
- break;
- }
-
- mem_cgroup_charge_statistics(memcg, page, anon, -nr_pages);
-
- ClearPageCgroupUsed(pc);
- /*
- * pc->mem_cgroup is not cleared here. It will be accessed when it's
- * freed from LRU. This is safe because uncharged page is expected not
- * to be reused (freed soon). Exception is SwapCache, it's handled by
- * special functions.
- */
-
- unlock_page_cgroup(pc);
- /*
- * even after unlock, we have memcg->res.usage here and this memcg
- * will never be freed, so it's safe to call css_get().
- */
- memcg_check_events(memcg, page);
- if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) {
- mem_cgroup_swap_statistics(memcg, true);
- css_get(&memcg->css);
- }
- /*
- * Migration does not charge the res_counter for the
- * replacement page, so leave it alone when phasing out the
- * page that is unused after the migration.
- */
- if (!end_migration && !mem_cgroup_is_root(memcg))
- mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
-
- return memcg;
-
-unlock_out:
- unlock_page_cgroup(pc);
- return NULL;
-}
-
-void mem_cgroup_uncharge_page(struct page *page)
-{
- /* early check. */
- if (page_mapped(page))
- return;
- VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
- /*
- * If the page is in swap cache, uncharge should be deferred
- * to the swap path, which also properly accounts swap usage
- * and handles memcg lifetime.
- *
- * Note that this check is not stable and reclaim may add the
- * page to swap cache at any time after this. However, if the
- * page is not in swap cache by the time page->mapcount hits
- * 0, there won't be any page table references to the swap
- * slot, and reclaim will free it and not actually write the
- * page to disk.
- */
- if (PageSwapCache(page))
- return;
- __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
-}
-
-void mem_cgroup_uncharge_cache_page(struct page *page)
-{
- VM_BUG_ON_PAGE(page_mapped(page), page);
- VM_BUG_ON_PAGE(page->mapping, page);
- __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false);
-}
-
-/*
- * Batch_start/batch_end is called in unmap_page_range/invalidate/truncate.
- * In those cases, pages are freed continuously and we can expect they
- * are in the same memcg. All these calls themselves limit the number of
- * pages freed at once, so uncharge_start/end() is called properly.
- * This may be called multiple (nested) times in one context,
- */
-
-void mem_cgroup_uncharge_start(void)
-{
- current->memcg_batch.do_batch++;
- /* We can do nest. */
- if (current->memcg_batch.do_batch == 1) {
- current->memcg_batch.memcg = NULL;
- current->memcg_batch.nr_pages = 0;
- current->memcg_batch.memsw_nr_pages = 0;
- }
-}
-
-void mem_cgroup_uncharge_end(void)
-{
- struct memcg_batch_info *batch = &current->memcg_batch;
-
- if (!batch->do_batch)
- return;
-
- batch->do_batch--;
- if (batch->do_batch) /* If stacked, do nothing. */
- return;
-
- if (!batch->memcg)
- return;
- /*
- * This "batch->memcg" is valid without any css_get/put etc...
- * because we hide charges behind us.
- */
- if (batch->nr_pages)
- res_counter_uncharge(&batch->memcg->res,
- batch->nr_pages * PAGE_SIZE);
- if (batch->memsw_nr_pages)
- res_counter_uncharge(&batch->memcg->memsw,
- batch->memsw_nr_pages * PAGE_SIZE);
- memcg_oom_recover(batch->memcg);
- /* forget this pointer (for sanity check) */
- batch->memcg = NULL;
-}
-
-#ifdef CONFIG_SWAP
-/*
- * called after __delete_from_swap_cache() and drop "page" account.
- * memcg information is recorded to swap_cgroup of "ent"
- */
-void
-mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
-{
- struct mem_cgroup *memcg;
- int ctype = MEM_CGROUP_CHARGE_TYPE_SWAPOUT;
-
- if (!swapout) /* this was a swap cache but the swap is unused ! */
- ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
-
- memcg = __mem_cgroup_uncharge_common(page, ctype, false);
-
- /*
- * record memcg information, if swapout && memcg != NULL,
- * css_get() was called in uncharge().
- */
- if (do_swap_account && swapout && memcg)
- swap_cgroup_record(ent, mem_cgroup_id(memcg));
-}
-#endif
-
#ifdef CONFIG_MEMCG_SWAP
-/*
- * called from swap_entry_free(). remove record in swap_cgroup and
- * uncharge "memsw" account.
- */
-void mem_cgroup_uncharge_swap(swp_entry_t ent)
+static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
+ bool charge)
{
- struct mem_cgroup *memcg;
- unsigned short id;
-
- if (!do_swap_account)
- return;
-
- id = swap_cgroup_record(ent, 0);
- rcu_read_lock();
- memcg = mem_cgroup_lookup(id);
- if (memcg) {
- /*
- * We uncharge this because swap is freed. This memcg can
- * be obsolete one. We avoid calling css_tryget_online().
- */
- if (!mem_cgroup_is_root(memcg))
- res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
- mem_cgroup_swap_statistics(memcg, false);
- css_put(&memcg->css);
- }
- rcu_read_unlock();
+ int val = (charge) ? 1 : -1;
+ this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
}
/**
@@ -4183,175 +3638,6 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
}
#endif
-/*
- * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
- * page belongs to.
- */
-void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
- struct mem_cgroup **memcgp)
-{
- struct mem_cgroup *memcg = NULL;
- unsigned int nr_pages = 1;
- struct page_cgroup *pc;
- enum charge_type ctype;
-
- *memcgp = NULL;
-
- if (mem_cgroup_disabled())
- return;
-
- if (PageTransHuge(page))
- nr_pages <<= compound_order(page);
-
- pc = lookup_page_cgroup(page);
- lock_page_cgroup(pc);
- if (PageCgroupUsed(pc)) {
- memcg = pc->mem_cgroup;
- css_get(&memcg->css);
- /*
- * When migrating an anonymous page, its mapcount goes down
- * to 0 and uncharge() will be called. But, even if it's fully
- * unmapped, migration may fail and this page has to be
- * charged again. We set MIGRATION flag here and delay uncharge
- * until end_migration() is called
- *
- * Corner Case Thinking
- * A)
- * When the old page was mapped as Anon and it is unmapped and freed
- * while migration was ongoing.
- * If unmap finds the old page, uncharge() of it will be delayed
- * until end_migration(). If unmap finds a new page, it's
- * uncharged when it makes the mapcount go 1->0. If the unmap code
- * finds a swap_migration_entry, the new page will not be mapped
- * and end_migration() will find it (mapcount==0).
- *
- * B)
- * When the old page was mapped but migration fails, the kernel
- * remaps it. A charge for it is kept by MIGRATION flag even
- * if mapcount goes down to 0. We can do remap successfully
- * without charging it again.
- *
- * C)
- * The "old" page is under lock_page() until the end of
- * migration, so, the old page itself will not be swapped-out.
- * If the new page is swapped out before end_migration, our
- * hook to usual swap-out path will catch the event.
- */
- if (PageAnon(page))
- SetPageCgroupMigration(pc);
- }
- unlock_page_cgroup(pc);
- /*
- * If the page is not charged at this point,
- * we return here.
- */
- if (!memcg)
- return;
-
- *memcgp = memcg;
- /*
- * We charge new page before it's used/mapped. So, even if unlock_page()
- * is called before end_migration, we can catch all events on this new
- * page. In the case new page is migrated but not remapped, new page's
- * mapcount will be finally 0 and we call uncharge in end_migration().
- */
- if (PageAnon(page))
- ctype = MEM_CGROUP_CHARGE_TYPE_ANON;
- else
- ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
- /*
- * The page is committed to the memcg, but it's not actually
- * charged to the res_counter since we plan on replacing the
- * old one and only one page is going to be left afterwards.
- */
- __mem_cgroup_commit_charge(memcg, newpage, nr_pages, ctype, false);
-}
-
-/* remove redundant charge if migration failed*/
-void mem_cgroup_end_migration(struct mem_cgroup *memcg,
- struct page *oldpage, struct page *newpage, bool migration_ok)
-{
- struct page *used, *unused;
- struct page_cgroup *pc;
- bool anon;
-
- if (!memcg)
- return;
-
- if (!migration_ok) {
- used = oldpage;
- unused = newpage;
- } else {
- used = newpage;
- unused = oldpage;
- }
- anon = PageAnon(used);
- __mem_cgroup_uncharge_common(unused,
- anon ? MEM_CGROUP_CHARGE_TYPE_ANON
- : MEM_CGROUP_CHARGE_TYPE_CACHE,
- true);
- css_put(&memcg->css);
- /*
- * We disallowed uncharge of pages under migration because mapcount
- * of the page goes down to zero, temporarily.
- * Clear the flag and check whether the page should be charged.
- */
- pc = lookup_page_cgroup(oldpage);
- lock_page_cgroup(pc);
- ClearPageCgroupMigration(pc);
- unlock_page_cgroup(pc);
-
- /*
- * If a page is a file cache, radix-tree replacement is very atomic
- * and we can skip this check. When it was an Anon page, its mapcount
- * goes down to 0. But because we added the MIGRATION flag, it's not
- * uncharged yet. There are several cases, but the page->mapcount check
- * and USED bit check in mem_cgroup_uncharge_page() will do enough
- * check. (see prepare_charge() also)
- */
- if (anon)
- mem_cgroup_uncharge_page(used);
-}
-
-/*
- * When replacing page cache, the newpage is not under any memcg but is on
- * the LRU. So this function doesn't touch the res_counter but handles the
- * LRU correctly. Both pages are locked so we cannot race with uncharge.
- */
-void mem_cgroup_replace_page_cache(struct page *oldpage,
- struct page *newpage)
-{
- struct mem_cgroup *memcg = NULL;
- struct page_cgroup *pc;
- enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
-
- if (mem_cgroup_disabled())
- return;
-
- pc = lookup_page_cgroup(oldpage);
- /* fix accounting on old pages */
- lock_page_cgroup(pc);
- if (PageCgroupUsed(pc)) {
- memcg = pc->mem_cgroup;
- mem_cgroup_charge_statistics(memcg, oldpage, false, -1);
- ClearPageCgroupUsed(pc);
- }
- unlock_page_cgroup(pc);
-
- /*
- * When called from shmem_replace_page(), in some cases the
- * oldpage has already been charged, and in some cases not.
- */
- if (!memcg)
- return;
- /*
- * Even if newpage->mapping was NULL before starting replacement,
- * the newpage may be on LRU(or pagevec for LRU) already. We lock
- * LRU while we overwrite pc->mem_cgroup.
- */
- __mem_cgroup_commit_charge(memcg, newpage, 1, type, true);
-}
-
#ifdef CONFIG_DEBUG_VM
static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
{
@@ -4550,7 +3836,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
gfp_mask, &nr_scanned);
nr_reclaimed += reclaimed;
*total_scanned += nr_scanned;
- spin_lock(&mctz->lock);
+ spin_lock_irq(&mctz->lock);
/*
* If we failed to reclaim anything from this memory cgroup
@@ -4590,7 +3876,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
*/
/* If excess == 0, no tree ops */
__mem_cgroup_insert_exceeded(mz, mctz, excess);
- spin_unlock(&mctz->lock);
+ spin_unlock_irq(&mctz->lock);
css_put(&mz->memcg->css);
loop++;
/*
@@ -4817,78 +4103,24 @@ out:
return retval;
}
-
-static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
- enum mem_cgroup_stat_index idx)
-{
- struct mem_cgroup *iter;
- long val = 0;
-
- /* Per-cpu values can be negative, use a signed accumulator */
- for_each_mem_cgroup_tree(iter, memcg)
- val += mem_cgroup_read_stat(iter, idx);
-
- if (val < 0) /* race ? */
- val = 0;
- return val;
-}
-
-static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
-{
- u64 val;
-
- if (!mem_cgroup_is_root(memcg)) {
- if (!swap)
- return res_counter_read_u64(&memcg->res, RES_USAGE);
- else
- return res_counter_read_u64(&memcg->memsw, RES_USAGE);
- }
-
- /*
- * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
- * as well as in MEM_CGROUP_STAT_RSS_HUGE.
- */
- val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
- val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
-
- if (swap)
- val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
-
- return val << PAGE_SHIFT;
-}
-
static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
- struct cftype *cft)
+ struct cftype *cft)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
- u64 val;
- int name;
- enum res_type type;
-
- type = MEMFILE_TYPE(cft->private);
- name = MEMFILE_ATTR(cft->private);
+ enum res_type type = MEMFILE_TYPE(cft->private);
+ int name = MEMFILE_ATTR(cft->private);
switch (type) {
case _MEM:
- if (name == RES_USAGE)
- val = mem_cgroup_usage(memcg, false);
- else
- val = res_counter_read_u64(&memcg->res, name);
- break;
+ return res_counter_read_u64(&memcg->res, name);
case _MEMSWAP:
- if (name == RES_USAGE)
- val = mem_cgroup_usage(memcg, true);
- else
- val = res_counter_read_u64(&memcg->memsw, name);
- break;
+ return res_counter_read_u64(&memcg->memsw, name);
case _KMEM:
- val = res_counter_read_u64(&memcg->kmem, name);
+ return res_counter_read_u64(&memcg->kmem, name);
break;
default:
BUG();
}
-
- return val;
}
#ifdef CONFIG_MEMCG_KMEM
@@ -5350,7 +4582,10 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
if (!t)
goto unlock;
- usage = mem_cgroup_usage(memcg, swap);
+ if (!swap)
+ usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+ else
+ usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
/*
* current_threshold points to threshold just below or equal to usage.
@@ -5446,15 +4681,15 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
mutex_lock(&memcg->thresholds_lock);
- if (type == _MEM)
+ if (type == _MEM) {
thresholds = &memcg->thresholds;
- else if (type == _MEMSWAP)
+ usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+ } else if (type == _MEMSWAP) {
thresholds = &memcg->memsw_thresholds;
- else
+ usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+ } else
BUG();
- usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
-
/* Check if a threshold crossed before adding a new one */
if (thresholds->primary)
__mem_cgroup_threshold(memcg, type == _MEMSWAP);
@@ -5534,18 +4769,19 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
int i, j, size;
mutex_lock(&memcg->thresholds_lock);
- if (type == _MEM)
+
+ if (type == _MEM) {
thresholds = &memcg->thresholds;
- else if (type == _MEMSWAP)
+ usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+ } else if (type == _MEMSWAP) {
thresholds = &memcg->memsw_thresholds;
- else
+ usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+ } else
BUG();
if (!thresholds->primary)
goto unlock;
- usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
-
/* Check if a threshold crossed before removing */
__mem_cgroup_threshold(memcg, type == _MEMSWAP);
@@ -6299,9 +5535,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
* core guarantees its existence.
*/
} else {
- res_counter_init(&memcg->res, NULL);
- res_counter_init(&memcg->memsw, NULL);
- res_counter_init(&memcg->kmem, NULL);
+ res_counter_init(&memcg->res, &root_mem_cgroup->res);
+ res_counter_init(&memcg->memsw, &root_mem_cgroup->memsw);
+ res_counter_init(&memcg->kmem, &root_mem_cgroup->kmem);
/*
 * Deeper hierarchy with use_hierarchy == false doesn't make
* much sense so let cgroup subsystem know about this
@@ -6435,55 +5671,38 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
#ifdef CONFIG_MMU
/* Handlers for move charge at task migration. */
-#define PRECHARGE_COUNT_AT_ONCE 256
static int mem_cgroup_do_precharge(unsigned long count)
{
- int ret = 0;
- int batch_count = PRECHARGE_COUNT_AT_ONCE;
- struct mem_cgroup *memcg = mc.to;
+ int ret;
- if (mem_cgroup_is_root(memcg)) {
+ /* Try a single bulk charge without reclaim first */
+ ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count);
+ if (!ret) {
mc.precharge += count;
- /* we don't need css_get for root */
return ret;
}
- /* try to charge at once */
- if (count > 1) {
- struct res_counter *dummy;
- /*
- * "memcg" cannot be under rmdir() because we've already checked
- * by cgroup_lock_live_cgroup() that it is not removed and we
- * are still under the same cgroup_mutex. So we can postpone
- * css_get().
- */
- if (res_counter_charge(&memcg->res, PAGE_SIZE * count, &dummy))
- goto one_by_one;
- if (do_swap_account && res_counter_charge(&memcg->memsw,
- PAGE_SIZE * count, &dummy)) {
- res_counter_uncharge(&memcg->res, PAGE_SIZE * count);
- goto one_by_one;
- }
- mc.precharge += count;
+ if (ret == -EINTR) {
+ cancel_charge(root_mem_cgroup, count);
return ret;
}
-one_by_one:
- /* fall back to one by one charge */
+
+ /* Try charges one by one with reclaim */
while (count--) {
- if (signal_pending(current)) {
- ret = -EINTR;
- break;
- }
- if (!batch_count--) {
- batch_count = PRECHARGE_COUNT_AT_ONCE;
- cond_resched();
- }
- ret = mem_cgroup_try_charge(memcg, GFP_KERNEL, 1, false);
+ ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1);
+ /*
+ * In case of failure, any residual charges against
+ * mc.to will be dropped by mem_cgroup_clear_mc()
+ * later on. However, cancel any charges that are
+ * bypassed to root right away or they'll be lost.
+ */
+ if (ret == -EINTR)
+ cancel_charge(root_mem_cgroup, 1);
if (ret)
- /* mem_cgroup_clear_mc() will do uncharge later */
return ret;
mc.precharge++;
+ cond_resched();
}
- return ret;
+ return 0;
}
/**
@@ -6619,9 +5838,9 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
if (page) {
pc = lookup_page_cgroup(page);
/*
- * Do only loose check w/o page_cgroup lock.
- * mem_cgroup_move_account() checks the pc is valid or not under
- * the lock.
+ * Do only loose check w/o serialization.
+ * mem_cgroup_move_account() checks the pc is valid or
+ * not under LRU exclusion.
*/
if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
ret = MC_TARGET_PAGE;
@@ -6746,7 +5965,7 @@ static void __mem_cgroup_clear_mc(void)
/* we must uncharge all the leftover precharges from mc.to */
if (mc.precharge) {
- __mem_cgroup_cancel_charge(mc.to, mc.precharge);
+ cancel_charge(mc.to, mc.precharge);
mc.precharge = 0;
}
/*
@@ -6754,27 +5973,24 @@ static void __mem_cgroup_clear_mc(void)
* we must uncharge here.
*/
if (mc.moved_charge) {
- __mem_cgroup_cancel_charge(mc.from, mc.moved_charge);
+ cancel_charge(mc.from, mc.moved_charge);
mc.moved_charge = 0;
}
/* we must fixup refcnts and charges */
if (mc.moved_swap) {
/* uncharge swap account from the old cgroup */
- if (!mem_cgroup_is_root(mc.from))
- res_counter_uncharge(&mc.from->memsw,
- PAGE_SIZE * mc.moved_swap);
+ res_counter_uncharge(&mc.from->memsw,
+ PAGE_SIZE * mc.moved_swap);
for (i = 0; i < mc.moved_swap; i++)
css_put(&mc.from->css);
- if (!mem_cgroup_is_root(mc.to)) {
- /*
- * we charged both to->res and to->memsw, so we should
- * uncharge to->res.
- */
- res_counter_uncharge(&mc.to->res,
- PAGE_SIZE * mc.moved_swap);
- }
+ /*
+ * we charged both to->res and to->memsw, so we should
+ * uncharge to->res.
+ */
+ res_counter_uncharge(&mc.to->res,
+ PAGE_SIZE * mc.moved_swap);
/* we've already done css_get(mc.to) */
mc.moved_swap = 0;
}
@@ -7086,6 +6302,403 @@ static void __init enable_swap_cgroup(void)
}
#endif
+#ifdef CONFIG_MEMCG_SWAP
+/**
+ * mem_cgroup_swapout - transfer a memsw charge to swap
+ * @page: page whose memsw charge to transfer
+ * @entry: swap entry to move the charge to
+ *
+ * Transfer the memsw charge of @page to @entry.
+ */
+void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+{
+ struct page_cgroup *pc;
+ unsigned short oldid;
+
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+ VM_BUG_ON_PAGE(page_count(page), page);
+
+ if (!do_swap_account)
+ return;
+
+ pc = lookup_page_cgroup(page);
+
+ /* Readahead page, never charged */
+ if (!PageCgroupUsed(pc))
+ return;
+
+ VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
+
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
+ VM_BUG_ON_PAGE(oldid, page);
+
+ pc->flags &= ~PCG_MEMSW;
+ css_get(&pc->mem_cgroup->css);
+ mem_cgroup_swap_statistics(pc->mem_cgroup, true);
+}
+
+/**
+ * mem_cgroup_uncharge_swap - uncharge a swap entry
+ * @entry: swap entry to uncharge
+ *
+ * Drop the memsw charge associated with @entry.
+ */
+void mem_cgroup_uncharge_swap(swp_entry_t entry)
+{
+ struct mem_cgroup *memcg;
+ unsigned short id;
+
+ if (!do_swap_account)
+ return;
+
+ id = swap_cgroup_record(entry, 0);
+ rcu_read_lock();
+ memcg = mem_cgroup_lookup(id);
+ if (memcg) {
+ res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+ mem_cgroup_swap_statistics(memcg, false);
+ css_put(&memcg->css);
+ }
+ rcu_read_unlock();
+}
+#endif
+
+/**
+ * mem_cgroup_try_charge - try charging a page
+ * @page: page to charge
+ * @mm: mm context of the victim
+ * @gfp_mask: reclaim mode
+ * @memcgp: charged memcg return
+ *
+ * Try to charge @page to the memcg that @mm belongs to, reclaiming
+ * pages according to @gfp_mask if necessary.
+ *
+ * Returns 0 on success, with *@memcgp pointing to the charged memcg.
+ * Otherwise, an error code is returned.
+ *
+ * After page->mapping has been set up, the caller must finalize the
+ * charge with mem_cgroup_commit_charge(). Or abort the transaction
+ * with mem_cgroup_cancel_charge() in case page instantiation fails.
+ */
+int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+ gfp_t gfp_mask, struct mem_cgroup **memcgp)
+{
+ struct mem_cgroup *memcg = NULL;
+ unsigned int nr_pages = 1;
+ int ret = 0;
+
+ if (mem_cgroup_disabled())
+ goto out;
+
+ if (PageSwapCache(page)) {
+ struct page_cgroup *pc = lookup_page_cgroup(page);
+ /*
+ * Every swap fault against a single page tries to charge the
+ * page, bail as early as possible. shmem_unuse() encounters
+ * already charged pages, too. The USED bit is protected by
+ * the page lock, which serializes swap cache removal, which
+ * in turn serializes uncharging.
+ */
+ if (PageCgroupUsed(pc))
+ goto out;
+ }
+
+ if (PageTransHuge(page)) {
+ nr_pages <<= compound_order(page);
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+ }
+
+ if (do_swap_account && PageSwapCache(page))
+ memcg = try_get_mem_cgroup_from_page(page);
+ if (!memcg)
+ memcg = get_mem_cgroup_from_mm(mm);
+
+ ret = try_charge(memcg, gfp_mask, nr_pages);
+
+ css_put(&memcg->css);
+
+ if (ret == -EINTR) {
+ memcg = root_mem_cgroup;
+ ret = 0;
+ }
+out:
+ *memcgp = memcg;
+ return ret;
+}
+
+/**
+ * mem_cgroup_commit_charge - commit a page charge
+ * @page: page to charge
+ * @memcg: memcg to charge the page to
+ * @lrucare: page might be on LRU already
+ *
+ * Finalize a charge transaction started by mem_cgroup_try_charge(),
+ * after page->mapping has been set up. This must happen atomically
+ * as part of the page instantiation, i.e. under the page table lock
+ * for anonymous pages, under the page lock for page and swap cache.
+ *
+ * In addition, the page must not be on the LRU during the commit, to
+ * prevent racing with task migration. If it might be, use @lrucare.
+ *
+ * Use mem_cgroup_cancel_charge() to cancel the transaction instead.
+ */
+void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
+ bool lrucare)
+{
+ unsigned int nr_pages = 1;
+
+ VM_BUG_ON_PAGE(!page->mapping, page);
+ VM_BUG_ON_PAGE(PageLRU(page) && !lrucare, page);
+
+ if (mem_cgroup_disabled())
+ return;
+ /*
+ * Swap faults will attempt to charge the same page multiple
+ * times. But reuse_swap_page() might have removed the page
+ * from swapcache already, so we can't check PageSwapCache().
+ */
+ if (!memcg)
+ return;
+
+ if (PageTransHuge(page)) {
+ nr_pages <<= compound_order(page);
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+ }
+
+ commit_charge(page, memcg, nr_pages, lrucare);
+
+ if (do_swap_account && PageSwapCache(page)) {
+ swp_entry_t entry = { .val = page_private(page) };
+ /*
+ * The swap entry might not get freed for a long time,
+ * let's not wait for it. The page already received a
+ * memory+swap charge, drop the swap entry duplicate.
+ */
+ mem_cgroup_uncharge_swap(entry);
+ }
+}
+
+/**
+ * mem_cgroup_cancel_charge - cancel a page charge
+ * @page: page to charge
+ * @memcg: memcg to charge the page to
+ *
+ * Cancel a charge transaction started by mem_cgroup_try_charge().
+ */
+void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
+{
+ unsigned int nr_pages = 1;
+
+ if (mem_cgroup_disabled())
+ return;
+ /*
+ * Swap faults will attempt to charge the same page multiple
+ * times. But reuse_swap_page() might have removed the page
+ * from swapcache already, so we can't check PageSwapCache().
+ */
+ if (!memcg)
+ return;
+
+ if (PageTransHuge(page)) {
+ nr_pages <<= compound_order(page);
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+ }
+
+ cancel_charge(memcg, nr_pages);
+}
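+
+/*
+ * Illustrative sketch only, not part of the patch: how a fault path
+ * uses the try/commit/cancel API documented above. The function name
+ * example_map_new_page is hypothetical; the call sequence mirrors the
+ * do_anonymous_page() conversion further down in this series (the real
+ * handlers do the rmap/commit step under the page table lock).
+ */
+static int example_map_new_page(struct page *page, struct mm_struct *mm,
+				struct vm_area_struct *vma,
+				unsigned long address)
+{
+	struct mem_cgroup *memcg;
+
+	/* Reserve the charge before the page becomes visible anywhere */
+	if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
+		return -ENOMEM;
+
+	/* Instantiate page->mapping, then finalize while off the LRU */
+	page_add_new_anon_rmap(page, vma, address);
+	mem_cgroup_commit_charge(page, memcg, false);
+	lru_cache_add_active_or_unevictable(page, vma);
+	return 0;
+}
+/*
+ * Had instantiation failed between try and commit, the caller would
+ * undo the reservation with mem_cgroup_cancel_charge(page, memcg).
+ */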
+
+static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
+ unsigned long nr_mem, unsigned long nr_memsw,
+ unsigned long nr_anon, unsigned long nr_file,
+ unsigned long nr_huge, struct page *dummy_page)
+{
+ unsigned long flags;
+
+ if (nr_mem)
+ res_counter_uncharge(&memcg->res, nr_mem * PAGE_SIZE);
+ if (nr_memsw)
+ res_counter_uncharge(&memcg->memsw, nr_memsw * PAGE_SIZE);
+
+ memcg_oom_recover(memcg);
+
+ local_irq_save(flags);
+ __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
+ __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
+ __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
+ __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
+ __this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
+ memcg_check_events(memcg, dummy_page);
+ local_irq_restore(flags);
+}
+
+static void uncharge_list(struct list_head *page_list)
+{
+ struct mem_cgroup *memcg = NULL;
+ unsigned long nr_memsw = 0;
+ unsigned long nr_anon = 0;
+ unsigned long nr_file = 0;
+ unsigned long nr_huge = 0;
+ unsigned long pgpgout = 0;
+ unsigned long nr_mem = 0;
+ struct list_head *next;
+ struct page *page;
+
+ next = page_list->next;
+ do {
+ unsigned int nr_pages = 1;
+ struct page_cgroup *pc;
+
+ page = list_entry(next, struct page, lru);
+ next = page->lru.next;
+
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+ VM_BUG_ON_PAGE(page_count(page), page);
+
+ pc = lookup_page_cgroup(page);
+ if (!PageCgroupUsed(pc))
+ continue;
+
+ /*
+ * Nobody should be changing or seriously looking at
+ * pc->mem_cgroup and pc->flags at this point, we have
+ * fully exclusive access to the page.
+ */
+
+ if (memcg != pc->mem_cgroup) {
+ if (memcg) {
+ uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
+ nr_anon, nr_file, nr_huge, page);
+ pgpgout = nr_mem = nr_memsw = 0;
+ nr_anon = nr_file = nr_huge = 0;
+ }
+ memcg = pc->mem_cgroup;
+ }
+
+ if (PageTransHuge(page)) {
+ nr_pages <<= compound_order(page);
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+ nr_huge += nr_pages;
+ }
+
+ if (PageAnon(page))
+ nr_anon += nr_pages;
+ else
+ nr_file += nr_pages;
+
+ if (pc->flags & PCG_MEM)
+ nr_mem += nr_pages;
+ if (pc->flags & PCG_MEMSW)
+ nr_memsw += nr_pages;
+ pc->flags = 0;
+
+ pgpgout++;
+ } while (next != page_list);
+
+ if (memcg)
+ uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
+ nr_anon, nr_file, nr_huge, page);
+}
+
+/**
+ * mem_cgroup_uncharge - uncharge a page
+ * @page: page to uncharge
+ *
+ * Uncharge a page previously charged with mem_cgroup_try_charge() and
+ * mem_cgroup_commit_charge().
+ */
+void mem_cgroup_uncharge(struct page *page)
+{
+ struct page_cgroup *pc;
+
+ if (mem_cgroup_disabled())
+ return;
+
+ /* Don't touch page->lru of any random page, pre-check: */
+ pc = lookup_page_cgroup(page);
+ if (!PageCgroupUsed(pc))
+ return;
+
+ INIT_LIST_HEAD(&page->lru);
+ uncharge_list(&page->lru);
+}
+
+/**
+ * mem_cgroup_uncharge_list - uncharge a list of pages
+ * @page_list: list of pages to uncharge
+ *
+ * Uncharge a list of pages previously charged with
+ * mem_cgroup_try_charge() and mem_cgroup_commit_charge().
+ */
+void mem_cgroup_uncharge_list(struct list_head *page_list)
+{
+ if (mem_cgroup_disabled())
+ return;
+
+ if (!list_empty(page_list))
+ uncharge_list(page_list);
+}
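+
+/*
+ * Illustrative sketch only (hypothetical helper, not from the patch):
+ * batching uncharges for pages about to be freed. Per the checks in
+ * uncharge_list() above, the pages must already be off the LRU with a
+ * zero refcount, which is the situation on the release paths once the
+ * old per-page uncharge hooks removed by this series are gone.
+ */
+static void example_release_pages(struct list_head *pages_to_free)
+{
+	mem_cgroup_uncharge_list(pages_to_free);
+	free_hot_cold_page_list(pages_to_free, true);
+}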
+
+/**
+ * mem_cgroup_migrate - migrate a charge to another page
+ * @oldpage: currently charged page
+ * @newpage: page to transfer the charge to
+ * @lrucare: both pages might be on the LRU already
+ *
+ * Migrate the charge from @oldpage to @newpage.
+ *
+ * Both pages must be locked, @newpage->mapping must be set up.
+ */
+void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
+ bool lrucare)
+{
+ unsigned int nr_pages = 1;
+ struct page_cgroup *pc;
+ int isolated;
+
+ VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
+ VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
+ VM_BUG_ON_PAGE(!lrucare && PageLRU(oldpage), oldpage);
+ VM_BUG_ON_PAGE(!lrucare && PageLRU(newpage), newpage);
+ VM_BUG_ON_PAGE(PageAnon(oldpage) != PageAnon(newpage), newpage);
+
+ if (mem_cgroup_disabled())
+ return;
+
+ /* Page cache replacement: new page already charged? */
+ pc = lookup_page_cgroup(newpage);
+ if (PageCgroupUsed(pc))
+ return;
+
+ /* Re-entrant migration: old page already uncharged? */
+ pc = lookup_page_cgroup(oldpage);
+ if (!PageCgroupUsed(pc))
+ return;
+
+ VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage);
+ VM_BUG_ON_PAGE(do_swap_account && !(pc->flags & PCG_MEMSW), oldpage);
+
+ if (PageTransHuge(oldpage)) {
+ nr_pages <<= compound_order(oldpage);
+ VM_BUG_ON_PAGE(!PageTransHuge(oldpage), oldpage);
+ VM_BUG_ON_PAGE(!PageTransHuge(newpage), newpage);
+ }
+
+ if (lrucare)
+ lock_page_lru(oldpage, &isolated);
+
+ pc->flags = 0;
+
+ if (lrucare)
+ unlock_page_lru(oldpage, isolated);
+
+ local_irq_disable();
+ mem_cgroup_charge_statistics(pc->mem_cgroup, oldpage, -nr_pages);
+ memcg_check_events(pc->mem_cgroup, oldpage);
+ local_irq_enable();
+
+ commit_charge(newpage, pc->mem_cgroup, nr_pages, lrucare);
+}
+
/*
* subsys_initcall() for memory controller.
*
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a013bc94ebbe..44c6bd201d3a 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1173,6 +1173,16 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
lock_page(hpage);
/*
+ * The page could have changed compound pages during the locking.
+ * If this happens just bail out.
+ */
+ if (compound_head(p) != hpage) {
+ action_result(pfn, "different compound page after locking", IGNORED);
+ res = -EBUSY;
+ goto out;
+ }
+
+ /*
* We use page flags to determine what action should be taken, but
* the flags can be modified by the error containment action. One
* example is an mlocked page, where PG_mlocked is cleared by
diff --git a/mm/memory.c b/mm/memory.c
index 8b44f765b645..84de6dacd0cc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -884,7 +884,7 @@ out_set_pte:
return 0;
}
-int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
@@ -1292,7 +1292,6 @@ static void unmap_page_range(struct mmu_gather *tlb,
details = NULL;
BUG_ON(addr >= end);
- mem_cgroup_uncharge_start();
tlb_start_vma(tlb, vma);
pgd = pgd_offset(vma->vm_mm, addr);
do {
@@ -1302,7 +1301,6 @@ static void unmap_page_range(struct mmu_gather *tlb,
next = zap_pud_range(tlb, vma, pgd, addr, next, details);
} while (pgd++, addr = next, addr != end);
tlb_end_vma(tlb, vma);
- mem_cgroup_uncharge_end();
}
@@ -2049,6 +2047,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page *dirty_page = NULL;
unsigned long mmun_start = 0; /* For mmu_notifiers */
unsigned long mmun_end = 0; /* For mmu_notifiers */
+ struct mem_cgroup *memcg;
old_page = vm_normal_page(vma, address, orig_pte);
if (!old_page) {
@@ -2204,7 +2203,7 @@ gotten:
}
__SetPageUptodate(new_page);
- if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
+ if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg))
goto oom_free_new;
mmun_start = address & PAGE_MASK;
@@ -2234,6 +2233,8 @@ gotten:
*/
ptep_clear_flush(vma, address, page_table);
page_add_new_anon_rmap(new_page, vma, address);
+ mem_cgroup_commit_charge(new_page, memcg, false);
+ lru_cache_add_active_or_unevictable(new_page, vma);
/*
* We call the notify macro here because, when using secondary
* mmu page tables (such as kvm shadow page tables), we want the
@@ -2271,7 +2272,7 @@ gotten:
new_page = old_page;
ret |= VM_FAULT_WRITE;
} else
- mem_cgroup_uncharge_page(new_page);
+ mem_cgroup_cancel_charge(new_page, memcg);
if (new_page)
page_cache_release(new_page);
@@ -2399,7 +2400,10 @@ EXPORT_SYMBOL(unmap_mapping_range);
/*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ *
+ * We return with the mmap_sem locked or unlocked in the same cases
+ * as does filemap_fault().
*/
static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -2407,10 +2411,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
{
spinlock_t *ptl;
struct page *page, *swapcache;
+ struct mem_cgroup *memcg;
swp_entry_t entry;
pte_t pte;
int locked;
- struct mem_cgroup *ptr;
int exclusive = 0;
int ret = 0;
@@ -2486,7 +2490,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto out_page;
}
- if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
+ if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) {
ret = VM_FAULT_OOM;
goto out_page;
}
@@ -2511,10 +2515,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
* while the page is counted on swap but not yet in mapcount i.e.
* before page_add_anon_rmap() and swap_free(); try_to_free_swap()
* must be called after the swap_free(), or it will never succeed.
- * Because delete_from_swap_page() may be called by reuse_swap_page(),
- * mem_cgroup_commit_charge_swapin() may not be able to find swp_entry
- * in page->private. In this case, a record in swap_cgroup is silently
- * discarded at swap_free().
*/
inc_mm_counter_fast(mm, MM_ANONPAGES);
@@ -2530,12 +2530,14 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (pte_swp_soft_dirty(orig_pte))
pte = pte_mksoft_dirty(pte);
set_pte_at(mm, address, page_table, pte);
- if (page == swapcache)
+ if (page == swapcache) {
do_page_add_anon_rmap(page, vma, address, exclusive);
- else /* ksm created a completely new copy */
+ mem_cgroup_commit_charge(page, memcg, true);
+ } else { /* ksm created a completely new copy */
page_add_new_anon_rmap(page, vma, address);
- /* It's better to call commit-charge after rmap is established */
- mem_cgroup_commit_charge_swapin(page, ptr);
+ mem_cgroup_commit_charge(page, memcg, false);
+ lru_cache_add_active_or_unevictable(page, vma);
+ }
swap_free(entry);
if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
@@ -2568,7 +2570,7 @@ unlock:
out:
return ret;
out_nomap:
- mem_cgroup_cancel_charge_swapin(ptr);
+ mem_cgroup_cancel_charge(page, memcg);
pte_unmap_unlock(page_table, ptl);
out_page:
unlock_page(page);
@@ -2624,6 +2626,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
unsigned int flags)
{
+ struct mem_cgroup *memcg;
struct page *page;
spinlock_t *ptl;
pte_t entry;
@@ -2657,7 +2660,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
*/
__SetPageUptodate(page);
- if (mem_cgroup_charge_anon(page, mm, GFP_KERNEL))
+ if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
goto oom_free_page;
entry = mk_pte(page, vma->vm_page_prot);
@@ -2670,6 +2673,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
inc_mm_counter_fast(mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address);
+ mem_cgroup_commit_charge(page, memcg, false);
+ lru_cache_add_active_or_unevictable(page, vma);
setpte:
set_pte_at(mm, address, page_table, entry);
@@ -2679,7 +2684,7 @@ unlock:
pte_unmap_unlock(page_table, ptl);
return 0;
release:
- mem_cgroup_uncharge_page(page);
+ mem_cgroup_cancel_charge(page, memcg);
page_cache_release(page);
goto unlock;
oom_free_page:
@@ -2688,6 +2693,11 @@ oom:
return VM_FAULT_OOM;
}
+/*
+ * The mmap_sem must have been held on entry, and may have been
+ * released depending on flags and vma->vm_ops->fault() return value.
+ * See filemap_fault() and __lock_page_or_retry().
+ */
static int __do_fault(struct vm_area_struct *vma, unsigned long address,
pgoff_t pgoff, unsigned int flags, struct page **page)
{
@@ -2744,7 +2754,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
else if (pte_file(*pte) && pte_file_soft_dirty(*pte))
- pte_mksoft_dirty(entry);
+ entry = pte_mksoft_dirty(entry);
if (anon) {
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address);
@@ -2917,6 +2927,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
{
struct page *fault_page, *new_page;
+ struct mem_cgroup *memcg;
spinlock_t *ptl;
pte_t *pte;
int ret;
@@ -2928,7 +2939,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (!new_page)
return VM_FAULT_OOM;
- if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) {
+ if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) {
page_cache_release(new_page);
return VM_FAULT_OOM;
}
@@ -2948,12 +2959,14 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
goto uncharge_out;
}
do_set_pte(vma, address, new_page, pte, true, true);
+ mem_cgroup_commit_charge(new_page, memcg, false);
+ lru_cache_add_active_or_unevictable(new_page, vma);
pte_unmap_unlock(pte, ptl);
unlock_page(fault_page);
page_cache_release(fault_page);
return ret;
uncharge_out:
- mem_cgroup_uncharge_page(new_page);
+ mem_cgroup_cancel_charge(new_page, memcg);
page_cache_release(new_page);
return ret;
}
@@ -2969,6 +2982,8 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
int dirtied = 0;
int ret, tmp;
+ WARN_ON_ONCE(!rwsem_is_locked(&mm->mmap_sem));
+
ret = __do_fault(vma, address, pgoff, flags, &fault_page);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;
@@ -2999,6 +3014,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (set_page_dirty(fault_page))
dirtied = 1;
+ /*
+ * Take a local copy of the address_space - page.mapping may be zeroed
+ * by truncate after unlock_page(). The address_space itself remains
+ * pinned by vma->vm_file's reference. We rely on unlock_page()'s
+ * release semantics to prevent the compiler from undoing this copying.
+ */
mapping = fault_page->mapping;
unlock_page(fault_page);
if ((dirtied || vma->vm_ops->page_mkwrite) && mapping) {
@@ -3016,6 +3037,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return ret;
}
+/*
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults).
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
+ */
static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
unsigned int flags, pte_t orig_pte)
@@ -3040,7 +3067,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
*/
static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -3172,7 +3201,10 @@ out:
*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
*/
static int handle_pte_fault(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
@@ -3181,7 +3213,7 @@ static int handle_pte_fault(struct mm_struct *mm,
pte_t entry;
spinlock_t *ptl;
- entry = *pte;
+ entry = ACCESS_ONCE(*pte);
if (!pte_present(entry)) {
if (pte_none(entry)) {
if (vma->vm_ops) {
@@ -3232,6 +3264,9 @@ unlock:
/*
* By the time we get here, we already hold the mm semaphore
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
*/
static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
@@ -3313,6 +3348,12 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return handle_pte_fault(mm, vma, address, pte, pmd, flags);
}
+/*
+ * By the time we get here, we already hold the mm semaphore
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
+ */
int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
{
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 469bbf505f85..2ff8c2325e96 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -284,8 +284,8 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
}
#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
-static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
- unsigned long end_pfn)
+static void __meminit grow_zone_span(struct zone *zone, unsigned long start_pfn,
+ unsigned long end_pfn)
{
unsigned long old_zone_end_pfn;
@@ -427,8 +427,8 @@ out_fail:
return -1;
}
-static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
- unsigned long end_pfn)
+static void __meminit grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
+ unsigned long end_pfn)
{
unsigned long old_pgdat_end_pfn = pgdat_end_pfn(pgdat);
@@ -977,15 +977,18 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
zone = page_zone(pfn_to_page(pfn));
ret = -EINVAL;
- if ((zone_idx(zone) > ZONE_NORMAL || online_type == ONLINE_MOVABLE) &&
+ if ((zone_idx(zone) > ZONE_NORMAL ||
+ online_type == MMOP_ONLINE_MOVABLE) &&
!can_online_high_movable(zone))
goto out;
- if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) {
+ if (online_type == MMOP_ONLINE_KERNEL &&
+ zone_idx(zone) == ZONE_MOVABLE) {
if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages))
goto out;
}
- if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) {
+ if (online_type == MMOP_ONLINE_MOVABLE &&
+ zone_idx(zone) == ZONE_MOVABLE - 1) {
if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages))
goto out;
}
@@ -1156,6 +1159,34 @@ static int check_hotplug_memory_range(u64 start, u64 size)
return 0;
}
+/*
+ * If the movable zone has already been set up, newly added memory should be
+ * checked. If its address is higher than the movable zone, it should be added
+ * as movable. Without this check, the movable zone may overlap with other
+ * zones.
+ */
+static int should_add_memory_movable(int nid, u64 start, u64 size)
+{
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ pg_data_t *pgdat = NODE_DATA(nid);
+ struct zone *movable_zone = pgdat->node_zones + ZONE_MOVABLE;
+
+ if (zone_is_empty(movable_zone))
+ return 0;
+
+ if (movable_zone->zone_start_pfn <= start_pfn)
+ return 1;
+
+ return 0;
+}
+
+int zone_for_memory(int nid, u64 start, u64 size, int zone_default)
+{
+ if (should_add_memory_movable(nid, start, size))
+ return ZONE_MOVABLE;
+
+ return zone_default;
+}
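+
+/*
+ * Illustrative sketch only: how an architecture's hotplug path might
+ * consult zone_for_memory() when deciding which zone newly added memory
+ * goes into. arch_add_memory() here is a simplified, hypothetical
+ * example; real implementations also map the range into the kernel
+ * page tables before calling __add_pages().
+ */
+int arch_add_memory(int nid, u64 start, u64 size)
+{
+	pg_data_t *pgdat = NODE_DATA(nid);
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	struct zone *zone;
+
+	zone = pgdat->node_zones +
+		zone_for_memory(nid, start, size, ZONE_NORMAL);
+
+	return __add_pages(nid, zone, start_pfn, nr_pages);
+}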
+
/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
int __ref add_memory(int nid, u64 start, u64 size)
{
diff --git a/mm/migrate.c b/mm/migrate.c
index be6dbf995c0c..f78ec9bd454d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -780,6 +780,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
if (rc != MIGRATEPAGE_SUCCESS) {
newpage->mapping = NULL;
} else {
+ mem_cgroup_migrate(page, newpage, false);
if (remap_swapcache)
remove_migration_ptes(page, newpage);
page->mapping = NULL;
@@ -795,7 +796,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
{
int rc = -EAGAIN;
int remap_swapcache = 1;
- struct mem_cgroup *mem;
struct anon_vma *anon_vma = NULL;
if (!trylock_page(page)) {
@@ -821,9 +821,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
lock_page(page);
}
- /* charge against new page */
- mem_cgroup_prepare_migration(page, newpage, &mem);
-
if (PageWriteback(page)) {
/*
* Only in the case of a full synchronous migration is it
@@ -833,10 +830,10 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
*/
if (mode != MIGRATE_SYNC) {
rc = -EBUSY;
- goto uncharge;
+ goto out_unlock;
}
if (!force)
- goto uncharge;
+ goto out_unlock;
wait_on_page_writeback(page);
}
/*
@@ -872,7 +869,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
*/
remap_swapcache = 0;
} else {
- goto uncharge;
+ goto out_unlock;
}
}
@@ -885,7 +882,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
* the page migration right away (proteced by page lock).
*/
rc = balloon_page_migrate(newpage, page, mode);
- goto uncharge;
+ goto out_unlock;
}
/*
@@ -904,7 +901,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
VM_BUG_ON_PAGE(PageAnon(page), page);
if (page_has_private(page)) {
try_to_free_buffers(page);
- goto uncharge;
+ goto out_unlock;
}
goto skip_unmap;
}
@@ -923,10 +920,7 @@ skip_unmap:
if (anon_vma)
put_anon_vma(anon_vma);
-uncharge:
- mem_cgroup_end_migration(mem, page, newpage,
- (rc == MIGRATEPAGE_SUCCESS ||
- rc == MIGRATEPAGE_BALLOON_SUCCESS));
+out_unlock:
unlock_page(page);
out:
return rc;
@@ -1786,7 +1780,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
pg_data_t *pgdat = NODE_DATA(node);
int isolated = 0;
struct page *new_page = NULL;
- struct mem_cgroup *memcg = NULL;
int page_lru = page_is_file_cache(page);
unsigned long mmun_start = address & HPAGE_PMD_MASK;
unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
@@ -1852,15 +1845,6 @@ fail_putback:
goto out_unlock;
}
- /*
- * Traditional migration needs to prepare the memcg charge
- * transaction early to prevent the old page from being
- * uncharged when installing migration entries. Here we can
- * save the potential rollback and start the charge transfer
- * only when migration is already known to end successfully.
- */
- mem_cgroup_prepare_migration(page, new_page, &memcg);
-
orig_entry = *pmd;
entry = mk_pmd(new_page, vma->vm_page_prot);
entry = pmd_mkhuge(entry);
@@ -1888,14 +1872,10 @@ fail_putback:
goto fail_putback;
}
+ mem_cgroup_migrate(page, new_page, false);
+
page_remove_rmap(page);
- /*
- * Finish the charge transaction under the page table lock to
- * prevent split_huge_page() from dividing up the charge
- * before it's fully transferred to the new page.
- */
- mem_cgroup_end_migration(memcg, page, new_page, true);
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
diff --git a/mm/mlock.c b/mm/mlock.c
index b1eb53634005..ce84cb0b83ef 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -210,12 +210,19 @@ out:
* @vma: target vma
* @start: start address
* @end: end address
+ * @nonblocking: if non-NULL, cleared to 0 when mmap_sem is released
*
* This takes care of making the pages present too.
*
* return 0 on success, negative error code on error.
*
- * vma->vm_mm->mmap_sem must be held for at least read.
+ * vma->vm_mm->mmap_sem must be held.
+ *
+ * If @nonblocking is NULL, it may be held for read or write and will
+ * be unperturbed.
+ *
+ * If @nonblocking is non-NULL, it must be held for read only and may be
+ * released. If it's released, *@nonblocking will be set to 0.
*/
long __mlock_vma_pages_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end, int *nonblocking)
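/*
 * Illustrative caller sketch only (not the body of the function above):
 * how a populate-style caller honours the @nonblocking contract.
 * example_populate_range is a hypothetical helper.
 */
static long example_populate_range(struct vm_area_struct *vma,
				   unsigned long start, unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	int locked = 1;
	long ret;

	down_read(&mm->mmap_sem);
	ret = __mlock_vma_pages_range(vma, start, end, &locked);
	if (locked)
		up_read(&mm->mmap_sem);
	/* if !locked, the fault path already released mmap_sem for us */
	return ret;
}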
diff --git a/mm/mmap.c b/mm/mmap.c
index 129b847d30cc..64c9d736155c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -31,6 +31,7 @@
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
+#include <linux/mmdebug.h>
#include <linux/perf_event.h>
#include <linux/audit.h>
#include <linux/khugepaged.h>
@@ -134,6 +135,10 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
unsigned long free, allowed, reserve;
+ VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) <
+ -(s64)vm_committed_as_batch * num_online_cpus(),
+ "memory commitment underflow");
+
vm_acct_memory(pages);
/*
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 3291e82d4352..d3e8c0d1dbf3 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -208,8 +208,6 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
/* Default to all available memory */
*totalpages = totalram_pages + total_swap_pages;
- if (!zonelist)
- return CONSTRAINT_NONE;
/*
 * Reach here only when __GFP_NOFAIL is used. So, we should avoid
 * killing current. We have to kill a random task in this case.
@@ -258,8 +256,6 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
unsigned long totalpages, const nodemask_t *nodemask,
bool force_kill)
{
- if (task->exit_state)
- return OOM_SCAN_CONTINUE;
if (oom_unkillable_task(task, NULL, nodemask))
return OOM_SCAN_CONTINUE;
@@ -559,28 +555,25 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
* if a parallel OOM killing is already taking place that includes a zone in
 * the zonelist. Otherwise, locks all zones in the zonelist and returns true.
*/
-int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
+bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_mask)
{
struct zoneref *z;
struct zone *zone;
- int ret = 1;
+ bool ret = true;
spin_lock(&zone_scan_lock);
- for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
+ for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
if (zone_is_oom_locked(zone)) {
- ret = 0;
+ ret = false;
goto out;
}
- }
- for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
- /*
- * Lock each zone in the zonelist under zone_scan_lock so a
- * parallel invocation of try_set_zonelist_oom() doesn't succeed
- * when it shouldn't.
- */
+ /*
+ * Lock each zone in the zonelist under zone_scan_lock so a parallel
+ * call to oom_zonelist_trylock() doesn't succeed when it shouldn't.
+ */
+ for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
zone_set_flag(zone, ZONE_OOM_LOCKED);
- }
out:
spin_unlock(&zone_scan_lock);
@@ -592,15 +585,14 @@ out:
* allocation attempts with zonelists containing them may now recall the OOM
* killer, if necessary.
*/
-void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
+void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask)
{
struct zoneref *z;
struct zone *zone;
spin_lock(&zone_scan_lock);
- for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
+ for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
zone_clear_flag(zone, ZONE_OOM_LOCKED);
- }
spin_unlock(&zone_scan_lock);
}
@@ -694,9 +686,9 @@ void pagefault_out_of_memory(void)
if (mem_cgroup_oom_synchronize(true))
return;
- zonelist = node_zonelist(first_online_node, GFP_KERNEL);
- if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) {
- out_of_memory(NULL, 0, 0, NULL, false);
- clear_zonelist_oom(zonelist, GFP_KERNEL);
+ zonelist = node_zonelist(first_memory_node, GFP_KERNEL);
+ if (oom_zonelist_trylock(zonelist, GFP_KERNEL)) {
+ out_of_memory(zonelist, 0, 0, NULL, false);
+ oom_zonelist_unlock(zonelist, GFP_KERNEL);
}
}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index e0c943014eb7..91d73ef1744d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -261,14 +261,11 @@ static unsigned long global_dirtyable_memory(void)
*/
void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
{
+ const unsigned long available_memory = global_dirtyable_memory();
unsigned long background;
unsigned long dirty;
- unsigned long uninitialized_var(available_memory);
struct task_struct *tsk;
- if (!vm_dirty_bytes || !dirty_background_bytes)
- available_memory = global_dirtyable_memory();
-
if (vm_dirty_bytes)
dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
else
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ef44ad736ca1..d0e3d2fee585 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -680,9 +680,12 @@ static void free_pcppages_bulk(struct zone *zone, int count,
int migratetype = 0;
int batch_free = 0;
int to_free = count;
+ unsigned long nr_scanned;
spin_lock(&zone->lock);
- zone->pages_scanned = 0;
+ nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
+ if (nr_scanned)
+ __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
while (to_free) {
struct page *page;
@@ -731,8 +734,11 @@ static void free_one_page(struct zone *zone,
unsigned int order,
int migratetype)
{
+ unsigned long nr_scanned;
spin_lock(&zone->lock);
- zone->pages_scanned = 0;
+ nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
+ if (nr_scanned)
+ __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
__free_one_page(page, pfn, zone, order, migratetype);
if (unlikely(!is_migrate_isolate(migratetype)))
@@ -1257,15 +1263,11 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
{
unsigned long flags;
- int to_drain;
- unsigned long batch;
+ int to_drain, batch;
local_irq_save(flags);
batch = ACCESS_ONCE(pcp->batch);
- if (pcp->count >= batch)
- to_drain = batch;
- else
- to_drain = pcp->count;
+ to_drain = min(pcp->count, batch);
if (to_drain > 0) {
free_pcppages_bulk(zone, to_drain, pcp);
pcp->count -= to_drain;
@@ -1610,6 +1612,9 @@ again:
}
__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+ if (zone_page_state(zone, NR_ALLOC_BATCH) == 0 &&
+ !zone_is_fair_depleted(zone))
+ zone_set_flag(zone, ZONE_FAIR_DEPLETED);
__count_zone_vm_events(PGALLOC, zone, 1 << order);
zone_statistics(preferred_zone, zone, gfp_flags);
@@ -1712,7 +1717,6 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
{
/* free_pages my go negative - that's OK */
long min = mark;
- long lowmem_reserve = z->lowmem_reserve[classzone_idx];
int o;
long free_cma = 0;
@@ -1727,7 +1731,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
free_cma = zone_page_state(z, NR_FREE_CMA_PAGES);
#endif
- if (free_pages - free_cma <= min + lowmem_reserve)
+ if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx])
return false;
for (o = 0; o < order; o++) {
/* At the next order, this order's pages become unavailable */
@@ -1922,6 +1926,18 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
#endif /* CONFIG_NUMA */
+static void reset_alloc_batches(struct zone *preferred_zone)
+{
+ struct zone *zone = preferred_zone->zone_pgdat->node_zones;
+
+ do {
+ mod_zone_page_state(zone, NR_ALLOC_BATCH,
+ high_wmark_pages(zone) - low_wmark_pages(zone) -
+ atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
+ zone_clear_flag(zone, ZONE_FAIR_DEPLETED);
+ } while (zone++ != preferred_zone);
+}
+
/*
* get_page_from_freelist goes through the zonelist trying to allocate
* a page.
@@ -1939,8 +1955,12 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
int did_zlc_setup = 0; /* just call zlc_setup() one time */
bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) &&
(gfp_mask & __GFP_WRITE);
+ int nr_fair_skipped = 0;
+ bool zonelist_rescan;
zonelist_scan:
+ zonelist_rescan = false;
+
/*
* Scan zonelist, looking for a zone with enough free.
* See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c.
@@ -1964,9 +1984,11 @@ zonelist_scan:
*/
if (alloc_flags & ALLOC_FAIR) {
if (!zone_local(preferred_zone, zone))
+ break;
+ if (zone_is_fair_depleted(zone)) {
+ nr_fair_skipped++;
continue;
- if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
- continue;
+ }
}
/*
* When allocating a page cache page for writing, we
@@ -2072,13 +2094,7 @@ this_zone_full:
zlc_mark_zone_full(zonelist, z);
}
- if (unlikely(IS_ENABLED(CONFIG_NUMA) && page == NULL && zlc_active)) {
- /* Disable zlc cache for second zonelist scan */
- zlc_active = 0;
- goto zonelist_scan;
- }
-
- if (page)
+ if (page) {
/*
* page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
* necessary to allocate the page. The expectation is
@@ -2087,8 +2103,37 @@ this_zone_full:
* for !PFMEMALLOC purposes.
*/
page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
+ return page;
+ }
- return page;
+ /*
+ * The first pass makes sure allocations are spread fairly within the
+ * local node. However, the local node might have free pages left
+ * after the fairness batches are exhausted, and remote zones haven't
+ * even been considered yet. Try once more without fairness, and
+ * include remote zones now, before entering the slowpath and waking
+ * kswapd: prefer spilling to a remote zone over swapping locally.
+ */
+ if (alloc_flags & ALLOC_FAIR) {
+ alloc_flags &= ~ALLOC_FAIR;
+ if (nr_fair_skipped) {
+ zonelist_rescan = true;
+ reset_alloc_batches(preferred_zone);
+ }
+ if (nr_online_nodes > 1)
+ zonelist_rescan = true;
+ }
+
+ if (unlikely(IS_ENABLED(CONFIG_NUMA) && zlc_active)) {
+ /* Disable zlc cache for second zonelist scan */
+ zlc_active = 0;
+ zonelist_rescan = true;
+ }
+
+ if (zonelist_rescan)
+ goto zonelist_scan;
+
+ return NULL;
}
/*
@@ -2201,8 +2246,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
{
struct page *page;
- /* Acquire the OOM killer lock for the zones in zonelist */
- if (!try_set_zonelist_oom(zonelist, gfp_mask)) {
+ /* Acquire the per-zone oom lock for each zone */
+ if (!oom_zonelist_trylock(zonelist, gfp_mask)) {
schedule_timeout_uninterruptible(1);
return NULL;
}
@@ -2240,7 +2285,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
out_of_memory(zonelist, gfp_mask, order, nodemask, false);
out:
- clear_zonelist_oom(zonelist, gfp_mask);
+ oom_zonelist_unlock(zonelist, gfp_mask);
return page;
}
@@ -2409,28 +2454,6 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
return page;
}
-static void reset_alloc_batches(struct zonelist *zonelist,
- enum zone_type high_zoneidx,
- struct zone *preferred_zone)
-{
- struct zoneref *z;
- struct zone *zone;
-
- for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
- /*
- * Only reset the batches of zones that were actually
- * considered in the fairness pass, we don't want to
- * trash fairness information for zones that are not
- * actually part of this zonelist's round-robin cycle.
- */
- if (!zone_local(preferred_zone, zone))
- continue;
- mod_zone_page_state(zone, NR_ALLOC_BATCH,
- high_wmark_pages(zone) - low_wmark_pages(zone) -
- atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
- }
-}
-
static void wake_all_kswapds(unsigned int order,
struct zonelist *zonelist,
enum zone_type high_zoneidx,
@@ -2616,14 +2639,6 @@ rebalance:
goto got_pg;
/*
- * It can become very expensive to allocate transparent hugepages at
- * fault, so use asynchronous memory compaction for THP unless it is
- * khugepaged trying to collapse.
- */
- if (!(gfp_mask & __GFP_NO_KSWAPD) || (current->flags & PF_KTHREAD))
- migration_mode = MIGRATE_SYNC_LIGHT;
-
- /*
* If compaction is deferred for high-order allocations, it is because
 * sync compaction recently failed. If this is the case and the caller
* requested a movable allocation that does not heavily disrupt the
@@ -2633,6 +2648,15 @@ rebalance:
(gfp_mask & __GFP_NO_KSWAPD))
goto nopage;
+ /*
+ * It can become very expensive to allocate transparent hugepages at
+ * fault, so use asynchronous memory compaction for THP unless it is
+ * khugepaged trying to collapse.
+ */
+ if ((gfp_mask & GFP_TRANSHUGE) != GFP_TRANSHUGE ||
+ (current->flags & PF_KTHREAD))
+ migration_mode = MIGRATE_SYNC_LIGHT;
+
/* Try direct reclaim and then allocating */
page = __alloc_pages_direct_reclaim(gfp_mask, order,
zonelist, high_zoneidx,
@@ -2766,29 +2790,12 @@ retry_cpuset:
if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
alloc_flags |= ALLOC_CMA;
#endif
-retry:
/* First allocation attempt */
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
zonelist, high_zoneidx, alloc_flags,
preferred_zone, classzone_idx, migratetype);
if (unlikely(!page)) {
/*
- * The first pass makes sure allocations are spread
- * fairly within the local node. However, the local
- * node might have free pages left after the fairness
- * batches are exhausted, and remote zones haven't
- * even been considered yet. Try once more without
- * fairness, and include remote zones now, before
- * entering the slowpath and waking kswapd: prefer
- * spilling to a remote zone over swapping locally.
- */
- if (alloc_flags & ALLOC_FAIR) {
- reset_alloc_batches(zonelist, high_zoneidx,
- preferred_zone);
- alloc_flags &= ~ALLOC_FAIR;
- goto retry;
- }
- /*
* Runtime PM, block IO and its error handling path
* can deadlock because I/O on the device might not
* complete.
@@ -2962,7 +2969,7 @@ EXPORT_SYMBOL(alloc_pages_exact);
* Note this is not alloc_pages_exact_node() which allocates on a specific node,
* but is not exact.
*/
-void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
+void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
{
unsigned order = get_order(size);
struct page *p = alloc_pages_node(nid, gfp_mask, order);
@@ -2970,7 +2977,6 @@ void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
return NULL;
return make_alloc_exact((unsigned long)page_address(p), order, size);
}
-EXPORT_SYMBOL(alloc_pages_exact_nid);
/**
* free_pages_exact - release memory allocated via alloc_pages_exact()
@@ -3052,7 +3058,7 @@ static inline void show_node(struct zone *zone)
void si_meminfo(struct sysinfo *val)
{
val->totalram = totalram_pages;
- val->sharedram = 0;
+ val->sharedram = global_page_state(NR_SHMEM);
val->freeram = global_page_state(NR_FREE_PAGES);
val->bufferram = nr_blockdev_pages();
val->totalhigh = totalhigh_pages;
@@ -3072,6 +3078,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
managed_pages += pgdat->node_zones[zone_type].managed_pages;
val->totalram = managed_pages;
+ val->sharedram = node_page_state(nid, NR_SHMEM);
val->freeram = node_page_state(nid, NR_FREE_PAGES);
#ifdef CONFIG_HIGHMEM
val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages;
@@ -3253,12 +3260,12 @@ void show_free_areas(unsigned int filter)
K(zone_page_state(zone, NR_BOUNCE)),
K(zone_page_state(zone, NR_FREE_CMA_PAGES)),
K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
- zone->pages_scanned,
+ K(zone_page_state(zone, NR_PAGES_SCANNED)),
(!zone_reclaimable(zone) ? "yes" : "no")
);
printk("lowmem_reserve[]:");
for (i = 0; i < MAX_NR_ZONES; i++)
- printk(" %lu", zone->lowmem_reserve[i]);
+ printk(" %ld", zone->lowmem_reserve[i]);
printk("\n");
}
@@ -4969,6 +4976,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
pgdat->node_start_pfn = node_start_pfn;
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+ printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid,
+ (u64) start_pfn << PAGE_SHIFT, (u64) (end_pfn << PAGE_SHIFT) - 1);
#endif
calculate_node_totalpages(pgdat, start_pfn, end_pfn,
zones_size, zholes_size);
@@ -5579,7 +5588,7 @@ static void calculate_totalreserve_pages(void)
for_each_online_pgdat(pgdat) {
for (i = 0; i < MAX_NR_ZONES; i++) {
struct zone *zone = pgdat->node_zones + i;
- unsigned long max = 0;
+ long max = 0;
/* Find valid and maximum lowmem_reserve in the zone */
for (j = i; j < MAX_NR_ZONES; j++) {
diff --git a/mm/readahead.c b/mm/readahead.c
index 0ca36a7770b1..17b9172ec37f 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -326,7 +326,6 @@ static unsigned long get_next_ra_size(struct file_ra_state *ra,
* - thrashing threshold in memory tight systems
*/
static pgoff_t count_history_pages(struct address_space *mapping,
- struct file_ra_state *ra,
pgoff_t offset, unsigned long max)
{
pgoff_t head;
@@ -349,7 +348,7 @@ static int try_context_readahead(struct address_space *mapping,
{
pgoff_t size;
- size = count_history_pages(mapping, ra, offset, max);
+ size = count_history_pages(mapping, offset, max);
/*
* not enough history pages:
diff --git a/mm/rmap.c b/mm/rmap.c
index 22a4a7699cdb..3e8491c504f8 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1032,25 +1032,6 @@ void page_add_new_anon_rmap(struct page *page,
__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
hpage_nr_pages(page));
__page_set_anon_rmap(page, vma, address, 1);
-
- VM_BUG_ON_PAGE(PageLRU(page), page);
- if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
- SetPageActive(page);
- lru_cache_add(page);
- return;
- }
-
- if (!TestSetPageMlocked(page)) {
- /*
- * We use the irq-unsafe __mod_zone_page_stat because this
- * counter is not modified from interrupt context, and the pte
- * lock is held(spinlock), which implies preemption disabled.
- */
- __mod_zone_page_state(page_zone(page), NR_MLOCK,
- hpage_nr_pages(page));
- count_vm_event(UNEVICTABLE_PGMLOCKED);
- }
- add_page_to_unevictable_list(page);
}
/**
@@ -1108,7 +1089,6 @@ void page_remove_rmap(struct page *page)
if (unlikely(PageHuge(page)))
goto out;
if (anon) {
- mem_cgroup_uncharge_page(page);
if (PageTransHuge(page))
__dec_zone_page_state(page,
NR_ANON_TRANSPARENT_HUGEPAGES);
diff --git a/mm/shmem.c b/mm/shmem.c
index 0f018002dd64..5909f298c3e7 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -149,6 +149,19 @@ static inline void shmem_unacct_size(unsigned long flags, loff_t size)
vm_unacct_memory(VM_ACCT(size));
}
+static inline int shmem_reacct_size(unsigned long flags,
+ loff_t oldsize, loff_t newsize)
+{
+ if (!(flags & VM_NORESERVE)) {
+ if (VM_ACCT(newsize) > VM_ACCT(oldsize))
+ return security_vm_enough_memory_mm(current->mm,
+ VM_ACCT(newsize) - VM_ACCT(oldsize));
+ else if (VM_ACCT(newsize) < VM_ACCT(oldsize))
+ vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize));
+ }
+ return 0;
+}
+
/*
* ... whereas tmpfs objects are accounted incrementally as
* pages are allocated, in order to allow huge sparse files.
@@ -280,7 +293,7 @@ static bool shmem_confirm_swap(struct address_space *mapping,
*/
static int shmem_add_to_page_cache(struct page *page,
struct address_space *mapping,
- pgoff_t index, gfp_t gfp, void *expected)
+ pgoff_t index, void *expected)
{
int error;
@@ -406,7 +419,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
pvec.pages, indices);
if (!pvec.nr)
break;
- mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
@@ -434,7 +446,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
}
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
- mem_cgroup_uncharge_end();
cond_resched();
index++;
}
@@ -482,7 +493,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
index = start;
continue;
}
- mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
@@ -518,7 +528,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
}
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
- mem_cgroup_uncharge_end();
index++;
}
@@ -549,6 +558,10 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
loff_t newsize = attr->ia_size;
if (newsize != oldsize) {
+ error = shmem_reacct_size(SHMEM_I(inode)->flags,
+ oldsize, newsize);
+ if (error)
+ return error;
i_size_write(inode, newsize);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
}
@@ -604,7 +617,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
radswap = swp_to_radix_entry(swap);
index = radix_tree_locate_item(&mapping->page_tree, radswap);
if (index == -1)
- return 0;
+ return -EAGAIN; /* tell shmem_unuse we found nothing */
/*
* Move _head_ to start search for next from here.
@@ -649,7 +662,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
*/
if (!error)
error = shmem_add_to_page_cache(*pagep, mapping, index,
- GFP_NOWAIT, radswap);
+ radswap);
if (error != -ENOMEM) {
/*
* Truncation and eviction use free_swap_and_cache(), which
@@ -663,7 +676,6 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
spin_unlock(&info->lock);
swap_free(swap);
}
- error = 1; /* not an error, but entry was found */
}
return error;
}
@@ -675,7 +687,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
{
struct list_head *this, *next;
struct shmem_inode_info *info;
- int found = 0;
+ struct mem_cgroup *memcg;
int error = 0;
/*
@@ -690,26 +702,32 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
* the shmem_swaplist_mutex which might hold up shmem_writepage().
* Charged back to the user (not to caller) when swap account is used.
*/
- error = mem_cgroup_charge_file(page, current->mm, GFP_KERNEL);
+ error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg);
if (error)
goto out;
/* No radix_tree_preload: swap entry keeps a place for page in tree */
+ error = -EAGAIN;
mutex_lock(&shmem_swaplist_mutex);
list_for_each_safe(this, next, &shmem_swaplist) {
info = list_entry(this, struct shmem_inode_info, swaplist);
if (info->swapped)
- found = shmem_unuse_inode(info, swap, &page);
+ error = shmem_unuse_inode(info, swap, &page);
else
list_del_init(&info->swaplist);
cond_resched();
- if (found)
+ if (error != -EAGAIN)
break;
+ /* found nothing in this: move on to search the next */
}
mutex_unlock(&shmem_swaplist_mutex);
- if (found < 0)
- error = found;
+ if (error) {
+ if (error != -ENOMEM)
+ error = 0;
+ mem_cgroup_cancel_charge(page, memcg);
+ } else
+ mem_cgroup_commit_charge(page, memcg, true);
out:
unlock_page(page);
page_cache_release(page);
@@ -813,7 +831,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
}
mutex_unlock(&shmem_swaplist_mutex);
- swapcache_free(swap, NULL);
+ swapcache_free(swap);
redirty:
set_page_dirty(page);
if (wbc->for_reclaim)
@@ -986,7 +1004,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
*/
oldpage = newpage;
} else {
- mem_cgroup_replace_page_cache(oldpage, newpage);
+ mem_cgroup_migrate(oldpage, newpage, false);
lru_cache_add_anon(newpage);
*pagep = newpage;
}
@@ -1013,6 +1031,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info;
struct shmem_sb_info *sbinfo;
+ struct mem_cgroup *memcg;
struct page *page;
swp_entry_t swap;
int error;
@@ -1091,11 +1110,10 @@ repeat:
goto failed;
}
- error = mem_cgroup_charge_file(page, current->mm,
- gfp & GFP_RECLAIM_MASK);
+ error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
if (!error) {
error = shmem_add_to_page_cache(page, mapping, index,
- gfp, swp_to_radix_entry(swap));
+ swp_to_radix_entry(swap));
/*
* We already confirmed swap under page lock, and make
* no memory allocation here, so usually no possibility
@@ -1108,12 +1126,16 @@ repeat:
* Reset swap.val? No, leave it so "failed" goes back to
* "repeat": reading a hole and writing should succeed.
*/
- if (error)
+ if (error) {
+ mem_cgroup_cancel_charge(page, memcg);
delete_from_swap_cache(page);
+ }
}
if (error)
goto failed;
+ mem_cgroup_commit_charge(page, memcg, true);
+
spin_lock(&info->lock);
info->swapped--;
shmem_recalc_inode(inode);
@@ -1149,22 +1171,22 @@ repeat:
__SetPageSwapBacked(page);
__set_page_locked(page);
if (sgp == SGP_WRITE)
- init_page_accessed(page);
+ __SetPageReferenced(page);
- error = mem_cgroup_charge_file(page, current->mm,
- gfp & GFP_RECLAIM_MASK);
+ error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
if (error)
goto decused;
error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
if (!error) {
error = shmem_add_to_page_cache(page, mapping, index,
- gfp, NULL);
+ NULL);
radix_tree_preload_end();
}
if (error) {
- mem_cgroup_uncharge_cache_page(page);
+ mem_cgroup_cancel_charge(page, memcg);
goto decused;
}
+ mem_cgroup_commit_charge(page, memcg, false);
lru_cache_add_anon(page);
spin_lock(&info->lock);
@@ -2960,16 +2982,16 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
this.len = strlen(name);
this.hash = 0; /* will go */
sb = shm_mnt->mnt_sb;
+ path.mnt = mntget(shm_mnt);
path.dentry = d_alloc_pseudo(sb, &this);
if (!path.dentry)
goto put_memory;
d_set_d_op(path.dentry, &anon_ops);
- path.mnt = mntget(shm_mnt);
res = ERR_PTR(-ENOSPC);
inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
if (!inode)
- goto put_dentry;
+ goto put_memory;
inode->i_flags |= i_flags;
d_instantiate(path.dentry, inode);
@@ -2977,19 +2999,19 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
clear_nlink(inode); /* It is unlinked */
res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
if (IS_ERR(res))
- goto put_dentry;
+ goto put_path;
res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
&shmem_file_operations);
if (IS_ERR(res))
- goto put_dentry;
+ goto put_path;
return res;
-put_dentry:
- path_put(&path);
put_memory:
shmem_unacct_size(flags, size);
+put_path:
+ path_put(&path);
return res;
}
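The shmem hunks above all move to the same three-step memcg protocol that replaces mem_cgroup_charge_file(): try_charge before the page is inserted, cancel_charge if insertion fails, commit_charge once the page is in place. The following is a minimal sketch of that sequence using only the calls and argument shapes visible in the hunks; the wrapper function and its name are illustrative, not part of the patch, and the radix-tree preload step from shmem_getpage_gfp() is omitted for brevity.

/* Illustrative only: mirrors the charge sequence used in the shmem_getpage_gfp() hunks above. */
static int shmem_charge_example(struct page *page, struct address_space *mapping,
				pgoff_t index, gfp_t gfp)
{
	struct mem_cgroup *memcg;
	int error;

	error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
	if (error)
		return error;

	error = shmem_add_to_page_cache(page, mapping, index, NULL);
	if (error) {
		mem_cgroup_cancel_charge(page, memcg);	/* undo the reservation */
		return error;
	}

	/* third argument mirrors the hunks: false on the new-page path, true on the swap path */
	mem_cgroup_commit_charge(page, memcg, false);
	return 0;
}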
diff --git a/mm/slab.c b/mm/slab.c
index 3070b929a1bf..2e60bf3dedbb 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -191,7 +191,6 @@ struct array_cache {
unsigned int limit;
unsigned int batchcount;
unsigned int touched;
- spinlock_t lock;
void *entry[]; /*
* Must have this definition in here for the proper
* alignment of array_cache. Also simplifies accessing
@@ -203,6 +202,11 @@ struct array_cache {
*/
};
+struct alien_cache {
+ spinlock_t lock;
+ struct array_cache ac;
+};
+
#define SLAB_OBJ_PFMEMALLOC 1
static inline bool is_obj_pfmemalloc(void *objp)
{
@@ -242,7 +246,8 @@ static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
static int drain_freelist(struct kmem_cache *cache,
struct kmem_cache_node *n, int tofree);
static void free_block(struct kmem_cache *cachep, void **objpp, int len,
- int node);
+ int node, struct list_head *list);
+static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list);
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
static void cache_reap(struct work_struct *unused);
@@ -267,7 +272,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
#define MAKE_LIST(cachep, listp, slab, nodeid) \
do { \
INIT_LIST_HEAD(listp); \
- list_splice(&(cachep->node[nodeid]->slab), listp); \
+ list_splice(&get_node(cachep, nodeid)->slab, listp); \
} while (0)
#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \
@@ -465,143 +470,6 @@ static struct kmem_cache kmem_cache_boot = {
.name = "kmem_cache",
};
-#define BAD_ALIEN_MAGIC 0x01020304ul
-
-#ifdef CONFIG_LOCKDEP
-
-/*
- * Slab sometimes uses the kmalloc slabs to store the slab headers
- * for other slabs "off slab".
- * The locking for this is tricky in that it nests within the locks
- * of all other slabs in a few places; to deal with this special
- * locking we put on-slab caches into a separate lock-class.
- *
- * We set lock class for alien array caches which are up during init.
- * The lock annotation will be lost if all cpus of a node goes down and
- * then comes back up during hotplug
- */
-static struct lock_class_key on_slab_l3_key;
-static struct lock_class_key on_slab_alc_key;
-
-static struct lock_class_key debugobj_l3_key;
-static struct lock_class_key debugobj_alc_key;
-
-static void slab_set_lock_classes(struct kmem_cache *cachep,
- struct lock_class_key *l3_key, struct lock_class_key *alc_key,
- int q)
-{
- struct array_cache **alc;
- struct kmem_cache_node *n;
- int r;
-
- n = cachep->node[q];
- if (!n)
- return;
-
- lockdep_set_class(&n->list_lock, l3_key);
- alc = n->alien;
- /*
- * FIXME: This check for BAD_ALIEN_MAGIC
- * should go away when common slab code is taught to
- * work even without alien caches.
- * Currently, non NUMA code returns BAD_ALIEN_MAGIC
- * for alloc_alien_cache,
- */
- if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
- return;
- for_each_node(r) {
- if (alc[r])
- lockdep_set_class(&alc[r]->lock, alc_key);
- }
-}
-
-static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
-{
- slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node);
-}
-
-static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
-{
- int node;
-
- for_each_online_node(node)
- slab_set_debugobj_lock_classes_node(cachep, node);
-}
-
-static void init_node_lock_keys(int q)
-{
- int i;
-
- if (slab_state < UP)
- return;
-
- for (i = 1; i <= KMALLOC_SHIFT_HIGH; i++) {
- struct kmem_cache_node *n;
- struct kmem_cache *cache = kmalloc_caches[i];
-
- if (!cache)
- continue;
-
- n = cache->node[q];
- if (!n || OFF_SLAB(cache))
- continue;
-
- slab_set_lock_classes(cache, &on_slab_l3_key,
- &on_slab_alc_key, q);
- }
-}
-
-static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q)
-{
- if (!cachep->node[q])
- return;
-
- slab_set_lock_classes(cachep, &on_slab_l3_key,
- &on_slab_alc_key, q);
-}
-
-static inline void on_slab_lock_classes(struct kmem_cache *cachep)
-{
- int node;
-
- VM_BUG_ON(OFF_SLAB(cachep));
- for_each_node(node)
- on_slab_lock_classes_node(cachep, node);
-}
-
-static inline void init_lock_keys(void)
-{
- int node;
-
- for_each_node(node)
- init_node_lock_keys(node);
-}
-#else
-static void init_node_lock_keys(int q)
-{
-}
-
-static inline void init_lock_keys(void)
-{
-}
-
-static inline void on_slab_lock_classes(struct kmem_cache *cachep)
-{
-}
-
-static inline void on_slab_lock_classes_node(struct kmem_cache *cachep, int node)
-{
-}
-
-static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
-{
-}
-
-static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
-{
-}
-#endif
-
static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -792,13 +660,8 @@ static void start_cpu_timer(int cpu)
}
}
-static struct array_cache *alloc_arraycache(int node, int entries,
- int batchcount, gfp_t gfp)
+static void init_arraycache(struct array_cache *ac, int limit, int batch)
{
- int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
- struct array_cache *nc = NULL;
-
- nc = kmalloc_node(memsize, gfp, node);
/*
* The array_cache structures contain pointers to free object.
* However, when such objects are allocated or transferred to another
@@ -806,15 +669,24 @@ static struct array_cache *alloc_arraycache(int node, int entries,
* valid references during a kmemleak scan. Therefore, kmemleak must
* not scan such objects.
*/
- kmemleak_no_scan(nc);
- if (nc) {
- nc->avail = 0;
- nc->limit = entries;
- nc->batchcount = batchcount;
- nc->touched = 0;
- spin_lock_init(&nc->lock);
+ kmemleak_no_scan(ac);
+ if (ac) {
+ ac->avail = 0;
+ ac->limit = limit;
+ ac->batchcount = batch;
+ ac->touched = 0;
}
- return nc;
+}
+
+static struct array_cache *alloc_arraycache(int node, int entries,
+ int batchcount, gfp_t gfp)
+{
+ size_t memsize = sizeof(void *) * entries + sizeof(struct array_cache);
+ struct array_cache *ac = NULL;
+
+ ac = kmalloc_node(memsize, gfp, node);
+ init_arraycache(ac, entries, batchcount);
+ return ac;
}
static inline bool is_slab_pfmemalloc(struct page *page)
@@ -826,7 +698,7 @@ static inline bool is_slab_pfmemalloc(struct page *page)
static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
struct array_cache *ac)
{
- struct kmem_cache_node *n = cachep->node[numa_mem_id()];
+ struct kmem_cache_node *n = get_node(cachep, numa_mem_id());
struct page *page;
unsigned long flags;
@@ -881,7 +753,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
* If there are empty slabs on the slabs_free list and we are
* being forced to refill the cache, mark this one !pfmemalloc.
*/
- n = cachep->node[numa_mem_id()];
+ n = get_node(cachep, numa_mem_id());
if (!list_empty(&n->slabs_free) && force_refill) {
struct page *page = virt_to_head_page(objp);
ClearPageSlabPfmemalloc(page);
@@ -961,12 +833,13 @@ static int transfer_objects(struct array_cache *to,
#define drain_alien_cache(cachep, alien) do { } while (0)
#define reap_alien(cachep, n) do { } while (0)
-static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
+static inline struct alien_cache **alloc_alien_cache(int node,
+ int limit, gfp_t gfp)
{
- return (struct array_cache **)BAD_ALIEN_MAGIC;
+ return NULL;
}
-static inline void free_alien_cache(struct array_cache **ac_ptr)
+static inline void free_alien_cache(struct alien_cache **ac_ptr)
{
}
@@ -992,46 +865,60 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep,
static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
-static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
+static struct alien_cache *__alloc_alien_cache(int node, int entries,
+ int batch, gfp_t gfp)
+{
+ size_t memsize = sizeof(void *) * entries + sizeof(struct alien_cache);
+ struct alien_cache *alc = NULL;
+
+ alc = kmalloc_node(memsize, gfp, node);
+ init_arraycache(&alc->ac, entries, batch);
+ spin_lock_init(&alc->lock);
+ return alc;
+}
+
+static struct alien_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
{
- struct array_cache **ac_ptr;
- int memsize = sizeof(void *) * nr_node_ids;
+ struct alien_cache **alc_ptr;
+ size_t memsize = sizeof(void *) * nr_node_ids;
int i;
if (limit > 1)
limit = 12;
- ac_ptr = kzalloc_node(memsize, gfp, node);
- if (ac_ptr) {
- for_each_node(i) {
- if (i == node || !node_online(i))
- continue;
- ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
- if (!ac_ptr[i]) {
- for (i--; i >= 0; i--)
- kfree(ac_ptr[i]);
- kfree(ac_ptr);
- return NULL;
- }
+ alc_ptr = kzalloc_node(memsize, gfp, node);
+ if (!alc_ptr)
+ return NULL;
+
+ for_each_node(i) {
+ if (i == node || !node_online(i))
+ continue;
+ alc_ptr[i] = __alloc_alien_cache(node, limit, 0xbaadf00d, gfp);
+ if (!alc_ptr[i]) {
+ for (i--; i >= 0; i--)
+ kfree(alc_ptr[i]);
+ kfree(alc_ptr);
+ return NULL;
}
}
- return ac_ptr;
+ return alc_ptr;
}
-static void free_alien_cache(struct array_cache **ac_ptr)
+static void free_alien_cache(struct alien_cache **alc_ptr)
{
int i;
- if (!ac_ptr)
+ if (!alc_ptr)
return;
for_each_node(i)
- kfree(ac_ptr[i]);
- kfree(ac_ptr);
+ kfree(alc_ptr[i]);
+ kfree(alc_ptr);
}
static void __drain_alien_cache(struct kmem_cache *cachep,
- struct array_cache *ac, int node)
+ struct array_cache *ac, int node,
+ struct list_head *list)
{
- struct kmem_cache_node *n = cachep->node[node];
+ struct kmem_cache_node *n = get_node(cachep, node);
if (ac->avail) {
spin_lock(&n->list_lock);
@@ -1043,7 +930,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
if (n->shared)
transfer_objects(n->shared, ac, ac->limit);
- free_block(cachep, ac->entry, ac->avail, node);
+ free_block(cachep, ac->entry, ac->avail, node, list);
ac->avail = 0;
spin_unlock(&n->list_lock);
}
@@ -1057,28 +944,40 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
int node = __this_cpu_read(slab_reap_node);
if (n->alien) {
- struct array_cache *ac = n->alien[node];
+ struct alien_cache *alc = n->alien[node];
+ struct array_cache *ac;
+
+ if (alc) {
+ ac = &alc->ac;
+ if (ac->avail && spin_trylock_irq(&alc->lock)) {
+ LIST_HEAD(list);
- if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
- __drain_alien_cache(cachep, ac, node);
- spin_unlock_irq(&ac->lock);
+ __drain_alien_cache(cachep, ac, node, &list);
+ spin_unlock_irq(&alc->lock);
+ slabs_destroy(cachep, &list);
+ }
}
}
}
static void drain_alien_cache(struct kmem_cache *cachep,
- struct array_cache **alien)
+ struct alien_cache **alien)
{
int i = 0;
+ struct alien_cache *alc;
struct array_cache *ac;
unsigned long flags;
for_each_online_node(i) {
- ac = alien[i];
- if (ac) {
- spin_lock_irqsave(&ac->lock, flags);
- __drain_alien_cache(cachep, ac, i);
- spin_unlock_irqrestore(&ac->lock, flags);
+ alc = alien[i];
+ if (alc) {
+ LIST_HEAD(list);
+
+ ac = &alc->ac;
+ spin_lock_irqsave(&alc->lock, flags);
+ __drain_alien_cache(cachep, ac, i, &list);
+ spin_unlock_irqrestore(&alc->lock, flags);
+ slabs_destroy(cachep, &list);
}
}
}
@@ -1087,8 +986,10 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
int nodeid = page_to_nid(virt_to_page(objp));
struct kmem_cache_node *n;
- struct array_cache *alien = NULL;
+ struct alien_cache *alien = NULL;
+ struct array_cache *ac;
int node;
+ LIST_HEAD(list);
node = numa_mem_id();
@@ -1099,21 +1000,25 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
if (likely(nodeid == node))
return 0;
- n = cachep->node[node];
+ n = get_node(cachep, node);
STATS_INC_NODEFREES(cachep);
if (n->alien && n->alien[nodeid]) {
alien = n->alien[nodeid];
+ ac = &alien->ac;
spin_lock(&alien->lock);
- if (unlikely(alien->avail == alien->limit)) {
+ if (unlikely(ac->avail == ac->limit)) {
STATS_INC_ACOVERFLOW(cachep);
- __drain_alien_cache(cachep, alien, nodeid);
+ __drain_alien_cache(cachep, ac, nodeid, &list);
}
- ac_put_obj(cachep, alien, objp);
+ ac_put_obj(cachep, ac, objp);
spin_unlock(&alien->lock);
+ slabs_destroy(cachep, &list);
} else {
- spin_lock(&(cachep->node[nodeid])->list_lock);
- free_block(cachep, &objp, 1, nodeid);
- spin_unlock(&(cachep->node[nodeid])->list_lock);
+ n = get_node(cachep, nodeid);
+ spin_lock(&n->list_lock);
+ free_block(cachep, &objp, 1, nodeid, &list);
+ spin_unlock(&n->list_lock);
+ slabs_destroy(cachep, &list);
}
return 1;
}
@@ -1132,7 +1037,7 @@ static int init_cache_node_node(int node)
{
struct kmem_cache *cachep;
struct kmem_cache_node *n;
- const int memsize = sizeof(struct kmem_cache_node);
+ const size_t memsize = sizeof(struct kmem_cache_node);
list_for_each_entry(cachep, &slab_caches, list) {
/*
@@ -1140,7 +1045,8 @@ static int init_cache_node_node(int node)
* begin anything. Make sure some other cpu on this
* node has not already allocated this
*/
- if (!cachep->node[node]) {
+ n = get_node(cachep, node);
+ if (!n) {
n = kmalloc_node(memsize, GFP_KERNEL, node);
if (!n)
return -ENOMEM;
@@ -1156,11 +1062,11 @@ static int init_cache_node_node(int node)
cachep->node[node] = n;
}
- spin_lock_irq(&cachep->node[node]->list_lock);
- cachep->node[node]->free_limit =
+ spin_lock_irq(&n->list_lock);
+ n->free_limit =
(1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
- spin_unlock_irq(&cachep->node[node]->list_lock);
+ spin_unlock_irq(&n->list_lock);
}
return 0;
}
@@ -1181,12 +1087,13 @@ static void cpuup_canceled(long cpu)
list_for_each_entry(cachep, &slab_caches, list) {
struct array_cache *nc;
struct array_cache *shared;
- struct array_cache **alien;
+ struct alien_cache **alien;
+ LIST_HEAD(list);
/* cpu is dead; no one can alloc from it. */
nc = cachep->array[cpu];
cachep->array[cpu] = NULL;
- n = cachep->node[node];
+ n = get_node(cachep, node);
if (!n)
goto free_array_cache;
@@ -1196,7 +1103,7 @@ static void cpuup_canceled(long cpu)
/* Free limit for this kmem_cache_node */
n->free_limit -= cachep->batchcount;
if (nc)
- free_block(cachep, nc->entry, nc->avail, node);
+ free_block(cachep, nc->entry, nc->avail, node, &list);
if (!cpumask_empty(mask)) {
spin_unlock_irq(&n->list_lock);
@@ -1206,7 +1113,7 @@ static void cpuup_canceled(long cpu)
shared = n->shared;
if (shared) {
free_block(cachep, shared->entry,
- shared->avail, node);
+ shared->avail, node, &list);
n->shared = NULL;
}
@@ -1221,6 +1128,7 @@ static void cpuup_canceled(long cpu)
free_alien_cache(alien);
}
free_array_cache:
+ slabs_destroy(cachep, &list);
kfree(nc);
}
/*
@@ -1229,7 +1137,7 @@ free_array_cache:
* shrink each nodelist to its limit.
*/
list_for_each_entry(cachep, &slab_caches, list) {
- n = cachep->node[node];
+ n = get_node(cachep, node);
if (!n)
continue;
drain_freelist(cachep, n, slabs_tofree(cachep, n));
@@ -1260,7 +1168,7 @@ static int cpuup_prepare(long cpu)
list_for_each_entry(cachep, &slab_caches, list) {
struct array_cache *nc;
struct array_cache *shared = NULL;
- struct array_cache **alien = NULL;
+ struct alien_cache **alien = NULL;
nc = alloc_arraycache(node, cachep->limit,
cachep->batchcount, GFP_KERNEL);
@@ -1284,7 +1192,7 @@ static int cpuup_prepare(long cpu)
}
}
cachep->array[cpu] = nc;
- n = cachep->node[node];
+ n = get_node(cachep, node);
BUG_ON(!n);
spin_lock_irq(&n->list_lock);
@@ -1305,13 +1213,7 @@ static int cpuup_prepare(long cpu)
spin_unlock_irq(&n->list_lock);
kfree(shared);
free_alien_cache(alien);
- if (cachep->flags & SLAB_DEBUG_OBJECTS)
- slab_set_debugobj_lock_classes_node(cachep, node);
- else if (!OFF_SLAB(cachep) &&
- !(cachep->flags & SLAB_DESTROY_BY_RCU))
- on_slab_lock_classes_node(cachep, node);
}
- init_node_lock_keys(node);
return 0;
bad:
@@ -1395,7 +1297,7 @@ static int __meminit drain_cache_node_node(int node)
list_for_each_entry(cachep, &slab_caches, list) {
struct kmem_cache_node *n;
- n = cachep->node[node];
+ n = get_node(cachep, node);
if (!n)
continue;
@@ -1575,10 +1477,6 @@ void __init kmem_cache_init(void)
memcpy(ptr, cpu_cache_get(kmem_cache),
sizeof(struct arraycache_init));
- /*
- * Do not assume that spinlocks can be initialized via memcpy:
- */
- spin_lock_init(&ptr->lock);
kmem_cache->array[smp_processor_id()] = ptr;
@@ -1588,10 +1486,6 @@ void __init kmem_cache_init(void)
!= &initarray_generic.cache);
memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]),
sizeof(struct arraycache_init));
- /*
- * Do not assume that spinlocks can be initialized via memcpy:
- */
- spin_lock_init(&ptr->lock);
kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr;
}
@@ -1628,9 +1522,6 @@ void __init kmem_cache_init_late(void)
BUG();
mutex_unlock(&slab_mutex);
- /* Annotate slab for lockdep -- annotate the malloc caches */
- init_lock_keys();
-
/* Done! */
slab_state = FULL;
@@ -1690,14 +1581,10 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n",
cachep->name, cachep->size, cachep->gfporder);
- for_each_online_node(node) {
+ for_each_kmem_cache_node(cachep, node, n) {
unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
unsigned long active_slabs = 0, num_slabs = 0;
- n = cachep->node[node];
- if (!n)
- continue;
-
spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->slabs_full, lru) {
active_objs += cachep->num;
@@ -1724,7 +1611,8 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
}
/*
- * Interface to system's page allocator. No need to hold the cache-lock.
+ * Interface to system's page allocator. No need to hold the
+ * kmem_cache_node ->list_lock.
*
* If we requested dmaable memory, we will get it. Even if we
* did not request dmaable memory, we might get it, but that
@@ -2026,9 +1914,9 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
* @cachep: cache pointer being destroyed
* @page: page pointer being destroyed
*
- * Destroy all the objs in a slab, and release the mem back to the system.
- * Before calling the slab must have been unlinked from the cache. The
- * cache-lock is not held/needed.
+ * Destroy all the objs in a slab page, and release the mem back to the system.
+ * Before calling the slab page must have been unlinked from the cache. The
+ * kmem_cache_node ->list_lock is not held/needed.
*/
static void slab_destroy(struct kmem_cache *cachep, struct page *page)
{
@@ -2060,6 +1948,16 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page)
kmem_cache_free(cachep->freelist_cache, freelist);
}
+static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list)
+{
+ struct page *page, *n;
+
+ list_for_each_entry_safe(page, n, list, lru) {
+ list_del(&page->lru);
+ slab_destroy(cachep, page);
+ }
+}
+
/**
* calculate_slab_order - calculate size (page order) of slabs
* @cachep: pointer to the cache that is being created
@@ -2405,17 +2303,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
return err;
}
- if (flags & SLAB_DEBUG_OBJECTS) {
- /*
- * Would deadlock through slab_destroy()->call_rcu()->
- * debug_object_activate()->kmem_cache_alloc().
- */
- WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);
-
- slab_set_debugobj_lock_classes(cachep);
- } else if (!OFF_SLAB(cachep) && !(flags & SLAB_DESTROY_BY_RCU))
- on_slab_lock_classes(cachep);
-
return 0;
}
@@ -2434,7 +2321,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep)
{
#ifdef CONFIG_SMP
check_irq_off();
- assert_spin_locked(&cachep->node[numa_mem_id()]->list_lock);
+ assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
#endif
}
@@ -2442,7 +2329,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
{
#ifdef CONFIG_SMP
check_irq_off();
- assert_spin_locked(&cachep->node[node]->list_lock);
+ assert_spin_locked(&get_node(cachep, node)->list_lock);
#endif
}
@@ -2462,12 +2349,16 @@ static void do_drain(void *arg)
struct kmem_cache *cachep = arg;
struct array_cache *ac;
int node = numa_mem_id();
+ struct kmem_cache_node *n;
+ LIST_HEAD(list);
check_irq_off();
ac = cpu_cache_get(cachep);
- spin_lock(&cachep->node[node]->list_lock);
- free_block(cachep, ac->entry, ac->avail, node);
- spin_unlock(&cachep->node[node]->list_lock);
+ n = get_node(cachep, node);
+ spin_lock(&n->list_lock);
+ free_block(cachep, ac->entry, ac->avail, node, &list);
+ spin_unlock(&n->list_lock);
+ slabs_destroy(cachep, &list);
ac->avail = 0;
}
@@ -2478,17 +2369,12 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
on_each_cpu(do_drain, cachep, 1);
check_irq_on();
- for_each_online_node(node) {
- n = cachep->node[node];
- if (n && n->alien)
+ for_each_kmem_cache_node(cachep, node, n)
+ if (n->alien)
drain_alien_cache(cachep, n->alien);
- }
- for_each_online_node(node) {
- n = cachep->node[node];
- if (n)
- drain_array(cachep, n, n->shared, 1, node);
- }
+ for_each_kmem_cache_node(cachep, node, n)
+ drain_array(cachep, n, n->shared, 1, node);
}
/*
@@ -2534,17 +2420,14 @@ out:
int __kmem_cache_shrink(struct kmem_cache *cachep)
{
- int ret = 0, i = 0;
+ int ret = 0;
+ int node;
struct kmem_cache_node *n;
drain_cpu_caches(cachep);
check_irq_on();
- for_each_online_node(i) {
- n = cachep->node[i];
- if (!n)
- continue;
-
+ for_each_kmem_cache_node(cachep, node, n) {
drain_freelist(cachep, n, slabs_tofree(cachep, n));
ret += !list_empty(&n->slabs_full) ||
@@ -2566,13 +2449,11 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
kfree(cachep->array[i]);
/* NUMA: free the node structures */
- for_each_online_node(i) {
- n = cachep->node[i];
- if (n) {
- kfree(n->shared);
- free_alien_cache(n->alien);
- kfree(n);
- }
+ for_each_kmem_cache_node(cachep, i, n) {
+ kfree(n->shared);
+ free_alien_cache(n->alien);
+ kfree(n);
+ cachep->node[i] = NULL;
}
return 0;
}
@@ -2751,7 +2632,7 @@ static int cache_grow(struct kmem_cache *cachep,
/* Take the node list lock to change the colour_next on this node */
check_irq_off();
- n = cachep->node[nodeid];
+ n = get_node(cachep, nodeid);
spin_lock(&n->list_lock);
/* Get colour for the slab, and cal the next value. */
@@ -2920,7 +2801,7 @@ retry:
*/
batchcount = BATCHREFILL_LIMIT;
}
- n = cachep->node[node];
+ n = get_node(cachep, node);
BUG_ON(ac->avail > 0 || !n);
spin_lock(&n->list_lock);
@@ -3060,7 +2941,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
{
- if (cachep == kmem_cache)
+ if (unlikely(cachep == kmem_cache))
return false;
return should_failslab(cachep->object_size, flags, cachep->flags);
@@ -3169,8 +3050,8 @@ retry:
nid = zone_to_nid(zone);
if (cpuset_zone_allowed_hardwall(zone, flags) &&
- cache->node[nid] &&
- cache->node[nid]->free_objects) {
+ get_node(cache, nid) &&
+ get_node(cache, nid)->free_objects) {
obj = ____cache_alloc_node(cache,
flags | GFP_THISNODE, nid);
if (obj)
@@ -3233,7 +3114,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
int x;
VM_BUG_ON(nodeid > num_online_nodes());
- n = cachep->node[nodeid];
+ n = get_node(cachep, nodeid);
BUG_ON(!n);
retry:
@@ -3304,7 +3185,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
if (nodeid == NUMA_NO_NODE)
nodeid = slab_node;
- if (unlikely(!cachep->node[nodeid])) {
+ if (unlikely(!get_node(cachep, nodeid))) {
/* Node not bootstrapped yet */
ptr = fallback_alloc(cachep, flags);
goto out;
@@ -3405,12 +3286,13 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
/*
* Caller needs to acquire correct kmem_cache_node's list_lock
+ * @list: List of detached free slabs should be freed by caller
*/
-static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
- int node)
+static void free_block(struct kmem_cache *cachep, void **objpp,
+ int nr_objects, int node, struct list_head *list)
{
int i;
- struct kmem_cache_node *n;
+ struct kmem_cache_node *n = get_node(cachep, node);
for (i = 0; i < nr_objects; i++) {
void *objp;
@@ -3420,7 +3302,6 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
objp = objpp[i];
page = virt_to_head_page(objp);
- n = cachep->node[node];
list_del(&page->lru);
check_spinlock_acquired_node(cachep, node);
slab_put_obj(cachep, page, objp, node);
@@ -3431,13 +3312,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
if (page->active == 0) {
if (n->free_objects > n->free_limit) {
n->free_objects -= cachep->num;
- /* No need to drop any previously held
- * lock here, even if we have a off-slab slab
- * descriptor it is guaranteed to come from
- * a different cache, refer to comments before
- * alloc_slabmgmt.
- */
- slab_destroy(cachep, page);
+ list_add_tail(&page->lru, list);
} else {
list_add(&page->lru, &n->slabs_free);
}
@@ -3456,13 +3331,14 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
int batchcount;
struct kmem_cache_node *n;
int node = numa_mem_id();
+ LIST_HEAD(list);
batchcount = ac->batchcount;
#if DEBUG
BUG_ON(!batchcount || batchcount > ac->avail);
#endif
check_irq_off();
- n = cachep->node[node];
+ n = get_node(cachep, node);
spin_lock(&n->list_lock);
if (n->shared) {
struct array_cache *shared_array = n->shared;
@@ -3477,7 +3353,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
}
}
- free_block(cachep, ac->entry, batchcount, node);
+ free_block(cachep, ac->entry, batchcount, node, &list);
free_done:
#if STATS
{
@@ -3498,6 +3374,7 @@ free_done:
}
#endif
spin_unlock(&n->list_lock);
+ slabs_destroy(cachep, &list);
ac->avail -= batchcount;
memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
}
@@ -3754,7 +3631,7 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
int node;
struct kmem_cache_node *n;
struct array_cache *new_shared;
- struct array_cache **new_alien = NULL;
+ struct alien_cache **new_alien = NULL;
for_each_online_node(node) {
@@ -3775,15 +3652,16 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
}
}
- n = cachep->node[node];
+ n = get_node(cachep, node);
if (n) {
struct array_cache *shared = n->shared;
+ LIST_HEAD(list);
spin_lock_irq(&n->list_lock);
if (shared)
free_block(cachep, shared->entry,
- shared->avail, node);
+ shared->avail, node, &list);
n->shared = new_shared;
if (!n->alien) {
@@ -3793,6 +3671,7 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
n->free_limit = (1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
spin_unlock_irq(&n->list_lock);
+ slabs_destroy(cachep, &list);
kfree(shared);
free_alien_cache(new_alien);
continue;
@@ -3820,9 +3699,8 @@ fail:
/* Cache is not active yet. Roll back what we did */
node--;
while (node >= 0) {
- if (cachep->node[node]) {
- n = cachep->node[node];
-
+ n = get_node(cachep, node);
+ if (n) {
kfree(n->shared);
free_alien_cache(n->alien);
kfree(n);
@@ -3883,12 +3761,20 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
cachep->shared = shared;
for_each_online_cpu(i) {
+ LIST_HEAD(list);
struct array_cache *ccold = new->new[i];
+ int node;
+ struct kmem_cache_node *n;
+
if (!ccold)
continue;
- spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
- free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
- spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
+
+ node = cpu_to_mem(i);
+ n = get_node(cachep, node);
+ spin_lock_irq(&n->list_lock);
+ free_block(cachep, ccold->entry, ccold->avail, node, &list);
+ spin_unlock_irq(&n->list_lock);
+ slabs_destroy(cachep, &list);
kfree(ccold);
}
kfree(new);
@@ -3996,6 +3882,7 @@ skip_setup:
static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
struct array_cache *ac, int force, int node)
{
+ LIST_HEAD(list);
int tofree;
if (!ac || !ac->avail)
@@ -4008,12 +3895,13 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
tofree = force ? ac->avail : (ac->limit + 4) / 5;
if (tofree > ac->avail)
tofree = (ac->avail + 1) / 2;
- free_block(cachep, ac->entry, tofree, node);
+ free_block(cachep, ac->entry, tofree, node, &list);
ac->avail -= tofree;
memmove(ac->entry, &(ac->entry[tofree]),
sizeof(void *) * ac->avail);
}
spin_unlock_irq(&n->list_lock);
+ slabs_destroy(cachep, &list);
}
}
@@ -4048,7 +3936,7 @@ static void cache_reap(struct work_struct *w)
* have established with reasonable certainty that
* we can do some work if the lock was obtained.
*/
- n = searchp->node[node];
+ n = get_node(searchp, node);
reap_alien(searchp, n);
@@ -4100,10 +3988,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
active_objs = 0;
num_slabs = 0;
- for_each_online_node(node) {
- n = cachep->node[node];
- if (!n)
- continue;
+ for_each_kmem_cache_node(cachep, node, n) {
check_irq_on();
spin_lock_irq(&n->list_lock);
@@ -4328,10 +4213,7 @@ static int leaks_show(struct seq_file *m, void *p)
x[1] = 0;
- for_each_online_node(node) {
- n = cachep->node[node];
- if (!n)
- continue;
+ for_each_kmem_cache_node(cachep, node, n) {
check_irq_on();
spin_lock_irq(&n->list_lock);
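A recurring change across the mm/slab.c hunks is that free_block() no longer destroys empty slab pages while the node's list_lock is held; it detaches them onto a caller-supplied list, and the caller frees them with slabs_destroy() after dropping the lock. A short sketch of the pattern, mirroring the converted do_drain() hunk above (variable names taken from that hunk):

	struct kmem_cache_node *n = get_node(cachep, node);
	LIST_HEAD(list);

	spin_lock(&n->list_lock);
	free_block(cachep, ac->entry, ac->avail, node, &list);	/* empty slabs detached onto @list */
	spin_unlock(&n->list_lock);
	slabs_destroy(cachep, &list);				/* pages released outside the lock */
	ac->avail = 0;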
diff --git a/mm/slab.h b/mm/slab.h
index 961a3fb1f5a2..928823e17e58 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -260,9 +260,8 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
WARN_ON_ONCE(1);
return s;
}
-#endif
-
+#ifndef CONFIG_SLOB
/*
* The slab lists for all objects.
*/
@@ -277,7 +276,7 @@ struct kmem_cache_node {
unsigned int free_limit;
unsigned int colour_next; /* Per-node cache coloring */
struct array_cache *shared; /* shared per node */
- struct array_cache **alien; /* on other nodes */
+ struct alien_cache **alien; /* on other nodes */
unsigned long next_reap; /* updated without locking */
int free_touched; /* updated without locking */
#endif
@@ -294,5 +293,22 @@ struct kmem_cache_node {
};
+static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
+{
+ return s->node[node];
+}
+
+/*
+ * Iterator over all nodes. The body will be executed for each node that has
+ * a kmem_cache_node structure allocated (which is true for all online nodes)
+ */
+#define for_each_kmem_cache_node(__s, __node, __n) \
+ for (__node = 0; __n = get_node(__s, __node), __node < nr_node_ids; __node++) \
+ if (__n)
+
+#endif
+
void *slab_next(struct seq_file *m, void *p, loff_t *pos);
void slab_stop(struct seq_file *m, void *p);
+
+#endif /* MM_SLAB_H */
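The get_node()/for_each_kmem_cache_node() helpers added here replace the open-coded for_each_online_node() loops removed throughout mm/slab.c and mm/slub.c. A minimal, hypothetical use, analogous to the converted get_slabinfo() loop in mm/slub.c below:

	int node;
	struct kmem_cache_node *n;
	unsigned long nr_free = 0;

	/* visits only nodes that actually have a kmem_cache_node allocated */
	for_each_kmem_cache_node(s, node, n)
		nr_free += count_partial(n, count_free);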
diff --git a/mm/slab_common.c b/mm/slab_common.c
index d31c4bacc6a2..d319502b2403 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -19,6 +19,8 @@
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <linux/memcontrol.h>
+
+#define CREATE_TRACE_POINTS
#include <trace/events/kmem.h>
#include "slab.h"
@@ -787,3 +789,102 @@ static int __init slab_proc_init(void)
}
module_init(slab_proc_init);
#endif /* CONFIG_SLABINFO */
+
+static __always_inline void *__do_krealloc(const void *p, size_t new_size,
+ gfp_t flags)
+{
+ void *ret;
+ size_t ks = 0;
+
+ if (p)
+ ks = ksize(p);
+
+ if (ks >= new_size)
+ return (void *)p;
+
+ ret = kmalloc_track_caller(new_size, flags);
+ if (ret && p)
+ memcpy(ret, p, ks);
+
+ return ret;
+}
+
+/**
+ * __krealloc - like krealloc() but don't free @p.
+ * @p: object to reallocate memory for.
+ * @new_size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * This function is like krealloc() except it never frees the originally
+ * allocated buffer. Use this if you don't want to free the buffer immediately
+ * like, for example, with RCU.
+ */
+void *__krealloc(const void *p, size_t new_size, gfp_t flags)
+{
+ if (unlikely(!new_size))
+ return ZERO_SIZE_PTR;
+
+ return __do_krealloc(p, new_size, flags);
+
+}
+EXPORT_SYMBOL(__krealloc);
+
+/**
+ * krealloc - reallocate memory. The contents will remain unchanged.
+ * @p: object to reallocate memory for.
+ * @new_size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * The contents of the object pointed to are preserved up to the
+ * lesser of the new and old sizes. If @p is %NULL, krealloc()
+ * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
+ * %NULL pointer, the object pointed to is freed.
+ */
+void *krealloc(const void *p, size_t new_size, gfp_t flags)
+{
+ void *ret;
+
+ if (unlikely(!new_size)) {
+ kfree(p);
+ return ZERO_SIZE_PTR;
+ }
+
+ ret = __do_krealloc(p, new_size, flags);
+ if (ret && p != ret)
+ kfree(p);
+
+ return ret;
+}
+EXPORT_SYMBOL(krealloc);
+
+/**
+ * kzfree - like kfree but zero memory
+ * @p: object to free memory of
+ *
+ * The memory of the object @p points to is zeroed before freed.
+ * If @p is %NULL, kzfree() does nothing.
+ *
+ * Note: this function zeroes the whole allocated buffer which can be a good
+ * deal bigger than the requested buffer size passed to kmalloc(). So be
+ * careful when using this function in performance sensitive code.
+ */
+void kzfree(const void *p)
+{
+ size_t ks;
+ void *mem = (void *)p;
+
+ if (unlikely(ZERO_OR_NULL_PTR(mem)))
+ return;
+ ks = ksize(mem);
+ memset(mem, 0, ks);
+ kfree(mem);
+}
+EXPORT_SYMBOL(kzfree);
+
+/* Tracepoints definitions. */
+EXPORT_TRACEPOINT_SYMBOL(kmalloc);
+EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
+EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
+EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
+EXPORT_TRACEPOINT_SYMBOL(kfree);
+EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
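__krealloc(), krealloc() and kzfree() are moved here verbatim from mm/util.c (removed below). A short, hypothetical call sequence showing the documented semantics: krealloc() preserves the old contents up to the smaller size and leaves the original buffer untouched on failure, while kzfree() zeroes the whole allocation before freeing it.

static int example_grow_buffer(void)
{
	char *buf, *tmp;

	buf = kmalloc(64, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	tmp = krealloc(buf, 128, GFP_KERNEL);
	if (!tmp) {
		kfree(buf);		/* krealloc() did not free the original on failure */
		return -ENOMEM;
	}
	buf = tmp;			/* first 64 bytes preserved */

	kzfree(buf);			/* zeroes ksize(buf) bytes, then frees */
	return 0;
}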
diff --git a/mm/slub.c b/mm/slub.c
index 73004808537e..3e8afcc07a76 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -233,11 +233,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
* Core slab cache functions
*******************************************************************/
-static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
-{
- return s->node[node];
-}
-
/* Verify that a pointer has an address that is valid within a slab page */
static inline int check_valid_pointer(struct kmem_cache *s,
struct page *page, const void *object)
@@ -288,6 +283,10 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
__p += (__s)->size)
+#define for_each_object_idx(__p, __idx, __s, __addr, __objects) \
+ for (__p = (__addr), __idx = 1; __idx <= __objects;\
+ __p += (__s)->size, __idx++)
+
/* Determine object index from a given position */
static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
{
@@ -382,9 +381,9 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
if (s->flags & __CMPXCHG_DOUBLE) {
if (cmpxchg_double(&page->freelist, &page->counters,
- freelist_old, counters_old,
- freelist_new, counters_new))
- return 1;
+ freelist_old, counters_old,
+ freelist_new, counters_new))
+ return 1;
} else
#endif
{
@@ -418,9 +417,9 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
if (s->flags & __CMPXCHG_DOUBLE) {
if (cmpxchg_double(&page->freelist, &page->counters,
- freelist_old, counters_old,
- freelist_new, counters_new))
- return 1;
+ freelist_old, counters_old,
+ freelist_new, counters_new))
+ return 1;
} else
#endif
{
@@ -945,60 +944,6 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
}
/*
- * Hooks for other subsystems that check memory allocations. In a typical
- * production configuration these hooks all should produce no code at all.
- */
-static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
-{
- kmemleak_alloc(ptr, size, 1, flags);
-}
-
-static inline void kfree_hook(const void *x)
-{
- kmemleak_free(x);
-}
-
-static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
-{
- flags &= gfp_allowed_mask;
- lockdep_trace_alloc(flags);
- might_sleep_if(flags & __GFP_WAIT);
-
- return should_failslab(s->object_size, flags, s->flags);
-}
-
-static inline void slab_post_alloc_hook(struct kmem_cache *s,
- gfp_t flags, void *object)
-{
- flags &= gfp_allowed_mask;
- kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
- kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
-}
-
-static inline void slab_free_hook(struct kmem_cache *s, void *x)
-{
- kmemleak_free_recursive(x, s->flags);
-
- /*
- * Trouble is that we may no longer disable interrupts in the fast path
- * So in order to make the debug calls that expect irqs to be
- * disabled we need to disable interrupts temporarily.
- */
-#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
- {
- unsigned long flags;
-
- local_irq_save(flags);
- kmemcheck_slab_free(s, x, s->object_size);
- debug_check_no_locks_freed(x, s->object_size);
- local_irq_restore(flags);
- }
-#endif
- if (!(s->flags & SLAB_DEBUG_OBJECTS))
- debug_check_no_obj_freed(x, s->object_size);
-}
-
-/*
* Tracking of fully allocated slabs for debugging purposes.
*/
static void add_full(struct kmem_cache *s,
@@ -1282,6 +1227,12 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
static inline void dec_slabs_node(struct kmem_cache *s, int node,
int objects) {}
+#endif /* CONFIG_SLUB_DEBUG */
+
+/*
+ * Hooks for other subsystems that check memory allocations. In a typical
+ * production configuration these hooks all should produce no code at all.
+ */
static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
{
kmemleak_alloc(ptr, size, 1, flags);
@@ -1293,21 +1244,44 @@ static inline void kfree_hook(const void *x)
}
static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
- { return 0; }
+{
+ flags &= gfp_allowed_mask;
+ lockdep_trace_alloc(flags);
+ might_sleep_if(flags & __GFP_WAIT);
+
+ return should_failslab(s->object_size, flags, s->flags);
+}
-static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
- void *object)
+static inline void slab_post_alloc_hook(struct kmem_cache *s,
+ gfp_t flags, void *object)
{
- kmemleak_alloc_recursive(object, s->object_size, 1, s->flags,
- flags & gfp_allowed_mask);
+ flags &= gfp_allowed_mask;
+ kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
+ kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
}
static inline void slab_free_hook(struct kmem_cache *s, void *x)
{
kmemleak_free_recursive(x, s->flags);
-}
-#endif /* CONFIG_SLUB_DEBUG */
+ /*
+ * Trouble is that we may no longer disable interrupts in the fast path
+ * So in order to make the debug calls that expect irqs to be
+ * disabled we need to disable interrupts temporarily.
+ */
+#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
+ {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ kmemcheck_slab_free(s, x, s->object_size);
+ debug_check_no_locks_freed(x, s->object_size);
+ local_irq_restore(flags);
+ }
+#endif
+ if (!(s->flags & SLAB_DEBUG_OBJECTS))
+ debug_check_no_obj_freed(x, s->object_size);
+}
/*
* Slab allocation and freeing
@@ -1409,9 +1383,9 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
{
struct page *page;
void *start;
- void *last;
void *p;
int order;
+ int idx;
BUG_ON(flags & GFP_SLAB_BUG_MASK);
@@ -1432,14 +1406,13 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
if (unlikely(s->flags & SLAB_POISON))
memset(start, POISON_INUSE, PAGE_SIZE << order);
- last = start;
- for_each_object(p, s, start, page->objects) {
- setup_object(s, page, last);
- set_freepointer(s, last, p);
- last = p;
+ for_each_object_idx(p, idx, s, start, page->objects) {
+ setup_object(s, page, p);
+ if (likely(idx < page->objects))
+ set_freepointer(s, p, p + s->size);
+ else
+ set_freepointer(s, p, NULL);
}
- setup_object(s, page, last);
- set_freepointer(s, last, NULL);
page->freelist = start;
page->inuse = page->objects;
@@ -2162,6 +2135,7 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
int node;
+ struct kmem_cache_node *n;
if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
return;
@@ -2176,15 +2150,11 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
s->name);
- for_each_online_node(node) {
- struct kmem_cache_node *n = get_node(s, node);
+ for_each_kmem_cache_node(s, node, n) {
unsigned long nr_slabs;
unsigned long nr_objs;
unsigned long nr_free;
- if (!n)
- continue;
-
nr_free = count_partial(n, count_free);
nr_slabs = node_nr_slabs(n);
nr_objs = node_nr_objs(n);
@@ -2928,13 +2898,10 @@ static void early_kmem_cache_node_alloc(int node)
static void free_kmem_cache_nodes(struct kmem_cache *s)
{
int node;
+ struct kmem_cache_node *n;
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = s->node[node];
-
- if (n)
- kmem_cache_free(kmem_cache_node, n);
-
+ for_each_kmem_cache_node(s, node, n) {
+ kmem_cache_free(kmem_cache_node, n);
s->node[node] = NULL;
}
}
@@ -3222,12 +3189,11 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
static inline int kmem_cache_close(struct kmem_cache *s)
{
int node;
+ struct kmem_cache_node *n;
flush_all(s);
/* Attempt to free all objects */
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = get_node(s, node);
-
+ for_each_kmem_cache_node(s, node, n) {
free_partial(s, n);
if (n->nr_partial || slabs_node(s, node))
return 1;
@@ -3412,9 +3378,7 @@ int __kmem_cache_shrink(struct kmem_cache *s)
return -ENOMEM;
flush_all(s);
- for_each_node_state(node, N_NORMAL_MEMORY) {
- n = get_node(s, node);
-
+ for_each_kmem_cache_node(s, node, n) {
if (!n->nr_partial)
continue;
@@ -3586,6 +3550,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
{
int node;
struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+ struct kmem_cache_node *n;
memcpy(s, static_cache, kmem_cache->object_size);
@@ -3595,19 +3560,16 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
* IPIs around.
*/
__flush_cpu_slab(s, smp_processor_id());
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = get_node(s, node);
+ for_each_kmem_cache_node(s, node, n) {
struct page *p;
- if (n) {
- list_for_each_entry(p, &n->partial, lru)
- p->slab_cache = s;
+ list_for_each_entry(p, &n->partial, lru)
+ p->slab_cache = s;
#ifdef CONFIG_SLUB_DEBUG
- list_for_each_entry(p, &n->full, lru)
- p->slab_cache = s;
+ list_for_each_entry(p, &n->full, lru)
+ p->slab_cache = s;
#endif
- }
}
list_add(&s->list, &slab_caches);
return s;
@@ -3960,16 +3922,14 @@ static long validate_slab_cache(struct kmem_cache *s)
unsigned long count = 0;
unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
sizeof(unsigned long), GFP_KERNEL);
+ struct kmem_cache_node *n;
if (!map)
return -ENOMEM;
flush_all(s);
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = get_node(s, node);
-
+ for_each_kmem_cache_node(s, node, n)
count += validate_slab_node(s, n, map);
- }
kfree(map);
return count;
}
@@ -4123,6 +4083,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
int node;
unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
sizeof(unsigned long), GFP_KERNEL);
+ struct kmem_cache_node *n;
if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
GFP_TEMPORARY)) {
@@ -4132,8 +4093,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
/* Push back cpu slabs */
flush_all(s);
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = get_node(s, node);
+ for_each_kmem_cache_node(s, node, n) {
unsigned long flags;
struct page *page;
@@ -4205,7 +4165,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
#endif
#ifdef SLUB_RESILIENCY_TEST
-static void resiliency_test(void)
+static void __init resiliency_test(void)
{
u8 *p;
@@ -4332,8 +4292,9 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
get_online_mems();
#ifdef CONFIG_SLUB_DEBUG
if (flags & SO_ALL) {
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = get_node(s, node);
+ struct kmem_cache_node *n;
+
+ for_each_kmem_cache_node(s, node, n) {
if (flags & SO_TOTAL)
x = atomic_long_read(&n->total_objects);
@@ -4349,9 +4310,9 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
} else
#endif
if (flags & SO_PARTIAL) {
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = get_node(s, node);
+ struct kmem_cache_node *n;
+ for_each_kmem_cache_node(s, node, n) {
if (flags & SO_TOTAL)
x = count_partial(n, count_total);
else if (flags & SO_OBJECTS)
@@ -4364,7 +4325,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
}
x = sprintf(buf, "%lu", total);
#ifdef CONFIG_NUMA
- for_each_node_state(node, N_NORMAL_MEMORY)
+ for (node = 0; node < nr_node_ids; node++)
if (nodes[node])
x += sprintf(buf + x, " N%d=%lu",
node, nodes[node]);
@@ -4378,16 +4339,12 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
static int any_slab_objects(struct kmem_cache *s)
{
int node;
+ struct kmem_cache_node *n;
- for_each_online_node(node) {
- struct kmem_cache_node *n = get_node(s, node);
-
- if (!n)
- continue;
-
+ for_each_kmem_cache_node(s, node, n)
if (atomic_long_read(&n->total_objects))
return 1;
- }
+
return 0;
}
#endif
@@ -4509,7 +4466,7 @@ SLAB_ATTR_RO(ctor);
static ssize_t aliases_show(struct kmem_cache *s, char *buf)
{
- return sprintf(buf, "%d\n", s->refcount - 1);
+ return sprintf(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
}
SLAB_ATTR_RO(aliases);
@@ -5171,12 +5128,6 @@ static char *create_unique_id(struct kmem_cache *s)
*p++ = '-';
p += sprintf(p, "%07d", s->size);
-#ifdef CONFIG_MEMCG_KMEM
- if (!is_root_cache(s))
- p += sprintf(p, "-%08d",
- memcg_cache_id(s->memcg_params->memcg));
-#endif
-
BUG_ON(p > name + ID_STR_LENGTH - 1);
return name;
}
@@ -5342,13 +5293,9 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
unsigned long nr_objs = 0;
unsigned long nr_free = 0;
int node;
+ struct kmem_cache_node *n;
- for_each_online_node(node) {
- struct kmem_cache_node *n = get_node(s, node);
-
- if (!n)
- continue;
-
+ for_each_kmem_cache_node(s, node, n) {
nr_slabs += node_nr_slabs(n);
nr_objs += node_nr_objs(n);
nr_free += count_partial(n, count_free);
diff --git a/mm/swap.c b/mm/swap.c
index 9e8e3472248b..6b2dc3897cd5 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -62,6 +62,7 @@ static void __page_cache_release(struct page *page)
del_page_from_lru_list(page, lruvec, page_off_lru(page));
spin_unlock_irqrestore(&zone->lru_lock, flags);
}
+ mem_cgroup_uncharge(page);
}
static void __put_single_page(struct page *page)
@@ -501,7 +502,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec,
SetPageActive(page);
lru += LRU_ACTIVE;
add_page_to_lru_list(page, lruvec, lru);
- trace_mm_lru_activate(page, page_to_pfn(page));
+ trace_mm_lru_activate(page);
__count_vm_event(PGACTIVATE);
update_page_reclaim_stat(lruvec, file, 1);
@@ -589,6 +590,9 @@ static void __lru_cache_activate_page(struct page *page)
* inactive,unreferenced -> inactive,referenced
* inactive,referenced -> active,unreferenced
* active,unreferenced -> active,referenced
+ *
+ * When a newly allocated page is not yet visible, so safe for non-atomic ops,
+ * __SetPageReferenced(page) may be substituted for mark_page_accessed(page).
*/
void mark_page_accessed(struct page *page)
{
@@ -614,17 +618,6 @@ void mark_page_accessed(struct page *page)
}
EXPORT_SYMBOL(mark_page_accessed);
-/*
- * Used to mark_page_accessed(page) that is not visible yet and when it is
- * still safe to use non-atomic ops
- */
-void init_page_accessed(struct page *page)
-{
- if (!PageReferenced(page))
- __SetPageReferenced(page);
-}
-EXPORT_SYMBOL(init_page_accessed);
-
static void __lru_cache_add(struct page *page)
{
struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
@@ -695,6 +688,40 @@ void add_page_to_unevictable_list(struct page *page)
spin_unlock_irq(&zone->lru_lock);
}
+/**
+ * lru_cache_add_active_or_unevictable
+ * @page: the page to be added to LRU
+ * @vma: vma in which page is mapped for determining reclaimability
+ *
+ * Place @page on the active or unevictable LRU list, depending on its
+ * evictability. Note that if the page is not evictable, it goes
+ * directly back onto it's zone's unevictable list, it does NOT use a
+ * per cpu pagevec.
+ */
+void lru_cache_add_active_or_unevictable(struct page *page,
+ struct vm_area_struct *vma)
+{
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+
+ if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
+ SetPageActive(page);
+ lru_cache_add(page);
+ return;
+ }
+
+ if (!TestSetPageMlocked(page)) {
+ /*
+ * We use the irq-unsafe __mod_zone_page_stat because this
+ * counter is not modified from interrupt context, and the pte
+ * lock is held(spinlock), which implies preemption disabled.
+ */
+ __mod_zone_page_state(page_zone(page), NR_MLOCK,
+ hpage_nr_pages(page));
+ count_vm_event(UNEVICTABLE_PGMLOCKED);
+ }
+ add_page_to_unevictable_list(page);
+}
+
/*
* If the page can not be invalidated, it is moved to the
* inactive list to speed up its reclaim. It is moved to the
@@ -921,6 +948,7 @@ void release_pages(struct page **pages, int nr, bool cold)
if (zone)
spin_unlock_irqrestore(&zone->lru_lock, flags);
+ mem_cgroup_uncharge_list(&pages_to_free);
free_hot_cold_page_list(&pages_to_free, cold);
}
EXPORT_SYMBOL(release_pages);
@@ -996,7 +1024,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
SetPageLRU(page);
add_page_to_lru_list(page, lruvec, lru);
update_page_reclaim_stat(lruvec, file, active);
- trace_mm_lru_insertion(page, page_to_pfn(page), lru, trace_pagemap_flags(page));
+ trace_mm_lru_insertion(page, lru);
}
/*
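init_page_accessed() is removed above; per the new comment added to mark_page_accessed(), a freshly allocated page that is not yet visible to anyone else can simply use the non-atomic setter. A sketch of that idiom as used by the shmem_getpage_gfp() hunk earlier (the allocation call here is an assumption for illustration):

	page = alloc_page(GFP_KERNEL);		/* brand new, not yet published anywhere */
	if (!page)
		return -ENOMEM;

	__SetPageSwapBacked(page);
	__set_page_locked(page);
	__SetPageReferenced(page);		/* non-atomic is safe: no one else can see the page yet */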
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 2972eee184a4..e160151da6b8 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -176,7 +176,7 @@ int add_to_swap(struct page *page, struct list_head *list)
if (unlikely(PageTransHuge(page)))
if (unlikely(split_huge_page_to_list(page, list))) {
- swapcache_free(entry, NULL);
+ swapcache_free(entry);
return 0;
}
@@ -202,7 +202,7 @@ int add_to_swap(struct page *page, struct list_head *list)
* add_to_swap_cache() doesn't return -EEXIST, so we can safely
* clear SWAP_HAS_CACHE flag.
*/
- swapcache_free(entry, NULL);
+ swapcache_free(entry);
return 0;
}
}
@@ -225,7 +225,7 @@ void delete_from_swap_cache(struct page *page)
__delete_from_swap_cache(page);
spin_unlock_irq(&address_space->tree_lock);
- swapcache_free(entry, page);
+ swapcache_free(entry);
page_cache_release(page);
}
@@ -386,7 +386,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
* add_to_swap_cache() doesn't return -EEXIST, so we can safely
* clear SWAP_HAS_CACHE flag.
*/
- swapcache_free(entry, NULL);
+ swapcache_free(entry);
} while (err != -ENOMEM);
if (new_page)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4c524f7bd0bf..8798b2e0ac59 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -843,16 +843,13 @@ void swap_free(swp_entry_t entry)
/*
* Called after dropping swapcache to decrease refcnt to swap entries.
*/
-void swapcache_free(swp_entry_t entry, struct page *page)
+void swapcache_free(swp_entry_t entry)
{
struct swap_info_struct *p;
- unsigned char count;
p = swap_info_get(entry);
if (p) {
- count = swap_entry_free(p, entry, SWAP_HAS_CACHE);
- if (page)
- mem_cgroup_uncharge_swapcache(page, entry, count != 0);
+ swap_entry_free(p, entry, SWAP_HAS_CACHE);
spin_unlock(&p->lock);
}
}
@@ -1106,15 +1103,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
if (unlikely(!page))
return -ENOMEM;
- if (mem_cgroup_try_charge_swapin(vma->vm_mm, page,
- GFP_KERNEL, &memcg)) {
+ if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg)) {
ret = -ENOMEM;
goto out_nolock;
}
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) {
- mem_cgroup_cancel_charge_swapin(memcg);
+ mem_cgroup_cancel_charge(page, memcg);
ret = 0;
goto out;
}
@@ -1124,11 +1120,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
get_page(page);
set_pte_at(vma->vm_mm, addr, pte,
pte_mkold(mk_pte(page, vma->vm_page_prot)));
- if (page == swapcache)
+ if (page == swapcache) {
page_add_anon_rmap(page, vma, addr);
- else /* ksm created a completely new copy */
+ mem_cgroup_commit_charge(page, memcg, true);
+ } else { /* ksm created a completely new copy */
page_add_new_anon_rmap(page, vma, addr);
- mem_cgroup_commit_charge_swapin(page, memcg);
+ mem_cgroup_commit_charge(page, memcg, false);
+ lru_cache_add_active_or_unevictable(page, vma);
+ }
swap_free(entry);
/*
* Move the page to the active list so it is not
diff --git a/mm/truncate.c b/mm/truncate.c
index eda247307164..96d167372d89 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -281,7 +281,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
min(end - index, (pgoff_t)PAGEVEC_SIZE),
indices)) {
- mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
@@ -307,7 +306,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
}
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
- mem_cgroup_uncharge_end();
cond_resched();
index++;
}
@@ -369,7 +367,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
pagevec_release(&pvec);
break;
}
- mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
@@ -394,7 +391,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
}
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
- mem_cgroup_uncharge_end();
index++;
}
cleancache_invalidate_inode(mapping);
@@ -493,7 +489,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
indices)) {
- mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
@@ -522,7 +517,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
}
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
- mem_cgroup_uncharge_end();
cond_resched();
index++;
}
@@ -553,7 +547,6 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
BUG_ON(page_has_private(page));
__delete_from_page_cache(page, NULL);
spin_unlock_irq(&mapping->tree_lock);
- mem_cgroup_uncharge_cache_page(page);
if (mapping->a_ops->freepage)
mapping->a_ops->freepage(page);
@@ -602,7 +595,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
indices)) {
- mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
@@ -655,7 +647,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
}
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
- mem_cgroup_uncharge_end();
cond_resched();
index++;
}
diff --git a/mm/util.c b/mm/util.c
index d5ea733c5082..ff7a52e7386e 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -3,6 +3,7 @@
#include <linux/string.h>
#include <linux/compiler.h>
#include <linux/export.h>
+#include <linux/ctype.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/security.h>
@@ -16,9 +17,6 @@
#include "internal.h"
-#define CREATE_TRACE_POINTS
-#include <trace/events/kmem.h>
-
/**
* kstrdup - allocate space for and copy an existing string
* @s: the string to duplicate
@@ -65,6 +63,35 @@ char *kstrndup(const char *s, size_t max, gfp_t gfp)
EXPORT_SYMBOL(kstrndup);
/**
+ * kstrimdup - Trim and copy a %NUL terminated string.
+ * @s: the string to trim and duplicate
+ * @gfp: the GFP mask used in the kmalloc() call when allocating memory
+ *
+ * Returns an address, which the caller must kfree(), containing
+ * a duplicate of the passed string with leading and/or trailing
+ * whitespace (as defined by isspace()) removed, or %NULL on allocation failure.
+ */
+char *kstrimdup(const char *s, gfp_t gfp)
+{
+ char *buf;
+ char *begin = skip_spaces(s);
+ size_t len = strlen(begin);
+
+ while (len && isspace(begin[len - 1]))
+ len--;
+
+ buf = kmalloc_track_caller(len + 1, gfp);
+ if (!buf)
+ return NULL;
+
+ memcpy(buf, begin, len);
+ buf[len] = '\0';
+
+ return buf;
+}
+EXPORT_SYMBOL(kstrimdup);
+
+/**
* kmemdup - duplicate region of memory
*
* @src: memory region to duplicate
@@ -112,97 +139,6 @@ void *memdup_user(const void __user *src, size_t len)
}
EXPORT_SYMBOL(memdup_user);
-static __always_inline void *__do_krealloc(const void *p, size_t new_size,
- gfp_t flags)
-{
- void *ret;
- size_t ks = 0;
-
- if (p)
- ks = ksize(p);
-
- if (ks >= new_size)
- return (void *)p;
-
- ret = kmalloc_track_caller(new_size, flags);
- if (ret && p)
- memcpy(ret, p, ks);
-
- return ret;
-}
-
-/**
- * __krealloc - like krealloc() but don't free @p.
- * @p: object to reallocate memory for.
- * @new_size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
- *
- * This function is like krealloc() except it never frees the originally
- * allocated buffer. Use this if you don't want to free the buffer immediately
- * like, for example, with RCU.
- */
-void *__krealloc(const void *p, size_t new_size, gfp_t flags)
-{
- if (unlikely(!new_size))
- return ZERO_SIZE_PTR;
-
- return __do_krealloc(p, new_size, flags);
-
-}
-EXPORT_SYMBOL(__krealloc);
-
-/**
- * krealloc - reallocate memory. The contents will remain unchanged.
- * @p: object to reallocate memory for.
- * @new_size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
- *
- * The contents of the object pointed to are preserved up to the
- * lesser of the new and old sizes. If @p is %NULL, krealloc()
- * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
- * %NULL pointer, the object pointed to is freed.
- */
-void *krealloc(const void *p, size_t new_size, gfp_t flags)
-{
- void *ret;
-
- if (unlikely(!new_size)) {
- kfree(p);
- return ZERO_SIZE_PTR;
- }
-
- ret = __do_krealloc(p, new_size, flags);
- if (ret && p != ret)
- kfree(p);
-
- return ret;
-}
-EXPORT_SYMBOL(krealloc);
-
-/**
- * kzfree - like kfree but zero memory
- * @p: object to free memory of
- *
- * The memory of the object @p points to is zeroed before freed.
- * If @p is %NULL, kzfree() does nothing.
- *
- * Note: this function zeroes the whole allocated buffer which can be a good
- * deal bigger than the requested buffer size passed to kmalloc(). So be
- * careful when using this function in performance sensitive code.
- */
-void kzfree(const void *p)
-{
- size_t ks;
- void *mem = (void *)p;
-
- if (unlikely(ZERO_OR_NULL_PTR(mem)))
- return;
- ks = ksize(mem);
- memset(mem, 0, ks);
- kfree(mem);
-}
-EXPORT_SYMBOL(kzfree);
-
/*
* strndup_user - duplicate an existing string from user space
* @s: The string to duplicate
@@ -504,11 +440,3 @@ out_mm:
out:
return res;
}
-
-/* Tracepoints definitions. */
-EXPORT_TRACEPOINT_SYMBOL(kmalloc);
-EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
-EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
-EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
-EXPORT_TRACEPOINT_SYMBOL(kfree);
-EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
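For illustration, a minimal sketch (not part of the patch) of how a caller might use the kstrimdup() helper added above; parse_bool_attr() and the accepted keywords are assumptions made for this example only.

#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>

/* Hypothetical caller: normalise a user-supplied attribute value. */
static int parse_bool_attr(const char *input, bool *val)
{
	char *s = kstrimdup(input, GFP_KERNEL);	/* trim + duplicate */
	int ret = 0;

	if (!s)
		return -ENOMEM;

	if (!strcmp(s, "on"))
		*val = true;
	else if (!strcmp(s, "off"))
		*val = false;
	else
		ret = -EINVAL;

	kfree(s);	/* the caller owns and must free the duplicate */
	return ret;
}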
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f64632b67196..2b0aa5486092 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1270,19 +1270,15 @@ void unmap_kernel_range(unsigned long addr, unsigned long size)
}
EXPORT_SYMBOL_GPL(unmap_kernel_range);
-int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
+int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages)
{
unsigned long addr = (unsigned long)area->addr;
unsigned long end = addr + get_vm_area_size(area);
int err;
- err = vmap_page_range(addr, end, prot, *pages);
- if (err > 0) {
- *pages += err;
- err = 0;
- }
+ err = vmap_page_range(addr, end, prot, pages);
- return err;
+ return err > 0 ? 0 : err;
}
EXPORT_SYMBOL_GPL(map_vm_area);
@@ -1548,7 +1544,7 @@ void *vmap(struct page **pages, unsigned int count,
if (!area)
return NULL;
- if (map_vm_area(area, prot, &pages)) {
+ if (map_vm_area(area, prot, pages)) {
vunmap(area->addr);
return NULL;
}
@@ -1566,7 +1562,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
const int order = 0;
struct page **pages;
unsigned int nr_pages, array_size, i;
- gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
+ const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
+ const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
array_size = (nr_pages * sizeof(struct page *));
@@ -1589,12 +1586,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
for (i = 0; i < area->nr_pages; i++) {
struct page *page;
- gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;
if (node == NUMA_NO_NODE)
- page = alloc_page(tmp_mask);
+ page = alloc_page(alloc_mask);
else
- page = alloc_pages_node(node, tmp_mask, order);
+ page = alloc_pages_node(node, alloc_mask, order);
if (unlikely(!page)) {
/* Successfully allocated i pages, free them in __vunmap() */
@@ -1602,9 +1598,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
goto fail;
}
area->pages[i] = page;
+ if (gfp_mask & __GFP_WAIT)
+ cond_resched();
}
- if (map_vm_area(area, prot, &pages))
+ if (map_vm_area(area, prot, pages))
goto fail;
return area->addr;
@@ -2690,14 +2688,14 @@ void get_vmalloc_info(struct vmalloc_info *vmi)
prev_end = VMALLOC_START;
- spin_lock(&vmap_area_lock);
+ rcu_read_lock();
if (list_empty(&vmap_area_list)) {
vmi->largest_chunk = VMALLOC_TOTAL;
goto out;
}
- list_for_each_entry(va, &vmap_area_list, list) {
+ list_for_each_entry_rcu(va, &vmap_area_list, list) {
unsigned long addr = va->va_start;
/*
@@ -2724,7 +2722,7 @@ void get_vmalloc_info(struct vmalloc_info *vmi)
vmi->largest_chunk = VMALLOC_END - prev_end;
out:
- spin_unlock(&vmap_area_lock);
+ rcu_read_unlock();
}
#endif
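As a hedged sketch (not part of the patch), the simplified map_vm_area() signature above can be driven like this; map_pages_example() is a hypothetical helper mirroring what vmap() does after this change.

#include <linux/mm.h>
#include <linux/vmalloc.h>

/* Illustrative only: map a caller-supplied page array into a fresh
 * vmalloc area using the new pass-the-array-directly convention. */
static void *map_pages_example(struct page **pages, unsigned int count)
{
	struct vm_struct *area;

	area = get_vm_area((unsigned long)count << PAGE_SHIFT, VM_MAP);
	if (!area)
		return NULL;

	/* No &pages double indirection any more; the partial-map
	 * bookkeeping is gone along with it. */
	if (map_vm_area(area, PAGE_KERNEL, pages)) {
		free_vm_area(area);
		return NULL;
	}

	return area->addr;
}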
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0f16ffe8eb67..d698f4f7b0f2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -59,35 +59,20 @@
#include <trace/events/vmscan.h>
struct scan_control {
- /* Incremented by the number of inactive pages that were scanned */
- unsigned long nr_scanned;
-
- /* Number of pages freed so far during a call to shrink_zones() */
- unsigned long nr_reclaimed;
-
/* How many pages shrink_list() should reclaim */
unsigned long nr_to_reclaim;
- unsigned long hibernation_mode;
-
/* This context's GFP mask */
gfp_t gfp_mask;
- int may_writepage;
-
- /* Can mapped pages be reclaimed? */
- int may_unmap;
-
- /* Can pages be swapped as part of reclaim? */
- int may_swap;
-
+ /* Allocation order */
int order;
- /* Scan (total_size >> priority) pages at once */
- int priority;
-
- /* anon vs. file LRUs scanning "ratio" */
- int swappiness;
+ /*
+ * Nodemask of nodes allowed by the caller. If NULL, all nodes
+ * are scanned.
+ */
+ nodemask_t *nodemask;
/*
* The memory cgroup that hit its limit and as a result is the
@@ -95,11 +80,27 @@ struct scan_control {
*/
struct mem_cgroup *target_mem_cgroup;
- /*
- * Nodemask of nodes allowed by the caller. If NULL, all nodes
- * are scanned.
- */
- nodemask_t *nodemask;
+ /* Scan (total_size >> priority) pages at once */
+ int priority;
+
+ unsigned int may_writepage:1;
+
+ /* Can mapped pages be reclaimed? */
+ unsigned int may_unmap:1;
+
+ /* Can pages be swapped as part of reclaim? */
+ unsigned int may_swap:1;
+
+ unsigned int hibernation_mode:1;
+
+ /* One of the zones is ready for compaction */
+ unsigned int compaction_ready:1;
+
+ /* Incremented by the number of inactive pages that were scanned */
+ unsigned long nr_scanned;
+
+ /* Number of pages freed so far during a call to shrink_zones() */
+ unsigned long nr_reclaimed;
};
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
@@ -136,7 +137,11 @@ struct scan_control {
* From 0 .. 100. Higher means more swappy.
*/
int vm_swappiness = 60;
-unsigned long vm_total_pages; /* The total number of pages which the VM controls */
+/*
+ * The total number of pages which are beyond the high watermark within all
+ * zones.
+ */
+unsigned long vm_total_pages;
static LIST_HEAD(shrinker_list);
static DECLARE_RWSEM(shrinker_rwsem);
@@ -169,7 +174,8 @@ static unsigned long zone_reclaimable_pages(struct zone *zone)
bool zone_reclaimable(struct zone *zone)
{
- return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
+ return zone_page_state(zone, NR_PAGES_SCANNED) <
+ zone_reclaimable_pages(zone) * 6;
}
static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
@@ -571,9 +577,10 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
if (PageSwapCache(page)) {
swp_entry_t swap = { .val = page_private(page) };
+ mem_cgroup_swapout(page, swap);
__delete_from_swap_cache(page);
spin_unlock_irq(&mapping->tree_lock);
- swapcache_free(swap, page);
+ swapcache_free(swap);
} else {
void (*freepage)(struct page *);
void *shadow = NULL;
@@ -594,7 +601,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
shadow = workingset_eviction(mapping, page);
__delete_from_page_cache(page, shadow);
spin_unlock_irq(&mapping->tree_lock);
- mem_cgroup_uncharge_cache_page(page);
if (freepage != NULL)
freepage(page);
@@ -816,7 +822,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
cond_resched();
- mem_cgroup_uncharge_start();
while (!list_empty(page_list)) {
struct address_space *mapping;
struct page *page;
@@ -1127,11 +1132,12 @@ keep:
VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
}
+ mem_cgroup_uncharge_list(&free_pages);
free_hot_cold_page_list(&free_pages, true);
list_splice(&ret_pages, page_list);
count_vm_events(PGACTIVATE, pgactivate);
- mem_cgroup_uncharge_end();
+
*ret_nr_dirty += nr_dirty;
*ret_nr_congested += nr_congested;
*ret_nr_unqueued_dirty += nr_unqueued_dirty;
@@ -1431,6 +1437,7 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
if (unlikely(PageCompound(page))) {
spin_unlock_irq(&zone->lru_lock);
+ mem_cgroup_uncharge(page);
(*get_compound_page_dtor(page))(page);
spin_lock_irq(&zone->lru_lock);
} else
@@ -1503,7 +1510,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
if (global_reclaim(sc)) {
- zone->pages_scanned += nr_scanned;
+ __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
if (current_is_kswapd())
__count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned);
else
@@ -1538,6 +1545,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
spin_unlock_irq(&zone->lru_lock);
+ mem_cgroup_uncharge_list(&page_list);
free_hot_cold_page_list(&page_list, true);
/*
@@ -1652,6 +1660,7 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
if (unlikely(PageCompound(page))) {
spin_unlock_irq(&zone->lru_lock);
+ mem_cgroup_uncharge(page);
(*get_compound_page_dtor(page))(page);
spin_lock_irq(&zone->lru_lock);
} else
@@ -1693,7 +1702,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
&nr_scanned, sc, isolate_mode, lru);
if (global_reclaim(sc))
- zone->pages_scanned += nr_scanned;
+ __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
reclaim_stat->recent_scanned[file] += nr_taken;
@@ -1759,6 +1768,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&zone->lru_lock);
+ mem_cgroup_uncharge_list(&l_hold);
free_hot_cold_page_list(&l_hold, true);
}
@@ -1865,8 +1875,8 @@ enum scan_balance {
* nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan
* nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
*/
-static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
- unsigned long *nr)
+static void get_scan_count(struct lruvec *lruvec, int swappiness,
+ struct scan_control *sc, unsigned long *nr)
{
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
u64 fraction[2];
@@ -1909,7 +1919,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* using the memory controller's swap limit feature would be
* too expensive.
*/
- if (!global_reclaim(sc) && !sc->swappiness) {
+ if (!global_reclaim(sc) && !swappiness) {
scan_balance = SCAN_FILE;
goto out;
}
@@ -1919,7 +1929,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* system is close to OOM, scan both anon and file equally
* (unless the swappiness setting disagrees with swapping).
*/
- if (!sc->priority && sc->swappiness) {
+ if (!sc->priority && swappiness) {
scan_balance = SCAN_EQUAL;
goto out;
}
@@ -1962,7 +1972,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* With swappiness at 100, anonymous and file have the same priority.
* This scanning priority is essentially the inverse of IO cost.
*/
- anon_prio = sc->swappiness;
+ anon_prio = swappiness;
file_prio = 200 - anon_prio;
/*
@@ -2052,7 +2062,8 @@ out:
/*
* This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
*/
-static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
+ struct scan_control *sc)
{
unsigned long nr[NR_LRU_LISTS];
unsigned long targets[NR_LRU_LISTS];
@@ -2063,7 +2074,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
struct blk_plug plug;
bool scan_adjusted;
- get_scan_count(lruvec, sc, nr);
+ get_scan_count(lruvec, swappiness, sc, nr);
/* Record the original scan target for proportional adjustments later */
memcpy(targets, nr, sizeof(nr));
@@ -2241,9 +2252,10 @@ static inline bool should_continue_reclaim(struct zone *zone,
}
}
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
+static bool shrink_zone(struct zone *zone, struct scan_control *sc)
{
unsigned long nr_reclaimed, nr_scanned;
+ bool reclaimable = false;
do {
struct mem_cgroup *root = sc->target_mem_cgroup;
@@ -2259,11 +2271,12 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
memcg = mem_cgroup_iter(root, NULL, &reclaim);
do {
struct lruvec *lruvec;
+ int swappiness;
lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+ swappiness = mem_cgroup_swappiness(memcg);
- sc->swappiness = mem_cgroup_swappiness(memcg);
- shrink_lruvec(lruvec, sc);
+ shrink_lruvec(lruvec, swappiness, sc);
/*
* Direct reclaim and kswapd have to scan all memory
@@ -2287,20 +2300,21 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
sc->nr_scanned - nr_scanned,
sc->nr_reclaimed - nr_reclaimed);
+ if (sc->nr_reclaimed - nr_reclaimed)
+ reclaimable = true;
+
} while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
sc->nr_scanned - nr_scanned, sc));
+
+ return reclaimable;
}
/* Returns true if compaction should go ahead for a high-order request */
-static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
+static inline bool compaction_ready(struct zone *zone, int order)
{
unsigned long balance_gap, watermark;
bool watermark_ok;
- /* Do not consider compaction for orders reclaim is meant to satisfy */
- if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
- return false;
-
/*
* Compaction takes time to run and there are potentially other
* callers using the pages just freed. Continue reclaiming until
@@ -2309,18 +2323,18 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
*/
balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP(
zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO));
- watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
+ watermark = high_wmark_pages(zone) + balance_gap + (2UL << order);
watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
/*
* If compaction is deferred, reclaim up to a point where
* compaction will have a chance of success when re-enabled
*/
- if (compaction_deferred(zone, sc->order))
+ if (compaction_deferred(zone, order))
return watermark_ok;
/* If compaction is not ready to start, keep reclaiming */
- if (!compaction_suitable(zone, sc->order))
+ if (!compaction_suitable(zone, order))
return false;
return watermark_ok;
@@ -2342,10 +2356,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
* If a zone is deemed to be full of pinned pages then just give it a light
* scan then give up on it.
*
- * This function returns true if a zone is being reclaimed for a costly
- * high-order allocation and compaction is ready to begin. This indicates to
- * the caller that it should consider retrying the allocation instead of
- * further reclaim.
+ * Returns true if a zone was reclaimable.
*/
static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
{
@@ -2354,13 +2365,13 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
unsigned long nr_soft_reclaimed;
unsigned long nr_soft_scanned;
unsigned long lru_pages = 0;
- bool aborted_reclaim = false;
struct reclaim_state *reclaim_state = current->reclaim_state;
gfp_t orig_mask;
struct shrink_control shrink = {
.gfp_mask = sc->gfp_mask,
};
enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
+ bool reclaimable = false;
/*
* If the number of buffer_heads in the machine exceeds the maximum
@@ -2391,22 +2402,24 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
if (sc->priority != DEF_PRIORITY &&
!zone_reclaimable(zone))
continue; /* Let kswapd poll it */
- if (IS_ENABLED(CONFIG_COMPACTION)) {
- /*
- * If we already have plenty of memory free for
- * compaction in this zone, don't free any more.
- * Even though compaction is invoked for any
- * non-zero order, only frequent costly order
- * reclamation is disruptive enough to become a
- * noticeable problem, like transparent huge
- * page allocations.
- */
- if ((zonelist_zone_idx(z) <= requested_highidx)
- && compaction_ready(zone, sc)) {
- aborted_reclaim = true;
- continue;
- }
+
+ /*
+ * If we already have plenty of memory free for
+ * compaction in this zone, don't free any more.
+ * Even though compaction is invoked for any
+ * non-zero order, only frequent costly order
+ * reclamation is disruptive enough to become a
+ * noticeable problem, like transparent huge
+ * page allocations.
+ */
+ if (IS_ENABLED(CONFIG_COMPACTION) &&
+ sc->order > PAGE_ALLOC_COSTLY_ORDER &&
+ zonelist_zone_idx(z) <= requested_highidx &&
+ compaction_ready(zone, sc->order)) {
+ sc->compaction_ready = true;
+ continue;
}
+
/*
* This steals pages from memory cgroups over softlimit
* and returns the number of reclaimed pages and
@@ -2419,10 +2432,17 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
&nr_soft_scanned);
sc->nr_reclaimed += nr_soft_reclaimed;
sc->nr_scanned += nr_soft_scanned;
+ if (nr_soft_reclaimed)
+ reclaimable = true;
/* need some check for avoid more shrink_zone() */
}
- shrink_zone(zone, sc);
+ if (shrink_zone(zone, sc))
+ reclaimable = true;
+
+ if (global_reclaim(sc) &&
+ !reclaimable && zone_reclaimable(zone))
+ reclaimable = true;
}
/*
@@ -2445,27 +2465,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
*/
sc->gfp_mask = orig_mask;
- return aborted_reclaim;
-}
-
-/* All zones in zonelist are unreclaimable? */
-static bool all_unreclaimable(struct zonelist *zonelist,
- struct scan_control *sc)
-{
- struct zoneref *z;
- struct zone *zone;
-
- for_each_zone_zonelist_nodemask(zone, z, zonelist,
- gfp_zone(sc->gfp_mask), sc->nodemask) {
- if (!populated_zone(zone))
- continue;
- if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
- continue;
- if (zone_reclaimable(zone))
- return false;
- }
-
- return true;
+ return reclaimable;
}
/*
@@ -2489,7 +2489,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
{
unsigned long total_scanned = 0;
unsigned long writeback_threshold;
- bool aborted_reclaim;
+ bool zones_reclaimable;
delayacct_freepages_start();
@@ -2500,11 +2500,14 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
sc->priority);
sc->nr_scanned = 0;
- aborted_reclaim = shrink_zones(zonelist, sc);
+ zones_reclaimable = shrink_zones(zonelist, sc);
total_scanned += sc->nr_scanned;
if (sc->nr_reclaimed >= sc->nr_to_reclaim)
- goto out;
+ break;
+
+ if (sc->compaction_ready)
+ break;
/*
* If we're getting trouble reclaiming, start doing
@@ -2526,28 +2529,19 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
WB_REASON_TRY_TO_FREE_PAGES);
sc->may_writepage = 1;
}
- } while (--sc->priority >= 0 && !aborted_reclaim);
+ } while (--sc->priority >= 0);
-out:
delayacct_freepages_end();
if (sc->nr_reclaimed)
return sc->nr_reclaimed;
- /*
- * As hibernation is going on, kswapd is freezed so that it can't mark
- * the zone into all_unreclaimable. Thus bypassing all_unreclaimable
- * check.
- */
- if (oom_killer_disabled)
- return 0;
-
/* Aborted reclaim to try compaction? don't OOM, then */
- if (aborted_reclaim)
+ if (sc->compaction_ready)
return 1;
- /* top priority shrink_zones still had more to do? don't OOM, then */
- if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc))
+ /* Any of the zones still reclaimable? Don't OOM. */
+ if (zones_reclaimable)
return 1;
return 0;
@@ -2684,15 +2678,14 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
{
unsigned long nr_reclaimed;
struct scan_control sc = {
+ .nr_to_reclaim = SWAP_CLUSTER_MAX,
.gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
+ .order = order,
+ .nodemask = nodemask,
+ .priority = DEF_PRIORITY,
.may_writepage = !laptop_mode,
- .nr_to_reclaim = SWAP_CLUSTER_MAX,
.may_unmap = 1,
.may_swap = 1,
- .order = order,
- .priority = DEF_PRIORITY,
- .target_mem_cgroup = NULL,
- .nodemask = nodemask,
};
/*
@@ -2722,17 +2715,14 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
unsigned long *nr_scanned)
{
struct scan_control sc = {
- .nr_scanned = 0,
.nr_to_reclaim = SWAP_CLUSTER_MAX,
+ .target_mem_cgroup = memcg,
.may_writepage = !laptop_mode,
.may_unmap = 1,
.may_swap = !noswap,
- .order = 0,
- .priority = 0,
- .swappiness = mem_cgroup_swappiness(memcg),
- .target_mem_cgroup = memcg,
};
struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+ int swappiness = mem_cgroup_swappiness(memcg);
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2748,7 +2738,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
* will pick up pages from other mem cgroup's as well. We hack
* the priority and make it zero.
*/
- shrink_lruvec(lruvec, &sc);
+ shrink_lruvec(lruvec, swappiness, &sc);
trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
@@ -2764,16 +2754,14 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_reclaimed;
int nid;
struct scan_control sc = {
- .may_writepage = !laptop_mode,
- .may_unmap = 1,
- .may_swap = !noswap,
.nr_to_reclaim = SWAP_CLUSTER_MAX,
- .order = 0,
- .priority = DEF_PRIORITY,
- .target_mem_cgroup = memcg,
- .nodemask = NULL, /* we don't care the placement */
.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
+ .target_mem_cgroup = memcg,
+ .priority = DEF_PRIORITY,
+ .may_writepage = !laptop_mode,
+ .may_unmap = 1,
+ .may_swap = !noswap,
};
/*
@@ -3031,12 +3019,11 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
unsigned long nr_soft_scanned;
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
+ .order = order,
.priority = DEF_PRIORITY,
+ .may_writepage = !laptop_mode,
.may_unmap = 1,
.may_swap = 1,
- .may_writepage = !laptop_mode,
- .order = order,
- .target_mem_cgroup = NULL,
};
count_vm_event(PAGEOUTRUN);
@@ -3417,14 +3404,13 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
{
struct reclaim_state reclaim_state;
struct scan_control sc = {
+ .nr_to_reclaim = nr_to_reclaim,
.gfp_mask = GFP_HIGHUSER_MOVABLE,
- .may_swap = 1,
- .may_unmap = 1,
+ .priority = DEF_PRIORITY,
.may_writepage = 1,
- .nr_to_reclaim = nr_to_reclaim,
+ .may_unmap = 1,
+ .may_swap = 1,
.hibernation_mode = 1,
- .order = 0,
- .priority = DEF_PRIORITY,
};
struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
struct task_struct *p = current;
@@ -3604,13 +3590,13 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
struct task_struct *p = current;
struct reclaim_state reclaim_state;
struct scan_control sc = {
- .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
- .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
- .may_swap = 1,
.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
.gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
.order = order,
.priority = ZONE_RECLAIM_PRIORITY,
+ .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
+ .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
+ .may_swap = 1,
};
struct shrink_control shrink = {
.gfp_mask = sc.gfp_mask,
diff --git a/mm/vmstat.c b/mm/vmstat.c
index b37bd49bfd55..c4d42e27ca9e 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -7,6 +7,7 @@
* zoned VM statistics
* Copyright (C) 2006 Silicon Graphics, Inc.,
* Christoph Lameter <christoph@lameter.com>
+ * Copyright (C) 2008-2014 Christoph Lameter
*/
#include <linux/fs.h>
#include <linux/mm.h>
@@ -14,6 +15,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
+#include <linux/cpumask.h>
#include <linux/vmstat.h>
#include <linux/sched.h>
#include <linux/math64.h>
@@ -200,7 +202,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat,
continue;
threshold = (*calculate_pressure)(zone);
- for_each_possible_cpu(cpu)
+ for_each_online_cpu(cpu)
per_cpu_ptr(zone->pageset, cpu)->stat_threshold
= threshold;
}
@@ -419,13 +421,22 @@ void dec_zone_page_state(struct page *page, enum zone_stat_item item)
EXPORT_SYMBOL(dec_zone_page_state);
#endif
-static inline void fold_diff(int *diff)
+
+/*
+ * Fold a differential into the global counters.
+ * Returns the number of counters updated.
+ */
+static int fold_diff(int *diff)
{
int i;
+ int changes = 0;
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
- if (diff[i])
+ if (diff[i]) {
atomic_long_add(diff[i], &vm_stat[i]);
+ changes++;
+ }
+ return changes;
}
/*
@@ -441,12 +452,15 @@ static inline void fold_diff(int *diff)
* statistics in the remote zone struct as well as the global cachelines
* with the global counters. These could cause remote node cache line
* bouncing and will have to be only done when necessary.
+ *
+ * The function returns the number of global counters updated.
*/
-static void refresh_cpu_vm_stats(void)
+static int refresh_cpu_vm_stats(void)
{
struct zone *zone;
int i;
int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+ int changes = 0;
for_each_populated_zone(zone) {
struct per_cpu_pageset __percpu *p = zone->pageset;
@@ -486,15 +500,17 @@ static void refresh_cpu_vm_stats(void)
continue;
}
-
if (__this_cpu_dec_return(p->expire))
continue;
- if (__this_cpu_read(p->pcp.count))
+ if (__this_cpu_read(p->pcp.count)) {
drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
+ changes++;
+ }
#endif
}
- fold_diff(global_diff);
+ changes += fold_diff(global_diff);
+ return changes;
}
/*
@@ -763,6 +779,7 @@ const char * const vmstat_text[] = {
"nr_shmem",
"nr_dirtied",
"nr_written",
+ "nr_pages_scanned",
#ifdef CONFIG_NUMA
"numa_hit",
@@ -1067,7 +1084,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
min_wmark_pages(zone),
low_wmark_pages(zone),
high_wmark_pages(zone),
- zone->pages_scanned,
+ zone_page_state(zone, NR_PAGES_SCANNED),
zone->spanned_pages,
zone->present_pages,
zone->managed_pages);
@@ -1077,10 +1094,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
zone_page_state(zone, i));
seq_printf(m,
- "\n protection: (%lu",
+ "\n protection: (%ld",
zone->lowmem_reserve[0]);
for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
- seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
+ seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
seq_printf(m,
")"
"\n pagesets");
@@ -1228,20 +1245,106 @@ static const struct file_operations proc_vmstat_file_operations = {
#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;
+static cpumask_var_t cpu_stat_off;
static void vmstat_update(struct work_struct *w)
{
- refresh_cpu_vm_stats();
- schedule_delayed_work(this_cpu_ptr(&vmstat_work),
+ if (refresh_cpu_vm_stats())
+ /*
+ * Counters were updated so we expect more updates
+ * to occur in the future. Keep on running the
+ * update worker thread.
+ */
+ schedule_delayed_work(this_cpu_ptr(&vmstat_work),
+ round_jiffies_relative(sysctl_stat_interval));
+ else {
+ /*
+ * We did not update any counters so the app may be in
+ * a mode where it does not cause counter updates.
+ * We may be uselessly running vmstat_update.
+ * Defer the checking for differentials to the
+ * shepherd thread on a different processor.
+ */
+ int r;
+ /*
+ * Shepherd work thread does not race since it never
+ * changes the bit if it's zero, but the CPU
+ * online/offline code may race if
+ * worker threads are still allowed during
+ * shutdown / startup.
+ */
+ r = cpumask_test_and_set_cpu(smp_processor_id(),
+ cpu_stat_off);
+ VM_BUG_ON(r);
+ }
+}
+
+/*
+ * Check if the diffs for a certain cpu indicate that
+ * an update is needed.
+ */
+static bool need_update(int cpu)
+{
+ struct zone *zone;
+
+ for_each_populated_zone(zone) {
+ struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
+
+ BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
+ /*
+ * The fast way of checking if there are any vmstat diffs.
+ * This works because the diffs are byte sized items.
+ */
+ if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
+ return true;
+
+ }
+ return false;
+}
+
+
+/*
+ * Shepherd worker thread that checks the
+ * differentials of processors that have their worker
+ * threads for vm statistics updates disabled because of
+ * inactivity.
+ */
+static void vmstat_shepherd(struct work_struct *w);
+
+static DECLARE_DELAYED_WORK(shepherd, vmstat_shepherd);
+
+static void vmstat_shepherd(struct work_struct *w)
+{
+ int cpu;
+
+ /* Check processors whose vmstat worker threads have been disabled */
+ for_each_cpu(cpu, cpu_stat_off)
+ if (need_update(cpu) &&
+ cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
+
+ schedule_delayed_work_on(cpu, &per_cpu(vmstat_work, cpu),
+ __round_jiffies_relative(sysctl_stat_interval, cpu));
+
+
+ schedule_delayed_work(&shepherd,
round_jiffies_relative(sysctl_stat_interval));
+
}
-static void start_cpu_timer(int cpu)
+static void __init start_shepherd_timer(void)
{
- struct delayed_work *work = &per_cpu(vmstat_work, cpu);
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
+ vmstat_update);
+
+ if (!alloc_cpumask_var(&cpu_stat_off, GFP_KERNEL))
+ BUG();
+ cpumask_copy(cpu_stat_off, cpu_online_mask);
- INIT_DEFERRABLE_WORK(work, vmstat_update);
- schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
+ schedule_delayed_work(&shepherd,
+ round_jiffies_relative(sysctl_stat_interval));
}
static void vmstat_cpu_dead(int node)
@@ -1272,17 +1375,17 @@ static int vmstat_cpuup_callback(struct notifier_block *nfb,
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
refresh_zone_stat_thresholds();
- start_cpu_timer(cpu);
node_set_state(cpu_to_node(cpu), N_CPU);
+ cpumask_set_cpu(cpu, cpu_stat_off);
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
- cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
- per_cpu(vmstat_work, cpu).work.func = NULL;
+ if (!cpumask_test_and_set_cpu(cpu, cpu_stat_off))
+ cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
- start_cpu_timer(cpu);
+ cpumask_set_cpu(cpu, cpu_stat_off);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
@@ -1302,15 +1405,10 @@ static struct notifier_block vmstat_notifier =
static int __init setup_vmstat(void)
{
#ifdef CONFIG_SMP
- int cpu;
-
cpu_notifier_register_begin();
__register_cpu_notifier(&vmstat_notifier);
- for_each_online_cpu(cpu) {
- start_cpu_timer(cpu);
- node_set_state(cpu_to_node(cpu), N_CPU);
- }
+ start_shepherd_timer();
cpu_notifier_register_done();
#endif
#ifdef CONFIG_PROC_FS
diff --git a/mm/zbud.c b/mm/zbud.c
index 01df13a7e2e1..a05790b1915e 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -51,6 +51,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/zbud.h>
+#include <linux/zpool.h>
/*****************
* Structures
@@ -113,6 +114,90 @@ struct zbud_header {
};
/*****************
+ * zpool
+ ****************/
+
+#ifdef CONFIG_ZPOOL
+
+static int zbud_zpool_evict(struct zbud_pool *pool, unsigned long handle)
+{
+ return zpool_evict(pool, handle);
+}
+
+static struct zbud_ops zbud_zpool_ops = {
+ .evict = zbud_zpool_evict
+};
+
+static void *zbud_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops)
+{
+ return zbud_create_pool(gfp, &zbud_zpool_ops);
+}
+
+static void zbud_zpool_destroy(void *pool)
+{
+ zbud_destroy_pool(pool);
+}
+
+static int zbud_zpool_malloc(void *pool, size_t size, gfp_t gfp,
+ unsigned long *handle)
+{
+ return zbud_alloc(pool, size, gfp, handle);
+}
+static void zbud_zpool_free(void *pool, unsigned long handle)
+{
+ zbud_free(pool, handle);
+}
+
+static int zbud_zpool_shrink(void *pool, unsigned int pages,
+ unsigned int *reclaimed)
+{
+ unsigned int total = 0;
+ int ret = -EINVAL;
+
+ while (total < pages) {
+ ret = zbud_reclaim_page(pool, 8);
+ if (ret < 0)
+ break;
+ total++;
+ }
+
+ if (reclaimed)
+ *reclaimed = total;
+
+ return ret;
+}
+
+static void *zbud_zpool_map(void *pool, unsigned long handle,
+ enum zpool_mapmode mm)
+{
+ return zbud_map(pool, handle);
+}
+static void zbud_zpool_unmap(void *pool, unsigned long handle)
+{
+ zbud_unmap(pool, handle);
+}
+
+static u64 zbud_zpool_total_size(void *pool)
+{
+ return zbud_get_pool_size(pool) * PAGE_SIZE;
+}
+
+static struct zpool_driver zbud_zpool_driver = {
+ .type = "zbud",
+ .owner = THIS_MODULE,
+ .create = zbud_zpool_create,
+ .destroy = zbud_zpool_destroy,
+ .malloc = zbud_zpool_malloc,
+ .free = zbud_zpool_free,
+ .shrink = zbud_zpool_shrink,
+ .map = zbud_zpool_map,
+ .unmap = zbud_zpool_unmap,
+ .total_size = zbud_zpool_total_size,
+};
+
+#endif /* CONFIG_ZPOOL */
+
+/*****************
* Helpers
*****************/
/* Just to make the code easier to read */
@@ -122,7 +207,7 @@ enum buddy {
};
/* Converts an allocation size in bytes to size in zbud chunks */
-static int size_to_chunks(int size)
+static int size_to_chunks(size_t size)
{
return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
}
@@ -247,7 +332,7 @@ void zbud_destroy_pool(struct zbud_pool *pool)
* gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
* a new page.
*/
-int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp,
+int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
unsigned long *handle)
{
int chunks, i, freechunks;
@@ -511,11 +596,20 @@ static int __init init_zbud(void)
/* Make sure the zbud header will fit in one chunk */
BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED);
pr_info("loaded\n");
+
+#ifdef CONFIG_ZPOOL
+ zpool_register_driver(&zbud_zpool_driver);
+#endif
+
return 0;
}
static void __exit exit_zbud(void)
{
+#ifdef CONFIG_ZPOOL
+ zpool_unregister_driver(&zbud_zpool_driver);
+#endif
+
pr_info("unloaded\n");
}
diff --git a/mm/zpool.c b/mm/zpool.c
new file mode 100644
index 000000000000..e40612a1df00
--- /dev/null
+++ b/mm/zpool.c
@@ -0,0 +1,364 @@
+/*
+ * zpool memory storage api
+ *
+ * Copyright (C) 2014 Dan Streetman
+ *
+ * This is a common frontend for memory storage pool implementations.
+ * Typically, this is used to store compressed memory.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/zpool.h>
+
+struct zpool {
+ char *type;
+
+ struct zpool_driver *driver;
+ void *pool;
+ struct zpool_ops *ops;
+
+ struct list_head list;
+};
+
+static LIST_HEAD(drivers_head);
+static DEFINE_SPINLOCK(drivers_lock);
+
+static LIST_HEAD(pools_head);
+static DEFINE_SPINLOCK(pools_lock);
+
+/**
+ * zpool_register_driver() - register a zpool implementation.
+ * @driver: driver to register
+ */
+void zpool_register_driver(struct zpool_driver *driver)
+{
+ spin_lock(&drivers_lock);
+ atomic_set(&driver->refcount, 0);
+ list_add(&driver->list, &drivers_head);
+ spin_unlock(&drivers_lock);
+}
+EXPORT_SYMBOL(zpool_register_driver);
+
+/**
+ * zpool_unregister_driver() - unregister a zpool implementation.
+ * @driver: driver to unregister.
+ *
+ * Module usage counting is used to prevent using a driver
+ * while/after unloading, so if this is called from module
+ * exit function, this should never fail; if called from
+ * other than the module exit function, and this returns
+ * failure, the driver is in use and must remain available.
+ */
+int zpool_unregister_driver(struct zpool_driver *driver)
+{
+ int ret = 0, refcount;
+
+ spin_lock(&drivers_lock);
+ refcount = atomic_read(&driver->refcount);
+ WARN_ON(refcount < 0);
+ if (refcount > 0)
+ ret = -EBUSY;
+ else
+ list_del(&driver->list);
+ spin_unlock(&drivers_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(zpool_unregister_driver);
+
+/**
+ * zpool_evict() - evict callback from a zpool implementation.
+ * @pool: pool to evict from.
+ * @handle: handle to evict.
+ *
+ * This can be used by zpool implementations to call the
+ * user's evict zpool_ops struct evict callback.
+ */
+int zpool_evict(void *pool, unsigned long handle)
+{
+ struct zpool *zpool;
+
+ spin_lock(&pools_lock);
+ list_for_each_entry(zpool, &pools_head, list) {
+ if (zpool->pool == pool) {
+ spin_unlock(&pools_lock);
+ if (!zpool->ops || !zpool->ops->evict)
+ return -EINVAL;
+ return zpool->ops->evict(zpool, handle);
+ }
+ }
+ spin_unlock(&pools_lock);
+
+ return -ENOENT;
+}
+EXPORT_SYMBOL(zpool_evict);
+
+static struct zpool_driver *zpool_get_driver(char *type)
+{
+ struct zpool_driver *driver;
+
+ spin_lock(&drivers_lock);
+ list_for_each_entry(driver, &drivers_head, list) {
+ if (!strcmp(driver->type, type)) {
+ bool got = try_module_get(driver->owner);
+
+ if (got)
+ atomic_inc(&driver->refcount);
+ spin_unlock(&drivers_lock);
+ return got ? driver : NULL;
+ }
+ }
+
+ spin_unlock(&drivers_lock);
+ return NULL;
+}
+
+static void zpool_put_driver(struct zpool_driver *driver)
+{
+ atomic_dec(&driver->refcount);
+ module_put(driver->owner);
+}
+
+/**
+ * zpool_create_pool() - Create a new zpool
+ * @type The type of the zpool to create (e.g. zbud, zsmalloc)
+ * @gfp The GFP flags to use when allocating the pool.
+ * @ops The optional ops callback.
+ *
+ * This creates a new zpool of the specified type. The gfp flags will be
+ * used when allocating memory, if the implementation supports it. If the
+ * ops param is NULL, then the created zpool will not be shrinkable.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: New zpool on success, NULL on failure.
+ */
+struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops)
+{
+ struct zpool_driver *driver;
+ struct zpool *zpool;
+
+ pr_info("creating pool type %s\n", type);
+
+ driver = zpool_get_driver(type);
+
+ if (!driver) {
+ request_module(type);
+ driver = zpool_get_driver(type);
+ }
+
+ if (!driver) {
+ pr_err("no driver for type %s\n", type);
+ return NULL;
+ }
+
+ zpool = kmalloc(sizeof(*zpool), gfp);
+ if (!zpool) {
+ pr_err("couldn't create zpool - out of memory\n");
+ zpool_put_driver(driver);
+ return NULL;
+ }
+
+ zpool->type = driver->type;
+ zpool->driver = driver;
+ zpool->pool = driver->create(gfp, ops);
+ zpool->ops = ops;
+
+ if (!zpool->pool) {
+ pr_err("couldn't create %s pool\n", type);
+ zpool_put_driver(driver);
+ kfree(zpool);
+ return NULL;
+ }
+
+ pr_info("created %s pool\n", type);
+
+ spin_lock(&pools_lock);
+ list_add(&zpool->list, &pools_head);
+ spin_unlock(&pools_lock);
+
+ return zpool;
+}
+
+/**
+ * zpool_destroy_pool() - Destroy a zpool
+ * @pool The zpool to destroy.
+ *
+ * Implementations must guarantee this to be thread-safe,
+ * however only when destroying different pools. The same
+ * pool should only be destroyed once, and should not be used
+ * after it is destroyed.
+ *
+ * This destroys an existing zpool. The zpool should not be in use.
+ */
+void zpool_destroy_pool(struct zpool *zpool)
+{
+ pr_info("destroying pool type %s\n", zpool->type);
+
+ spin_lock(&pools_lock);
+ list_del(&zpool->list);
+ spin_unlock(&pools_lock);
+ zpool->driver->destroy(zpool->pool);
+ zpool_put_driver(zpool->driver);
+ kfree(zpool);
+}
+
+/**
+ * zpool_get_type() - Get the type of the zpool
+ * @pool The zpool to check
+ *
+ * This returns the type of the pool.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: The type of zpool.
+ */
+char *zpool_get_type(struct zpool *zpool)
+{
+ return zpool->type;
+}
+
+/**
+ * zpool_malloc() - Allocate memory
+ * @pool The zpool to allocate from.
+ * @size The amount of memory to allocate.
+ * @gfp The GFP flags to use when allocating memory.
+ * @handle Pointer to the handle to set
+ *
+ * This allocates the requested amount of memory from the pool.
+ * The gfp flags will be used when allocating memory, if the
+ * implementation supports it. The provided @handle will be
+ * set to the allocated object handle.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: 0 on success, negative value on error.
+ */
+int zpool_malloc(struct zpool *zpool, size_t size, gfp_t gfp,
+ unsigned long *handle)
+{
+ return zpool->driver->malloc(zpool->pool, size, gfp, handle);
+}
+
+/**
+ * zpool_free() - Free previously allocated memory
+ * @pool The zpool that allocated the memory.
+ * @handle The handle to the memory to free.
+ *
+ * This frees previously allocated memory. This does not guarantee
+ * that the pool will actually free memory, only that the memory
+ * in the pool will become available for use by the pool.
+ *
+ * Implementations must guarantee this to be thread-safe,
+ * however only when freeing different handles. The same
+ * handle should only be freed once, and should not be used
+ * after freeing.
+ */
+void zpool_free(struct zpool *zpool, unsigned long handle)
+{
+ zpool->driver->free(zpool->pool, handle);
+}
+
+/**
+ * zpool_shrink() - Shrink the pool size
+ * @pool The zpool to shrink.
+ * @pages The number of pages to shrink the pool.
+ * @reclaimed The number of pages successfully evicted.
+ *
+ * This attempts to shrink the actual memory size of the pool
+ * by evicting currently used handle(s). If the pool was
+ * created with no zpool_ops, or the evict call fails for any
+ * of the handles, this will fail. If non-NULL, the @reclaimed
+ * parameter will be set to the number of pages reclaimed,
+ * which may be more than the number of pages requested.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: 0 on success, negative value on error/failure.
+ */
+int zpool_shrink(struct zpool *zpool, unsigned int pages,
+ unsigned int *reclaimed)
+{
+ return zpool->driver->shrink(zpool->pool, pages, reclaimed);
+}
+
+/**
+ * zpool_map_handle() - Map a previously allocated handle into memory
+ * @pool The zpool that the handle was allocated from
+ * @handle The handle to map
+ * @mm How the memory should be mapped
+ *
+ * This maps a previously allocated handle into memory. The @mm
+ * param indicates to the implementation how the memory will be
+ * used, i.e. read-only, write-only, read-write. If the
+ * implementation does not support it, the memory will be treated
+ * as read-write.
+ *
+ * This may hold locks, disable interrupts, and/or preemption,
+ * and the zpool_unmap_handle() must be called to undo those
+ * actions. The code that uses the mapped handle should complete
+ * its operations on the mapped handle memory quickly and unmap
+ * as soon as possible. As the implementation may use per-cpu
+ * data, multiple handles should not be mapped concurrently on
+ * any cpu.
+ *
+ * Returns: A pointer to the handle's mapped memory area.
+ */
+void *zpool_map_handle(struct zpool *zpool, unsigned long handle,
+ enum zpool_mapmode mapmode)
+{
+ return zpool->driver->map(zpool->pool, handle, mapmode);
+}
+
+/**
+ * zpool_unmap_handle() - Unmap a previously mapped handle
+ * @pool The zpool that the handle was allocated from
+ * @handle The handle to unmap
+ *
+ * This unmaps a previously mapped handle. Any locks or other
+ * actions that the implementation took in zpool_map_handle()
+ * will be undone here. The memory area returned from
+ * zpool_map_handle() should no longer be used after this.
+ */
+void zpool_unmap_handle(struct zpool *zpool, unsigned long handle)
+{
+ zpool->driver->unmap(zpool->pool, handle);
+}
+
+/**
+ * zpool_get_total_size() - The total size of the pool
+ * @pool The zpool to check
+ *
+ * This returns the total size in bytes of the pool.
+ *
+ * Returns: Total size of the zpool in bytes.
+ */
+u64 zpool_get_total_size(struct zpool *zpool)
+{
+ return zpool->driver->total_size(zpool->pool);
+}
+
+static int __init init_zpool(void)
+{
+ pr_info("loaded\n");
+ return 0;
+}
+
+static void __exit exit_zpool(void)
+{
+ pr_info("unloaded\n");
+}
+
+module_init(init_zpool);
+module_exit(exit_zpool);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
+MODULE_DESCRIPTION("Common API for compressed memory storage");
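For orientation, a hedged sketch (not part of the patch) of a round trip through the zpool API introduced above; the pool type, object size, and the zpool_smoke_test() name are illustrative assumptions.

#include <linux/gfp.h>
#include <linux/string.h>
#include <linux/zpool.h>

/* Illustrative only: create a zbud-backed pool, store one object through
 * a temporary RW mapping, then tear everything down again. */
static int zpool_smoke_test(void)
{
	struct zpool *pool;
	unsigned long handle;
	char *buf;
	int ret;

	pool = zpool_create_pool("zbud", GFP_KERNEL, NULL); /* NULL ops: not shrinkable */
	if (!pool)
		return -ENOMEM;

	ret = zpool_malloc(pool, 64, GFP_KERNEL, &handle);
	if (ret)
		goto out_destroy;

	buf = zpool_map_handle(pool, handle, ZPOOL_MM_RW);
	memset(buf, 0, 64);			/* keep the mapping short-lived */
	zpool_unmap_handle(pool, handle);

	zpool_free(pool, handle);
out_destroy:
	zpool_destroy_pool(pool);
	return ret;
}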
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index fe78189624cf..4e2fc83cb394 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -92,6 +92,7 @@
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/zsmalloc.h>
+#include <linux/zpool.h>
/*
* This must be power of 2 and greater than of equal to sizeof(link_free).
@@ -240,6 +241,81 @@ struct mapping_area {
enum zs_mapmode vm_mm; /* mapping mode */
};
+/* zpool driver */
+
+#ifdef CONFIG_ZPOOL
+
+static void *zs_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops)
+{
+ return zs_create_pool(gfp);
+}
+
+static void zs_zpool_destroy(void *pool)
+{
+ zs_destroy_pool(pool);
+}
+
+static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp,
+ unsigned long *handle)
+{
+ *handle = zs_malloc(pool, size);
+ return *handle ? 0 : -1;
+}
+static void zs_zpool_free(void *pool, unsigned long handle)
+{
+ zs_free(pool, handle);
+}
+
+static int zs_zpool_shrink(void *pool, unsigned int pages,
+ unsigned int *reclaimed)
+{
+ return -EINVAL;
+}
+
+static void *zs_zpool_map(void *pool, unsigned long handle,
+ enum zpool_mapmode mm)
+{
+ enum zs_mapmode zs_mm;
+
+ switch (mm) {
+ case ZPOOL_MM_RO:
+ zs_mm = ZS_MM_RO;
+ break;
+ case ZPOOL_MM_WO:
+ zs_mm = ZS_MM_WO;
+ break;
+ case ZPOOL_MM_RW: /* fallthru */
+ default:
+ zs_mm = ZS_MM_RW;
+ break;
+ }
+
+ return zs_map_object(pool, handle, zs_mm);
+}
+static void zs_zpool_unmap(void *pool, unsigned long handle)
+{
+ zs_unmap_object(pool, handle);
+}
+
+static u64 zs_zpool_total_size(void *pool)
+{
+ return zs_get_total_size_bytes(pool);
+}
+
+static struct zpool_driver zs_zpool_driver = {
+ .type = "zsmalloc",
+ .owner = THIS_MODULE,
+ .create = zs_zpool_create,
+ .destroy = zs_zpool_destroy,
+ .malloc = zs_zpool_malloc,
+ .free = zs_zpool_free,
+ .shrink = zs_zpool_shrink,
+ .map = zs_zpool_map,
+ .unmap = zs_zpool_unmap,
+ .total_size = zs_zpool_total_size,
+};
+
+#endif /* CONFIG_ZPOOL */
/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
@@ -690,7 +766,7 @@ static inline void __zs_cpu_down(struct mapping_area *area)
static inline void *__zs_map_object(struct mapping_area *area,
struct page *pages[2], int off, int size)
{
- BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages));
+ BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages));
area->vm_addr = area->vm->addr;
return area->vm_addr + off;
}
@@ -814,6 +890,10 @@ static void zs_exit(void)
{
int cpu;
+#ifdef CONFIG_ZPOOL
+ zpool_unregister_driver(&zs_zpool_driver);
+#endif
+
cpu_notifier_register_begin();
for_each_online_cpu(cpu)
@@ -840,6 +920,10 @@ static int zs_init(void)
cpu_notifier_register_done();
+#ifdef CONFIG_ZPOOL
+ zpool_register_driver(&zs_zpool_driver);
+#endif
+
return 0;
fail:
zs_exit();
diff --git a/mm/zswap.c b/mm/zswap.c
index 008388fe7b0f..ea064c1a09ba 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -34,7 +34,7 @@
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/mempool.h>
-#include <linux/zbud.h>
+#include <linux/zpool.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
@@ -45,8 +45,8 @@
/*********************************
* statistics
**********************************/
-/* Number of memory pages used by the compressed pool */
-static u64 zswap_pool_pages;
+/* Total bytes used by the compressed storage */
+static u64 zswap_pool_total_size;
/* The number of compressed pages currently stored in zswap */
static atomic_t zswap_stored_pages = ATOMIC_INIT(0);
@@ -89,8 +89,13 @@ static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent,
zswap_max_pool_percent, uint, 0644);
-/* zbud_pool is shared by all of zswap backend */
-static struct zbud_pool *zswap_pool;
+/* Compressed storage to use */
+#define ZSWAP_ZPOOL_DEFAULT "zbud"
+static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
+module_param_named(zpool, zswap_zpool_type, charp, 0444);
+
+/* zpool is shared by all of zswap backend */
+static struct zpool *zswap_pool;
/*********************************
* compression functions
@@ -168,7 +173,7 @@ static void zswap_comp_exit(void)
* be held while changing the refcount. Since the lock must
* be held, there is no reason to also make refcount atomic.
* offset - the swap offset for the entry. Index into the red-black tree.
- * handle - zbud allocation handle that stores the compressed page data
+ * handle - zpool allocation handle that stores the compressed page data
* length - the length in bytes of the compressed page data. Needed during
* decompression
*/
@@ -207,7 +212,7 @@ static int zswap_entry_cache_create(void)
return zswap_entry_cache == NULL;
}
-static void zswap_entry_cache_destory(void)
+static void __init zswap_entry_cache_destroy(void)
{
kmem_cache_destroy(zswap_entry_cache);
}
@@ -284,15 +289,15 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
}
/*
- * Carries out the common pattern of freeing and entry's zbud allocation,
+ * Carries out the common pattern of freeing an entry's zpool allocation,
* freeing the entry itself, and decrementing the number of stored pages.
*/
static void zswap_free_entry(struct zswap_entry *entry)
{
- zbud_free(zswap_pool, entry->handle);
+ zpool_free(zswap_pool, entry->handle);
zswap_entry_cache_free(entry);
atomic_dec(&zswap_stored_pages);
- zswap_pool_pages = zbud_get_pool_size(zswap_pool);
+ zswap_pool_total_size = zpool_get_total_size(zswap_pool);
}
/* caller must hold the tree lock */
@@ -409,7 +414,7 @@ cleanup:
static bool zswap_is_full(void)
{
return totalram_pages * zswap_max_pool_percent / 100 <
- zswap_pool_pages;
+ DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}
/*********************************
@@ -502,7 +507,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
* add_to_swap_cache() doesn't return -EEXIST, so we can safely
* clear SWAP_HAS_CACHE flag.
*/
- swapcache_free(entry, NULL);
+ swapcache_free(entry);
} while (err != -ENOMEM);
if (new_page)
@@ -525,7 +530,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
* the swap cache, the compressed version stored by zswap can be
* freed.
*/
-static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
+static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
{
struct zswap_header *zhdr;
swp_entry_t swpentry;
@@ -541,9 +546,9 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
};
/* extract swpentry from data */
- zhdr = zbud_map(pool, handle);
+ zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
swpentry = zhdr->swpentry; /* here */
- zbud_unmap(pool, handle);
+ zpool_unmap_handle(pool, handle);
tree = zswap_trees[swp_type(swpentry)];
offset = swp_offset(swpentry);
@@ -573,13 +578,13 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
case ZSWAP_SWAPCACHE_NEW: /* page is locked */
/* decompress */
dlen = PAGE_SIZE;
- src = (u8 *)zbud_map(zswap_pool, entry->handle) +
- sizeof(struct zswap_header);
+ src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
+ ZPOOL_MM_RO) + sizeof(struct zswap_header);
dst = kmap_atomic(page);
ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
entry->length, dst, &dlen);
kunmap_atomic(dst);
- zbud_unmap(zswap_pool, entry->handle);
+ zpool_unmap_handle(zswap_pool, entry->handle);
BUG_ON(ret);
BUG_ON(dlen != PAGE_SIZE);
@@ -652,7 +657,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
/* reclaim space if needed */
if (zswap_is_full()) {
zswap_pool_limit_hit++;
- if (zbud_reclaim_page(zswap_pool, 8)) {
+ if (zpool_shrink(zswap_pool, 1, NULL)) {
zswap_reject_reclaim_fail++;
ret = -ENOMEM;
goto reject;
@@ -679,7 +684,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
/* store */
len = dlen + sizeof(struct zswap_header);
- ret = zbud_alloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
+ ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
&handle);
if (ret == -ENOSPC) {
zswap_reject_compress_poor++;
@@ -689,11 +694,11 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
zswap_reject_alloc_fail++;
goto freepage;
}
- zhdr = zbud_map(zswap_pool, handle);
+ zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW);
zhdr->swpentry = swp_entry(type, offset);
buf = (u8 *)(zhdr + 1);
memcpy(buf, dst, dlen);
- zbud_unmap(zswap_pool, handle);
+ zpool_unmap_handle(zswap_pool, handle);
put_cpu_var(zswap_dstmem);
/* populate entry */
@@ -716,7 +721,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
/* update stats */
atomic_inc(&zswap_stored_pages);
- zswap_pool_pages = zbud_get_pool_size(zswap_pool);
+ zswap_pool_total_size = zpool_get_total_size(zswap_pool);
return 0;
@@ -752,13 +757,13 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
/* decompress */
dlen = PAGE_SIZE;
- src = (u8 *)zbud_map(zswap_pool, entry->handle) +
- sizeof(struct zswap_header);
+ src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
+ ZPOOL_MM_RO) + sizeof(struct zswap_header);
dst = kmap_atomic(page);
ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
dst, &dlen);
kunmap_atomic(dst);
- zbud_unmap(zswap_pool, entry->handle);
+ zpool_unmap_handle(zswap_pool, entry->handle);
BUG_ON(ret);
spin_lock(&tree->lock);
@@ -811,7 +816,7 @@ static void zswap_frontswap_invalidate_area(unsigned type)
zswap_trees[type] = NULL;
}
-static struct zbud_ops zswap_zbud_ops = {
+static struct zpool_ops zswap_zpool_ops = {
.evict = zswap_writeback_entry
};
@@ -869,8 +874,8 @@ static int __init zswap_debugfs_init(void)
zswap_debugfs_root, &zswap_written_back_pages);
debugfs_create_u64("duplicate_entry", S_IRUGO,
zswap_debugfs_root, &zswap_duplicate_entry);
- debugfs_create_u64("pool_pages", S_IRUGO,
- zswap_debugfs_root, &zswap_pool_pages);
+ debugfs_create_u64("pool_total_size", S_IRUGO,
+ zswap_debugfs_root, &zswap_pool_total_size);
debugfs_create_atomic_t("stored_pages", S_IRUGO,
zswap_debugfs_root, &zswap_stored_pages);
@@ -895,16 +900,26 @@ static void __exit zswap_debugfs_exit(void) { }
**********************************/
static int __init init_zswap(void)
{
+ gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
+
if (!zswap_enabled)
return 0;
pr_info("loading zswap\n");
- zswap_pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
+ zswap_pool = zpool_create_pool(zswap_zpool_type, gfp, &zswap_zpool_ops);
+ if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
+ pr_info("%s zpool not available\n", zswap_zpool_type);
+ zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
+ zswap_pool = zpool_create_pool(zswap_zpool_type, gfp,
+ &zswap_zpool_ops);
+ }
if (!zswap_pool) {
- pr_err("zbud pool creation failed\n");
+ pr_err("%s zpool not available\n", zswap_zpool_type);
+ pr_err("zpool creation failed\n");
goto error;
}
+ pr_info("using %s pool\n", zswap_zpool_type);
if (zswap_entry_cache_create()) {
pr_err("entry cache creation failed\n");
@@ -926,9 +941,9 @@ static int __init init_zswap(void)
pcpufail:
zswap_comp_exit();
compfail:
- zswap_entry_cache_destory();
+ zswap_entry_cache_destroy();
cachefail:
- zbud_destroy_pool(zswap_pool);
+ zpool_destroy_pool(zswap_pool);
error:
return -ENOMEM;
}
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 022d18ab27a6..fc1835c6bb40 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -188,7 +188,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
/* Reached the end of the list, so insert after 'frag_entry_last'. */
if (likely(frag_entry_last)) {
- hlist_add_after(&frag_entry_last->list, &frag_entry_new->list);
+ hlist_add_behind(&frag_entry_new->list, &frag_entry_last->list);
chain->size += skb->len - hdr_size;
chain->timestamp = jiffies;
ret = true;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index b4845f4b2bb4..7751c92c8c57 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1174,7 +1174,7 @@ static void br_multicast_add_router(struct net_bridge *br,
}
if (slot)
- hlist_add_after_rcu(slot, &port->rlist);
+ hlist_add_behind_rcu(&port->rlist, slot);
else
hlist_add_head_rcu(&port->rlist, &br->router_list);
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5afeb5aa4c7c..e9cb2588e416 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -940,7 +940,7 @@ static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
last = li;
}
if (last)
- hlist_add_after_rcu(&last->hlist, &new->hlist);
+ hlist_add_behind_rcu(&new->hlist, &last->hlist);
else
hlist_add_before_rcu(&new->hlist, &li->hlist);
}
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 731e1e1722d9..fd0dc47f471d 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -277,7 +277,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
last = p;
}
if (last)
- hlist_add_after_rcu(&last->list, &newp->list);
+ hlist_add_behind_rcu(&newp->list, &last->list);
else
hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
out:
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 0525d78ba328..beeed602aeb3 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -389,7 +389,7 @@ redo:
if (h != h0)
continue;
hlist_del(&pol->bydst);
- hlist_add_after(entry0, &pol->bydst);
+ hlist_add_behind(&pol->bydst, entry0);
}
entry0 = &pol->bydst;
}
@@ -654,7 +654,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
break;
}
if (newpos)
- hlist_add_after(newpos, &policy->bydst);
+ hlist_add_behind(&policy->bydst, newpos);
else
hlist_add_head(&policy->bydst, chain);
xfrm_pol_hold(policy);
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 182be0f12407..da74e65064d1 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -309,9 +309,12 @@ our $Operators = qr{
our $c90_Keywords = qr{do|for|while|if|else|return|goto|continue|switch|default|case|break}x;
our $NonptrType;
+our $NonptrTypeMisordered;
our $NonptrTypeWithAttr;
our $Type;
+our $TypeMisordered;
our $Declare;
+our $DeclareMisordered;
our $NON_ASCII_UTF8 = qr{
[\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
@@ -353,16 +356,36 @@ our $signature_tags = qr{(?xi:
Cc:
)};
+our @typeListMisordered = (
+ qr{char\s+(?:un)?signed},
+ qr{int\s+(?:(?:un)?signed\s+)?short\s},
+ qr{int\s+short(?:\s+(?:un)?signed)},
+ qr{short\s+int(?:\s+(?:un)?signed)},
+ qr{(?:un)?signed\s+int\s+short},
+ qr{short\s+(?:un)?signed},
+ qr{long\s+int\s+(?:un)?signed},
+ qr{int\s+long\s+(?:un)?signed},
+ qr{long\s+(?:un)?signed\s+int},
+ qr{int\s+(?:un)?signed\s+long},
+ qr{int\s+(?:un)?signed},
+ qr{int\s+long\s+long\s+(?:un)?signed},
+ qr{long\s+long\s+int\s+(?:un)?signed},
+ qr{long\s+long\s+(?:un)?signed\s+int},
+ qr{long\s+long\s+(?:un)?signed},
+ qr{long\s+(?:un)?signed},
+);
+
our @typeList = (
qr{void},
- qr{(?:unsigned\s+)?char},
- qr{(?:unsigned\s+)?short},
- qr{(?:unsigned\s+)?int},
- qr{(?:unsigned\s+)?long},
- qr{(?:unsigned\s+)?long\s+int},
- qr{(?:unsigned\s+)?long\s+long},
- qr{(?:unsigned\s+)?long\s+long\s+int},
- qr{unsigned},
+ qr{(?:(?:un)?signed\s+)?char},
+ qr{(?:(?:un)?signed\s+)?short\s+int},
+ qr{(?:(?:un)?signed\s+)?short},
+ qr{(?:(?:un)?signed\s+)?int},
+ qr{(?:(?:un)?signed\s+)?long\s+int},
+ qr{(?:(?:un)?signed\s+)?long\s+long\s+int},
+ qr{(?:(?:un)?signed\s+)?long\s+long},
+ qr{(?:(?:un)?signed\s+)?long},
+ qr{(?:un)?signed},
qr{float},
qr{double},
qr{bool},
@@ -372,6 +395,7 @@ our @typeList = (
qr{${Ident}_t},
qr{${Ident}_handler},
qr{${Ident}_handler_fn},
+ @typeListMisordered,
);
our @typeListWithAttr = (
@typeList,
@@ -413,6 +437,7 @@ our $allowed_asm_includes = qr{(?x:
sub build_types {
my $mods = "(?x: \n" . join("|\n ", @modifierList) . "\n)";
my $all = "(?x: \n" . join("|\n ", @typeList) . "\n)";
+ my $Misordered = "(?x: \n" . join("|\n ", @typeListMisordered) . "\n)";
my $allWithAttr = "(?x: \n" . join("|\n ", @typeListWithAttr) . "\n)";
$Modifier = qr{(?:$Attribute|$Sparse|$mods)};
$NonptrType = qr{
@@ -424,6 +449,13 @@ sub build_types {
)
(?:\s+$Modifier|\s+const)*
}x;
+ $NonptrTypeMisordered = qr{
+ (?:$Modifier\s+|const\s+)*
+ (?:
+ (?:${Misordered}\b)
+ )
+ (?:\s+$Modifier|\s+const)*
+ }x;
$NonptrTypeWithAttr = qr{
(?:$Modifier\s+|const\s+)*
(?:
@@ -435,10 +467,16 @@ sub build_types {
}x;
$Type = qr{
$NonptrType
- (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*|\[\])+|(?:\s*\[\s*\])+)?
+ (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*\s*(?:const\s*)?|\[\])+|(?:\s*\[\s*\])+)?
+ (?:\s+$Inline|\s+$Modifier)*
+ }x;
+ $TypeMisordered = qr{
+ $NonptrTypeMisordered
+ (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*\s*(?:const\s*)?|\[\])+|(?:\s*\[\s*\])+)?
(?:\s+$Inline|\s+$Modifier)*
}x;
$Declare = qr{(?:$Storage\s+(?:$Inline\s+)?)?$Type};
+ $DeclareMisordered = qr{(?:$Storage\s+(?:$Inline\s+)?)?$TypeMisordered};
}
build_types();
@@ -550,11 +588,43 @@ sub seed_camelcase_includes {
}
}
+sub git_commit_info {
+ my ($commit, $id, $desc) = @_;
+
+ return ($id, $desc) if ((which("git") eq "") || !(-e ".git"));
+
+ my $output = `git log --no-color --format='%H %s' -1 $commit 2>&1`;
+ $output =~ s/^\s*//gm;
+ my @lines = split("\n", $output);
+
+ if ($lines[0] =~ /^error: short SHA1 $commit is ambiguous\./) {
+# Maybe one day convert this block of bash into something that returns
+# all matching commit ids, but it's very slow...
+#
+# echo "checking commits $1..."
+# git rev-list --remotes | grep -i "^$1" |
+# while read line ; do
+# git log --format='%H %s' -1 $line |
+# echo "commit $(cut -c 1-12,41-)"
+# done
+ } elsif ($lines[0] =~ /^fatal: ambiguous argument '$commit': unknown revision or path not in the working tree\./) {
+ } else {
+ $id = substr($lines[0], 0, 12);
+ $desc = substr($lines[0], 41);
+ }
+
+ return ($id, $desc);
+}
+
$chk_signoff = 0 if ($file);
my @rawlines = ();
my @lines = ();
my @fixed = ();
+my @fixed_inserted = ();
+my @fixed_deleted = ();
+my $fixlinenr = -1;
+
my $vname;
for my $filename (@ARGV) {
my $FILE;
@@ -583,6 +653,9 @@ for my $filename (@ARGV) {
@rawlines = ();
@lines = ();
@fixed = ();
+ @fixed_inserted = ();
+ @fixed_deleted = ();
+ $fixlinenr = -1;
}
exit($exit);
@@ -674,6 +747,18 @@ sub format_email {
return $formatted_email;
}
+sub which {
+ my ($bin) = @_;
+
+ foreach my $path (split(/:/, $ENV{PATH})) {
+ if (-e "$path/$bin") {
+ return "$path/$bin";
+ }
+ }
+
+ return "";
+}
+
sub which_conf {
my ($conf) = @_;
@@ -1483,6 +1568,90 @@ sub report_dump {
our @report;
}
+sub fixup_current_range {
+ my ($lineRef, $offset, $length) = @_;
+
+ if ($$lineRef =~ /^\@\@ -\d+,\d+ \+(\d+),(\d+) \@\@/) {
+ my $o = $1;
+ my $l = $2;
+ my $no = $o + $offset;
+ my $nl = $l + $length;
+ $$lineRef =~ s/\+$o,$l \@\@/\+$no,$nl \@\@/;
+ }
+}
+
+sub fix_inserted_deleted_lines {
+ my ($linesRef, $insertedRef, $deletedRef) = @_;
+
+ my $range_last_linenr = 0;
+ my $delta_offset = 0;
+
+ my $old_linenr = 0;
+ my $new_linenr = 0;
+
+ my $next_insert = 0;
+ my $next_delete = 0;
+
+ my @lines = ();
+
+ my $inserted = @{$insertedRef}[$next_insert++];
+ my $deleted = @{$deletedRef}[$next_delete++];
+
+ foreach my $old_line (@{$linesRef}) {
+ my $save_line = 1;
+ my $line = $old_line; #don't modify the array
+ if ($line =~ /^(?:\+\+\+|\-\-\-)\s+\S+/) { #new filename
+ $delta_offset = 0;
+ } elsif ($line =~ /^\@\@ -\d+,\d+ \+\d+,\d+ \@\@/) { #new hunk
+ $range_last_linenr = $new_linenr;
+ fixup_current_range(\$line, $delta_offset, 0);
+ }
+
+ while (defined($deleted) && ${$deleted}{'LINENR'} == $old_linenr) {
+ $deleted = @{$deletedRef}[$next_delete++];
+ $save_line = 0;
+ fixup_current_range(\$lines[$range_last_linenr], $delta_offset--, -1);
+ }
+
+ while (defined($inserted) && ${$inserted}{'LINENR'} == $old_linenr) {
+ push(@lines, ${$inserted}{'LINE'});
+ $inserted = @{$insertedRef}[$next_insert++];
+ $new_linenr++;
+ fixup_current_range(\$lines[$range_last_linenr], $delta_offset++, 1);
+ }
+
+ if ($save_line) {
+ push(@lines, $line);
+ $new_linenr++;
+ }
+
+ $old_linenr++;
+ }
+
+ return @lines;
+}
+
+sub fix_insert_line {
+ my ($linenr, $line) = @_;
+
+ my $inserted = {
+ LINENR => $linenr,
+ LINE => $line,
+ };
+ push(@fixed_inserted, $inserted);
+}
+
+sub fix_delete_line {
+ my ($linenr, $line) = @_;
+
+ my $deleted = {
+ LINENR => $linenr,
+ LINE => $line,
+ };
+
+ push(@fixed_deleted, $deleted);
+}
+
sub ERROR {
my ($type, $msg) = @_;
@@ -1637,11 +1806,13 @@ sub process {
my $signoff = 0;
my $is_patch = 0;
- my $in_header_lines = 1;
+ my $in_header_lines = $file ? 0 : 1;
my $in_commit_log = 0; #Scanning lines before patch
-
+ my $reported_maintainer_file = 0;
my $non_utf8_charset = 0;
+ my $last_blank_line = 0;
+
our @report = ();
our $cnt_lines = 0;
our $cnt_error = 0;
@@ -1759,8 +1930,10 @@ sub process {
$realcnt = 0;
$linenr = 0;
+ $fixlinenr = -1;
foreach my $line (@lines) {
$linenr++;
+ $fixlinenr++;
my $sline = $line; #copy of $line
$sline =~ s/$;/ /g; #with comments as spaces
@@ -1891,7 +2064,7 @@ sub process {
if (WARN("BAD_SIGN_OFF",
"Do not use whitespace before $ucfirst_sign_off\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =
+ $fixed[$fixlinenr] =
"$ucfirst_sign_off $email";
}
}
@@ -1899,7 +2072,7 @@ sub process {
if (WARN("BAD_SIGN_OFF",
"'$ucfirst_sign_off' is the preferred signature form\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =
+ $fixed[$fixlinenr] =
"$ucfirst_sign_off $email";
}
@@ -1908,7 +2081,7 @@ sub process {
if (WARN("BAD_SIGN_OFF",
"Use a single space after $ucfirst_sign_off\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =
+ $fixed[$fixlinenr] =
"$ucfirst_sign_off $email";
}
}
@@ -1956,6 +2129,31 @@ sub process {
"Remove Gerrit Change-Id's before submitting upstream.\n" . $herecurr);
}
+# Check for improperly formed commit descriptions
+ if ($in_commit_log &&
+ $line =~ /\bcommit\s+[0-9a-f]{5,}/i &&
+ $line !~ /\b[Cc]ommit [0-9a-f]{12,16} \("/) {
+ $line =~ /\b(c)ommit\s+([0-9a-f]{5,})/i;
+ my $init_char = $1;
+ my $orig_commit = lc($2);
+ my $id = '01234567890ab';
+ my $desc = 'commit description';
+ ($id, $desc) = git_commit_info($orig_commit, $id, $desc);
+ ERROR("GIT_COMMIT_ID",
+ "Please use 12 to 16 chars for the git commit ID like: '${init_char}ommit $id (\"$desc\")'\n" . $herecurr);
+ }
+
+# Check for added, moved or deleted files
+ if (!$reported_maintainer_file && !$in_commit_log &&
+ ($line =~ /^(?:new|deleted) file mode\s*\d+\s*$/ ||
+ $line =~ /^rename (?:from|to) [\w\/\.\-]+\s*$/ ||
+ ($line =~ /\{\s*([\w\/\.\-]*)\s*\=\>\s*([\w\/\.\-]*)\s*\}/ &&
+ (defined($1) || defined($2))))) {
+ $reported_maintainer_file = 1;
+ WARN("FILE_PATH_CHANGES",
+ "added, moved or deleted file(s), does MAINTAINERS need updating?\n" . $herecurr);
+ }
+
# Check for wrappage within a valid hunk of the file
if ($realcnt != 0 && $line !~ m{^(?:\+|-| |\\ No newline|$)}) {
ERROR("CORRUPTED_PATCH",
@@ -1993,7 +2191,8 @@ sub process {
# Check if it's the start of a commit log
# (not a header line and we haven't seen the patch filename)
if ($in_header_lines && $realfile =~ /^$/ &&
- $rawline !~ /^(commit\b|from\b|[\w-]+:).+$/i) {
+ !($rawline =~ /^\s+\S/ ||
+ $rawline =~ /^(commit\b|from\b|[\w-]+:).*$/i)) {
$in_header_lines = 0;
$in_commit_log = 1;
}
@@ -2021,14 +2220,14 @@ sub process {
if (ERROR("DOS_LINE_ENDINGS",
"DOS line endings\n" . $herevet) &&
$fix) {
- $fixed[$linenr - 1] =~ s/[\s\015]+$//;
+ $fixed[$fixlinenr] =~ s/[\s\015]+$//;
}
} elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) {
my $herevet = "$here\n" . cat_vet($rawline) . "\n";
if (ERROR("TRAILING_WHITESPACE",
"trailing whitespace\n" . $herevet) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\s+$//;
+ $fixed[$fixlinenr] =~ s/\s+$//;
}
$rpt_cleaners = 1;
@@ -2049,7 +2248,7 @@ sub process {
# Only applies when adding the entry originally, after that we do not have
# sufficient context to determine whether it is indeed long enough.
if ($realfile =~ /Kconfig/ &&
- $line =~ /.\s*config\s+/) {
+ $line =~ /^\+\s*config\s+/) {
my $length = 0;
my $cnt = $realcnt;
my $ln = $linenr + 1;
@@ -2062,10 +2261,11 @@ sub process {
$is_end = $lines[$ln - 1] =~ /^\+/;
next if ($f =~ /^-/);
+ last if (!$file && $f =~ /^\@\@/);
- if ($lines[$ln - 1] =~ /.\s*(?:bool|tristate)\s*\"/) {
+ if ($lines[$ln - 1] =~ /^\+\s*(?:bool|tristate)\s*\"/) {
$is_start = 1;
- } elsif ($lines[$ln - 1] =~ /.\s*(?:---)?help(?:---)?$/) {
+ } elsif ($lines[$ln - 1] =~ /^\+\s*(?:---)?help(?:---)?$/) {
$length = -1;
}
@@ -2161,12 +2361,18 @@ sub process {
"quoted string split across lines\n" . $hereprev);
}
+# check for missing a space in a string concatenation
+ if ($prevrawline =~ /[^\\]\w"$/ && $rawline =~ /^\+[\t ]+"\w/) {
+ WARN('MISSING_SPACE',
+ "break quoted strings at a space character\n" . $hereprev);
+ }
+
# check for spaces before a quoted newline
if ($rawline =~ /^.*\".*\s\\n/) {
if (WARN("QUOTED_WHITESPACE_BEFORE_NEWLINE",
"unnecessary whitespace before a quoted newline\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/^(\+.*\".*)\s+\\n/$1\\n/;
+ $fixed[$fixlinenr] =~ s/^(\+.*\".*)\s+\\n/$1\\n/;
}
}
@@ -2203,7 +2409,7 @@ sub process {
if (ERROR("CODE_INDENT",
"code indent should use tabs where possible\n" . $herevet) &&
$fix) {
- $fixed[$linenr - 1] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
+ $fixed[$fixlinenr] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
}
}
@@ -2213,9 +2419,9 @@ sub process {
if (WARN("SPACE_BEFORE_TAB",
"please, no space before tabs\n" . $herevet) &&
$fix) {
- while ($fixed[$linenr - 1] =~
+ while ($fixed[$fixlinenr] =~
s/(^\+.*) {8,8}+\t/$1\t\t/) {}
- while ($fixed[$linenr - 1] =~
+ while ($fixed[$fixlinenr] =~
s/(^\+.*) +\t/$1\t/) {}
}
}
@@ -2249,19 +2455,19 @@ sub process {
if (CHK("PARENTHESIS_ALIGNMENT",
"Alignment should match open parenthesis\n" . $hereprev) &&
$fix && $line =~ /^\+/) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/^\+[ \t]*/\+$goodtabindent/;
}
}
}
}
- if ($line =~ /^\+.*\*[ \t]*\)[ \t]+(?!$Assignment|$Arithmetic)/) {
+ if ($line =~ /^\+.*\(\s*$Type\s*\)[ \t]+(?!$Assignment|$Arithmetic|{)/) {
if (CHK("SPACING",
- "No space is necessary after a cast\n" . $hereprev) &&
+ "No space is necessary after a cast\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
- s/^(\+.*\*[ \t]*\))[ \t]+/$1/;
+ $fixed[$fixlinenr] =~
+ s/(\(\s*$Type\s*\))[ \t]+/$1/;
}
}
@@ -2291,10 +2497,44 @@ sub process {
"networking block comments put the trailing */ on a separate line\n" . $herecurr);
}
+# check for missing blank lines after struct/union declarations
+# with exceptions for various attributes and macros
+ if ($prevline =~ /^[\+ ]};?\s*$/ &&
+ $line =~ /^\+/ &&
+ !($line =~ /^\+\s*$/ ||
+ $line =~ /^\+\s*EXPORT_SYMBOL/ ||
+ $line =~ /^\+\s*MODULE_/i ||
+ $line =~ /^\+\s*\#\s*(?:end|elif|else)/ ||
+ $line =~ /^\+[a-z_]*init/ ||
+ $line =~ /^\+\s*(?:static\s+)?[A-Z_]*ATTR/ ||
+ $line =~ /^\+\s*DECLARE/ ||
+ $line =~ /^\+\s*__setup/)) {
+ if (CHK("LINE_SPACING",
+ "Please use a blank line after function/struct/union/enum declarations\n" . $hereprev) &&
+ $fix) {
+ fix_insert_line($fixlinenr, "\+");
+ }
+ }
+
+# check for multiple consecutive blank lines
+ if ($prevline =~ /^[\+ ]\s*$/ &&
+ $line =~ /^\+\s*$/ &&
+ $last_blank_line != ($linenr - 1)) {
+ if (CHK("LINE_SPACING",
+ "Please don't use multiple blank lines\n" . $hereprev) &&
+ $fix) {
+ fix_delete_line($fixlinenr, $rawline);
+ }
+
+ $last_blank_line = $linenr;
+ }
+
# check for missing blank lines after declarations
if ($sline =~ /^\+\s+\S/ && #Not at char 1
# actual declarations
($prevline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+ # function pointer declarations
+ $prevline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
# foo bar; where foo is some local typedef or #define
$prevline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
# known declaration macros
@@ -2307,6 +2547,8 @@ sub process {
$prevline =~ /(?:\{\s*|\\)$/) &&
# looks like a declaration
!($sline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+ # function pointer declarations
+ $sline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
# foo bar; where foo is some local typedef or #define
$sline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
# known declaration macros
@@ -2321,8 +2563,11 @@ sub process {
$sline =~ /^\+\s+\(?\s*(?:$Compare|$Assignment|$Operators)/) &&
# indentation of previous and current line are the same
(($prevline =~ /\+(\s+)\S/) && $sline =~ /^\+$1\S/)) {
- WARN("SPACING",
- "Missing a blank line after declarations\n" . $hereprev);
+ if (WARN("LINE_SPACING",
+ "Missing a blank line after declarations\n" . $hereprev) &&
+ $fix) {
+ fix_insert_line($fixlinenr, "\+");
+ }
}
# check for spaces at the beginning of a line.
@@ -2335,13 +2580,33 @@ sub process {
if (WARN("LEADING_SPACE",
"please, no spaces at the start of a line\n" . $herevet) &&
$fix) {
- $fixed[$linenr - 1] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
+ $fixed[$fixlinenr] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
}
}
# check we are in a valid C source file if not then ignore this hunk
next if ($realfile !~ /\.(h|c)$/);
+# check indentation of any line with a bare else
+# if the previous line is a break or return and is indented 1 tab more...
+ if ($sline =~ /^\+([\t]+)(?:}[ \t]*)?else(?:[ \t]*{)?\s*$/) {
+ my $tabs = length($1) + 1;
+ if ($prevline =~ /^\+\t{$tabs,$tabs}(?:break|return)\b/) {
+ WARN("UNNECESSARY_ELSE",
+ "else is not generally useful after a break or return\n" . $hereprev);
+ }
+ }
+
+# check indentation of a line with a break;
+# if the previous line is a goto or return and is indented the same # of tabs
+ if ($sline =~ /^\+([\t]+)break\s*;\s*$/) {
+ my $tabs = $1;
+ if ($prevline =~ /^\+$tabs(?:goto|return)\b/) {
+ WARN("UNNECESSARY_BREAK",
+ "break is not useful after a goto or return\n" . $hereprev);
+ }
+ }
+
# discourage the addition of CONFIG_EXPERIMENTAL in #if(def).
if ($line =~ /^\+\s*\#\s*if.*\bCONFIG_EXPERIMENTAL\b/) {
WARN("CONFIG_EXPERIMENTAL",
@@ -2477,7 +2742,7 @@ sub process {
# if/while/etc brace do not go on next line, unless defining a do while loop,
# or if that brace on the next line is for something else
- if ($line =~ /(.*)\b((?:if|while|for|switch)\s*\(|do\b|else\b)/ && $line !~ /^.\s*\#/) {
+ if ($line =~ /(.*)\b((?:if|while|for|switch|(?:[a-z_]+|)for_each[a-z_]+)\s*\(|do\b|else\b)/ && $line !~ /^.\s*\#/) {
my $pre_ctx = "$1$2";
my ($level, @ctx) = ctx_statement_level($linenr, $realcnt, 0);
@@ -2504,7 +2769,7 @@ sub process {
#print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n";
#print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n";
- if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln -1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) {
+ if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln - 1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) {
ERROR("OPEN_BRACE",
"that open brace { should be on the previous line\n" .
"$here\n$ctx\n$rawlines[$ctx_ln - 1]\n");
@@ -2523,7 +2788,7 @@ sub process {
}
# Check relative indent for conditionals and blocks.
- if ($line =~ /\b(?:(?:if|while|for)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) {
+ if ($line =~ /\b(?:(?:if|while|for|(?:[a-z_]+|)for_each[a-z_]+)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) {
($stat, $cond, $line_nr_next, $remain_next, $off_next) =
ctx_statement_block($linenr, $realcnt, 0)
if (!defined $stat);
@@ -2654,8 +2919,18 @@ sub process {
# check for initialisation to aggregates open brace on the next line
if ($line =~ /^.\s*{/ &&
$prevline =~ /(?:^|[^=])=\s*$/) {
- ERROR("OPEN_BRACE",
- "that open brace { should be on the previous line\n" . $hereprev);
+ if (ERROR("OPEN_BRACE",
+ "that open brace { should be on the previous line\n" . $hereprev) &&
+ $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
+ fix_delete_line($fixlinenr - 1, $prevrawline);
+ fix_delete_line($fixlinenr, $rawline);
+ my $fixedline = $prevrawline;
+ $fixedline =~ s/\s*=\s*$/ = {/;
+ fix_insert_line($fixlinenr, $fixedline);
+ $fixedline = $line;
+ $fixedline =~ s/^(.\s*){\s*/$1/;
+ fix_insert_line($fixlinenr, $fixedline);
+ }
}
#
@@ -2680,10 +2955,10 @@ sub process {
if (ERROR("C99_COMMENTS",
"do not use C99 // comments\n" . $herecurr) &&
$fix) {
- my $line = $fixed[$linenr - 1];
+ my $line = $fixed[$fixlinenr];
if ($line =~ /\/\/(.*)$/) {
my $comment = trim($1);
- $fixed[$linenr - 1] =~ s@\/\/(.*)$@/\* $comment \*/@;
+ $fixed[$fixlinenr] =~ s@\/\/(.*)$@/\* $comment \*/@;
}
}
}
@@ -2742,7 +3017,7 @@ sub process {
"do not initialise globals to 0 or NULL\n" .
$herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/;
+ $fixed[$fixlinenr] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/;
}
}
# check for static initialisers.
@@ -2751,10 +3026,17 @@ sub process {
"do not initialise statics to 0 or NULL\n" .
$herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/;
+ $fixed[$fixlinenr] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/;
}
}
+# check for misordered declarations of char/short/int/long with signed/unsigned
+ while ($sline =~ m{(\b$TypeMisordered\b)}g) {
+ my $tmp = trim($1);
+ WARN("MISORDERED_TYPE",
+ "type '$tmp' should be specified in [[un]signed] [short|int|long|long long] order\n" . $herecurr);
+ }
+
# check for static const char * arrays.
if ($line =~ /\bstatic\s+const\s+char\s*\*\s*(\w+)\s*\[\s*\]\s*=\s*/) {
WARN("STATIC_CONST_CHAR_ARRAY",
@@ -2781,7 +3063,7 @@ sub process {
if (ERROR("FUNCTION_WITHOUT_ARGS",
"Bad function definition - $1() should probably be $1(void)\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/(\b($Type)\s+($Ident))\s*\(\s*\)/$2 $3(void)/;
+ $fixed[$fixlinenr] =~ s/(\b($Type)\s+($Ident))\s*\(\s*\)/$2 $3(void)/;
}
}
@@ -2790,7 +3072,7 @@ sub process {
if (WARN("DEFINE_PCI_DEVICE_TABLE",
"Prefer struct pci_device_id over deprecated DEFINE_PCI_DEVICE_TABLE\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\b(?:static\s+|)DEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=\s*/static const struct pci_device_id $1\[\] = /;
+ $fixed[$fixlinenr] =~ s/\b(?:static\s+|)DEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=\s*/static const struct pci_device_id $1\[\] = /;
}
}
@@ -2827,7 +3109,7 @@ sub process {
my $sub_from = $ident;
my $sub_to = $ident;
$sub_to =~ s/\Q$from\E/$to/;
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s@\Q$sub_from\E@$sub_to@;
}
}
@@ -2855,7 +3137,7 @@ sub process {
my $sub_from = $match;
my $sub_to = $match;
$sub_to =~ s/\Q$from\E/$to/;
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s@\Q$sub_from\E@$sub_to@;
}
}
@@ -2917,7 +3199,7 @@ sub process {
if (WARN("PREFER_PR_LEVEL",
"Prefer pr_warn(... to pr_warning(...\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/\bpr_warning\b/pr_warn/;
}
}
@@ -2933,17 +3215,40 @@ sub process {
# function brace can't be on same line, except for #defines of do while,
# or if closed on same line
- if (($line=~/$Type\s*$Ident\(.*\).*\s{/) and
+ if (($line=~/$Type\s*$Ident\(.*\).*\s*{/) and
!($line=~/\#\s*define.*do\s{/) and !($line=~/}/)) {
- ERROR("OPEN_BRACE",
- "open brace '{' following function declarations go on the next line\n" . $herecurr);
+ if (ERROR("OPEN_BRACE",
+ "open brace '{' following function declarations go on the next line\n" . $herecurr) &&
+ $fix) {
+ fix_delete_line($fixlinenr, $rawline);
+ my $fixed_line = $rawline;
+ $fixed_line =~ /(^..*$Type\s*$Ident\(.*\)\s*){(.*)$/;
+ my $line1 = $1;
+ my $line2 = $2;
+ fix_insert_line($fixlinenr, ltrim($line1));
+ fix_insert_line($fixlinenr, "\+{");
+ if ($line2 !~ /^\s*$/) {
+ fix_insert_line($fixlinenr, "\+\t" . trim($line2));
+ }
+ }
}
# open braces for enum, union and struct go on the same line.
if ($line =~ /^.\s*{/ &&
$prevline =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?\s*$/) {
- ERROR("OPEN_BRACE",
- "open brace '{' following $1 go on the same line\n" . $hereprev);
+ if (ERROR("OPEN_BRACE",
+ "open brace '{' following $1 go on the same line\n" . $hereprev) &&
+ $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
+ fix_delete_line($fixlinenr - 1, $prevrawline);
+ fix_delete_line($fixlinenr, $rawline);
+ my $fixedline = rtrim($prevrawline) . " {";
+ fix_insert_line($fixlinenr, $fixedline);
+ $fixedline = $rawline;
+ $fixedline =~ s/^(.\s*){\s*/$1\t/;
+ if ($fixedline !~ /^\+\s*$/) {
+ fix_insert_line($fixlinenr, $fixedline);
+ }
+ }
}
# missing space after union, struct or enum definition
@@ -2951,7 +3256,7 @@ sub process {
if (WARN("SPACING",
"missing space after $1 definition\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/^(.\s*(?:typedef\s+)?(?:enum|union|struct)(?:\s+$Ident){1,2})([=\{])/$1 $2/;
}
}
@@ -3021,7 +3326,7 @@ sub process {
}
if (show_type("SPACING") && $fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/^(.\s*)$Declare\s*\(\s*\*\s*$Ident\s*\)\s*\(/$1 . $declare . $post_declare_space . '(*' . $funcname . ')('/ex;
}
}
@@ -3038,7 +3343,7 @@ sub process {
if (ERROR("BRACKET_SPACE",
"space prohibited before open square bracket '['\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/^(\+.*?)\s+\[/$1\[/;
}
}
@@ -3073,7 +3378,7 @@ sub process {
if (WARN("SPACING",
"space prohibited between function name and open parenthesis '('\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/\b$name\s+\(/$name\(/;
}
}
@@ -3341,8 +3646,8 @@ sub process {
$fixed_line = $fixed_line . $fix_elements[$#elements];
}
- if ($fix && $line_fixed && $fixed_line ne $fixed[$linenr - 1]) {
- $fixed[$linenr - 1] = $fixed_line;
+ if ($fix && $line_fixed && $fixed_line ne $fixed[$fixlinenr]) {
+ $fixed[$fixlinenr] = $fixed_line;
}
@@ -3353,7 +3658,7 @@ sub process {
if (WARN("SPACING",
"space prohibited before semicolon\n" . $herecurr) &&
$fix) {
- 1 while $fixed[$linenr - 1] =~
+ 1 while $fixed[$fixlinenr] =~
s/^(\+.*\S)\s+;/$1;/;
}
}
@@ -3386,7 +3691,7 @@ sub process {
if (ERROR("SPACING",
"space required before the open brace '{'\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/^(\+.*(?:do|\))){/$1 {/;
+ $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\))){/$1 {/;
}
}
@@ -3404,7 +3709,7 @@ sub process {
if (ERROR("SPACING",
"space required after that close brace '}'\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/}((?!(?:,|;|\)))\S)/} $1/;
}
}
@@ -3414,7 +3719,7 @@ sub process {
if (ERROR("SPACING",
"space prohibited after that open square bracket '['\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/\[\s+/\[/;
}
}
@@ -3422,7 +3727,7 @@ sub process {
if (ERROR("SPACING",
"space prohibited before that close square bracket ']'\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/\s+\]/\]/;
}
}
@@ -3433,7 +3738,7 @@ sub process {
if (ERROR("SPACING",
"space prohibited after that open parenthesis '('\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/\(\s+/\(/;
}
}
@@ -3443,18 +3748,27 @@ sub process {
if (ERROR("SPACING",
"space prohibited before that close parenthesis ')'\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ print("fixlinenr: <$fixlinenr> fixed[fixlinenr]: <$fixed[$fixlinenr]>\n");
+ $fixed[$fixlinenr] =~
s/\s+\)/\)/;
}
}
+# check unnecessary parentheses around addressof/dereference single $Lvals
+# ie: &(foo->bar) should be &foo->bar and *(foo->bar) should be *foo->bar
+
+ while ($line =~ /(?:[^&]&\s*|\*)\(\s*($Ident\s*(?:$Member\s*)+)\s*\)/g) {
+ CHK("UNNECESSARY_PARENTHESES",
+ "Unnecessary parentheses around $1\n" . $herecurr);
+ }
+
#goto labels aren't indented, allow a single space however
if ($line=~/^.\s+[A-Za-z\d_]+:(?![0-9]+)/ and
!($line=~/^. [A-Za-z\d_]+:/) and !($line=~/^.\s+default:/)) {
if (WARN("INDENTED_LABEL",
"labels should not be indented\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/^(.)\s+/$1/;
}
}
@@ -3516,7 +3830,7 @@ sub process {
if (ERROR("SPACING",
"space required before the open parenthesis '('\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/\b(if|while|for|switch)\(/$1 \(/;
}
}
@@ -3606,7 +3920,7 @@ sub process {
# if should not continue a brace
if ($line =~ /}\s*if\b/) {
ERROR("TRAILING_STATEMENTS",
- "trailing statements should be on next line\n" .
+ "trailing statements should be on next line (or did you mean 'else if'?)\n" .
$herecurr);
}
# case and default should not have general statements after them
@@ -3622,14 +3936,26 @@ sub process {
# Check for }<nl>else {, these must be at the same
# indent level to be relevant to each other.
- if ($prevline=~/}\s*$/ and $line=~/^.\s*else\s*/ and
- $previndent == $indent) {
- ERROR("ELSE_AFTER_BRACE",
- "else should follow close brace '}'\n" . $hereprev);
+ if ($prevline=~/}\s*$/ and $line=~/^.\s*else\s*/ &&
+ $previndent == $indent) {
+ if (ERROR("ELSE_AFTER_BRACE",
+ "else should follow close brace '}'\n" . $hereprev) &&
+ $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
+ fix_delete_line($fixlinenr - 1, $prevrawline);
+ fix_delete_line($fixlinenr, $rawline);
+ my $fixedline = $prevrawline;
+ $fixedline =~ s/}\s*$//;
+ if ($fixedline !~ /^\+\s*$/) {
+ fix_insert_line($fixlinenr, $fixedline);
+ }
+ $fixedline = $rawline;
+ $fixedline =~ s/^(.\s*)else/$1} else/;
+ fix_insert_line($fixlinenr, $fixedline);
+ }
}
- if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ and
- $previndent == $indent) {
+ if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ &&
+ $previndent == $indent) {
my ($s, $c) = ctx_statement_block($linenr, $realcnt, 0);
# Find out what is on the end of the line after the
@@ -3638,8 +3964,18 @@ sub process {
$s =~ s/\n.*//g;
if ($s =~ /^\s*;/) {
- ERROR("WHILE_AFTER_BRACE",
- "while should follow close brace '}'\n" . $hereprev);
+ if (ERROR("WHILE_AFTER_BRACE",
+ "while should follow close brace '}'\n" . $hereprev) &&
+ $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
+ fix_delete_line($fixlinenr - 1, $prevrawline);
+ fix_delete_line($fixlinenr, $rawline);
+ my $fixedline = $prevrawline;
+ my $trailing = $rawline;
+ $trailing =~ s/^\+//;
+ $trailing = trim($trailing);
+ $fixedline =~ s/}\s*$/} $trailing/;
+ fix_insert_line($fixlinenr, $fixedline);
+ }
}
}
@@ -3653,7 +3989,7 @@ sub process {
"Avoid gcc v4.3+ binary constant extension: <$var>\n" . $herecurr) &&
$fix) {
my $hexval = sprintf("0x%x", oct($var));
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/\b$var\b/$hexval/;
}
}
@@ -3689,7 +4025,7 @@ sub process {
if (WARN("WHITESPACE_AFTER_LINE_CONTINUATION",
"Whitespace after \\ makes next lines useless\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\s+$//;
+ $fixed[$fixlinenr] =~ s/\s+$//;
}
}
@@ -3762,7 +4098,7 @@ sub process {
$dstat !~ /^(?:$Ident|-?$Constant),$/ && # 10, // foo(),
$dstat !~ /^(?:$Ident|-?$Constant);$/ && # foo();
$dstat !~ /^[!~-]?(?:$Lval|$Constant)$/ && # 10 // foo() // !foo // ~foo // -foo // foo->bar // foo.bar->baz
- $dstat !~ /^'X'$/ && # character constants
+ $dstat !~ /^'X'$/ && $dstat !~ /^'XX'$/ && # character constants
$dstat !~ /$exceptions/ &&
$dstat !~ /^\.$Ident\s*=/ && # .foo =
$dstat !~ /^(?:\#\s*$Ident|\#\s*$Constant)\s*$/ && # stringification #foo
@@ -4014,6 +4350,23 @@ sub process {
}
}
+# check for unnecessary "Out of Memory" messages
+ if ($line =~ /^\+.*\b$logFunctions\s*\(/ &&
+ $prevline =~ /^[ \+]\s*if\s*\(\s*(\!\s*|NULL\s*==\s*)?($Lval)(\s*==\s*NULL\s*)?\s*\)/ &&
+ (defined $1 || defined $3) &&
+ $linenr > 3) {
+ my $testval = $2;
+ my $testline = $lines[$linenr - 3];
+
+ my ($s, $c) = ctx_statement_block($linenr - 3, $realcnt, 0);
+# print("line: <$line>\nprevline: <$prevline>\ns: <$s>\nc: <$c>\n\n\n");
+
+ if ($c =~ /(?:^|\n)[ \+]\s*(?:$Type\s*)?\Q$testval\E\s*=\s*(?:\([^\)]*\)\s*)?\s*(?:devm_)?(?:[kv][czm]alloc(?:_node|_array)?\b|kstrdup|(?:dev_)?alloc_skb)/) {
+ WARN("OOM_MESSAGE",
+ "Possible unnecessary 'out of memory' message\n" . $hereprev);
+ }
+ }
+
# check for bad placement of section $InitAttribute (e.g.: __initdata)
if ($line =~ /(\b$InitAttribute\b)/) {
my $attr = $1;
@@ -4027,7 +4380,7 @@ sub process {
WARN("MISPLACED_INIT",
"$attr should be placed after $var\n" . $herecurr))) &&
$fix) {
- $fixed[$linenr - 1] =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*([=;])\s*/"$1" . trim(string_find_replace($2, "\\s*$attr\\s*", " ")) . " " . trim(string_find_replace($3, "\\s*$attr\\s*", "")) . " $attr" . ("$4" eq ";" ? ";" : " = ")/e;
+ $fixed[$fixlinenr] =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*([=;])\s*/"$1" . trim(string_find_replace($2, "\\s*$attr\\s*", " ")) . " " . trim(string_find_replace($3, "\\s*$attr\\s*", "")) . " $attr" . ("$4" eq ";" ? ";" : " = ")/e;
}
}
}
@@ -4041,7 +4394,7 @@ sub process {
if (ERROR("INIT_ATTRIBUTE",
"Use of const init definition must use ${attr_prefix}initconst\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/$InitAttributeData/${attr_prefix}initconst/;
}
}
@@ -4052,12 +4405,12 @@ sub process {
if (ERROR("INIT_ATTRIBUTE",
"Use of $attr requires a separate use of const\n" . $herecurr) &&
$fix) {
- my $lead = $fixed[$linenr - 1] =~
+ my $lead = $fixed[$fixlinenr] =~
/(^\+\s*(?:static\s+))/;
$lead = rtrim($1);
$lead = "$lead " if ($lead !~ /^\+$/);
$lead = "${lead}const ";
- $fixed[$linenr - 1] =~ s/(^\+\s*(?:static\s+))/$lead/;
+ $fixed[$fixlinenr] =~ s/(^\+\s*(?:static\s+))/$lead/;
}
}
@@ -4070,7 +4423,7 @@ sub process {
if (WARN("CONSTANT_CONVERSION",
"$constant_func should be $func\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\b$constant_func\b/$func/g;
+ $fixed[$fixlinenr] =~ s/\b$constant_func\b/$func/g;
}
}
@@ -4120,7 +4473,7 @@ sub process {
if (ERROR("SPACING",
"exactly one space required after that #$1\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/^(.\s*\#\s*(ifdef|ifndef|elif))\s{2,}/$1 /;
}
@@ -4168,7 +4521,7 @@ sub process {
if (WARN("INLINE",
"plain inline is preferred over $1\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\b(__inline__|__inline)\b/inline/;
+ $fixed[$fixlinenr] =~ s/\b(__inline__|__inline)\b/inline/;
}
}
@@ -4193,7 +4546,7 @@ sub process {
if (WARN("PREFER_PRINTF",
"__printf(string-index, first-to-check) is preferred over __attribute__((format(printf, string-index, first-to-check)))\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)\)\s*\)\s*\)/"__printf(" . trim($1) . ")"/ex;
+ $fixed[$fixlinenr] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)\)\s*\)\s*\)/"__printf(" . trim($1) . ")"/ex;
}
}
@@ -4204,7 +4557,7 @@ sub process {
if (WARN("PREFER_SCANF",
"__scanf(string-index, first-to-check) is preferred over __attribute__((format(scanf, string-index, first-to-check)))\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\s*,\s*(.*)\)\s*\)\s*\)/"__scanf(" . trim($1) . ")"/ex;
+ $fixed[$fixlinenr] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\s*,\s*(.*)\)\s*\)\s*\)/"__scanf(" . trim($1) . ")"/ex;
}
}
@@ -4219,7 +4572,7 @@ sub process {
if (WARN("SIZEOF_PARENTHESIS",
"sizeof $1 should be sizeof($1)\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/"sizeof(" . trim($1) . ")"/ex;
+ $fixed[$fixlinenr] =~ s/\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/"sizeof(" . trim($1) . ")"/ex;
}
}
@@ -4242,7 +4595,7 @@ sub process {
if (WARN("PREFER_SEQ_PUTS",
"Prefer seq_puts to seq_printf\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\bseq_printf\b/seq_puts/;
+ $fixed[$fixlinenr] =~ s/\bseq_printf\b/seq_puts/;
}
}
}
@@ -4271,7 +4624,7 @@ sub process {
if (WARN("PREFER_ETHER_ADDR_COPY",
"Prefer ether_addr_copy() over memcpy() if the Ethernet addresses are __aligned(2)\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/ether_addr_copy($2, $7)/;
+ $fixed[$fixlinenr] =~ s/\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/ether_addr_copy($2, $7)/;
}
}
@@ -4359,7 +4712,7 @@ sub process {
if (CHK("AVOID_EXTERNS",
"extern prototypes should be avoided in .h files\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/(.*)\bextern\b\s*(.*)/$1$2/;
+ $fixed[$fixlinenr] =~ s/(.*)\bextern\b\s*(.*)/$1$2/;
}
}
@@ -4419,23 +4772,24 @@ sub process {
# check for k[mz]alloc with multiplies that could be kmalloc_array/kcalloc
if ($^V && $^V ge 5.10.0 &&
- $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/) {
+ $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)\s*,/) {
my $oldfunc = $3;
my $a1 = $4;
my $a2 = $10;
my $newfunc = "kmalloc_array";
$newfunc = "kcalloc" if ($oldfunc eq "kzalloc");
- if ($a1 =~ /^sizeof\s*\S/ || $a2 =~ /^sizeof\s*\S/) {
+ my $r1 = $a1;
+ my $r2 = $a2;
+ if ($a1 =~ /^sizeof\s*\S/) {
+ $r1 = $a2;
+ $r2 = $a1;
+ }
+ if ($r1 !~ /^sizeof\b/ && $r2 =~ /^sizeof\s*\S/ &&
+ !($r1 =~ /^$Constant$/ || $r1 =~ /^[A-Z_][A-Z0-9_]*$/)) {
if (WARN("ALLOC_WITH_MULTIPLY",
"Prefer $newfunc over $oldfunc with multiply\n" . $herecurr) &&
$fix) {
- my $r1 = $a1;
- my $r2 = $a2;
- if ($a1 =~ /^sizeof\s*\S/) {
- $r1 = $a2;
- $r2 = $a1;
- }
- $fixed[$linenr - 1] =~ s/\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/$1 . ' = ' . "$newfunc(" . trim($r1) . ', ' . trim($r2)/e;
+ $fixed[$fixlinenr] =~ s/\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/$1 . ' = ' . "$newfunc(" . trim($r1) . ', ' . trim($r2)/e;
}
}
@@ -4459,17 +4813,17 @@ sub process {
if (WARN("ONE_SEMICOLON",
"Statements terminations use 1 semicolon\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/(\s*;\s*){2,}$/;/g;
+ $fixed[$fixlinenr] =~ s/(\s*;\s*){2,}$/;/g;
}
}
-# check for case / default statements not preceeded by break/fallthrough/switch
+# check for case / default statements not preceded by break/fallthrough/switch
if ($line =~ /^.\s*(?:case\s+(?:$Ident|$Constant)\s*|default):/) {
my $has_break = 0;
my $has_statement = 0;
my $count = 0;
my $prevline = $linenr;
- while ($prevline > 1 && $count < 3 && !$has_break) {
+ while ($prevline > 1 && ($file || $count < 3) && !$has_break) {
$prevline--;
my $rline = $rawlines[$prevline - 1];
my $fline = $lines[$prevline - 1];
@@ -4507,7 +4861,7 @@ sub process {
if (WARN("USE_FUNC",
"__func__ should be used instead of gcc specific __FUNCTION__\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\b__FUNCTION__\b/__func__/g;
+ $fixed[$fixlinenr] =~ s/\b__FUNCTION__\b/__func__/g;
}
}
@@ -4750,12 +5104,16 @@ sub process {
hash_show_words(\%use_type, "Used");
hash_show_words(\%ignore_type, "Ignored");
- if ($clean == 0 && $fix && "@rawlines" ne "@fixed") {
+ if ($clean == 0 && $fix &&
+ ("@rawlines" ne "@fixed" ||
+ $#fixed_inserted >= 0 || $#fixed_deleted >= 0)) {
my $newfile = $filename;
$newfile .= ".EXPERIMENTAL-checkpatch-fixes" if (!$fix_inplace);
my $linecount = 0;
my $f;
+ @fixed = fix_inserted_deleted_lines(\@fixed, \@fixed_inserted, \@fixed_deleted);
+
open($f, '>', $newfile)
or die "$P: Can't open $newfile for write\n";
foreach my $fixed_line (@fixed) {
@@ -4763,7 +5121,7 @@ sub process {
if ($file) {
if ($linecount > 3) {
$fixed_line =~ s/^\+//;
- print $f $fixed_line. "\n";
+ print $f $fixed_line . "\n";
}
} else {
print $f $fixed_line . "\n";
diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl
index c05d586b1fee..899b4230320e 100755
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -52,14 +52,12 @@ my (@stack, $re, $dre, $x, $xs, $funcre);
#8000008a: 20 1d sub sp,4
#80000ca8: fa cd 05 b0 sub sp,sp,1456
$re = qr/^.*sub.*sp.*,([0-9]{1,8})/o;
- } elsif ($arch =~ /^i[3456]86$/) {
+ } elsif ($arch =~ /^x86(_64)?$/ || $arch =~ /^i[3456]86$/) {
#c0105234: 81 ec ac 05 00 00 sub $0x5ac,%esp
- $re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%esp$/o;
- $dre = qr/^.*[as][du][db] (%.*),\%esp$/o;
- } elsif ($arch eq 'x86_64') {
- # 2f60: 48 81 ec e8 05 00 00 sub $0x5e8,%rsp
- $re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%rsp$/o;
- $dre = qr/^.*[as][du][db] (\%.*),\%rsp$/o;
+ # or
+ # 2f60: 48 81 ec e8 05 00 00 sub $0x5e8,%rsp
+ $re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%(e|r)sp$/o;
+ $dre = qr/^.*[as][du][db] (%.*),\%(e|r)sp$/o;
} elsif ($arch eq 'ia64') {
#e0000000044011fc: 01 0f fc 8c adds r12=-384,r12
$re = qr/.*adds.*r12=-(([0-9]{2}|[3-9])[0-9]{2}),r12/o;
diff --git a/scripts/coccinelle/free/ifnullfree.cocci b/scripts/coccinelle/free/ifnullfree.cocci
new file mode 100644
index 000000000000..9321fe328a97
--- /dev/null
+++ b/scripts/coccinelle/free/ifnullfree.cocci
@@ -0,0 +1,54 @@
+/// NULL check before some freeing functions is not needed.
+///
+/// Based on checkpatch warning
+/// "kfree(NULL) is safe this check is probably not required"
+/// and kfreeaddr.cocci by Julia Lawall.
+///
+// Copyright: (C) 2014 Fabian Frederick. GPLv2.
+// Comments: -
+// Options: --no-includes --include-headers
+
+virtual patch
+virtual org
+virtual report
+virtual context
+
+@r2 depends on patch@
+expression E;
+@@
+- if (E)
+(
+- kfree(E);
++ kfree(E);
+|
+- debugfs_remove(E);
++ debugfs_remove(E);
+|
+- debugfs_remove_recursive(E);
++ debugfs_remove_recursive(E);
+|
+- usb_free_urb(E);
++ usb_free_urb(E);
+)
+
+@r depends on context || report || org @
+expression E;
+position p;
+@@
+
+* if (E)
+* \(kfree@p\|debugfs_remove@p\|debugfs_remove_recursive@p\|usb_free_urb\)(E);
+
+@script:python depends on org@
+p << r.p;
+@@
+
+cocci.print_main("NULL check before that freeing function is not needed", p)
+
+@script:python depends on report@
+p << r.p;
+@@
+
+msg = "WARNING: NULL check before freeing functions like kfree, debugfs_remove, debugfs_remove_recursive or usb_free_urb is not needed. Maybe consider reorganizing relevant code to avoid passing NULL values."
+coccilib.report.print_report(p[0], msg)
+
diff --git a/scripts/tags.sh b/scripts/tags.sh
index e6b011fe1d0d..cbfd269a6011 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -168,6 +168,7 @@ exuberant()
--extra=+f --c-kinds=+px \
--regex-asm='/^(ENTRY|_GLOBAL)\(([^)]*)\).*/\2/' \
--regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/' \
+ --regex-c='/^COMPAT_SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/compat_sys_\1/' \
--regex-c++='/^TRACE_EVENT\(([^,)]*).*/trace_\1/' \
--regex-c++='/^DEFINE_EVENT\([^,)]*, *([^,)]*).*/trace_\1/' \
--regex-c++='/PAGEFLAG\(([^,)]*).*/Page\1/' \
@@ -231,6 +232,7 @@ emacs()
all_target_sources | xargs $1 -a \
--regex='/^\(ENTRY\|_GLOBAL\)(\([^)]*\)).*/\2/' \
--regex='/^SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/sys_\1/' \
+ --regex='/^COMPAT_SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/compat_sys_\1/' \
--regex='/^TRACE_EVENT(\([^,)]*\).*/trace_\1/' \
--regex='/^DEFINE_EVENT([^,)]*, *\([^,)]*\).*/trace_\1/' \
--regex='/PAGEFLAG(\([^,)]*\).*/Page\1/' \