summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2014-07-18 17:44:06 +1000
committerStephen Rothwell <sfr@canb.auug.org.au>2014-07-18 17:44:06 +1000
commit706bf313be879cb232ccb0f7888f4cea85c798bf (patch)
tree16a36f6932f9b8e7c7e99aa19281d7b185be58d4
parent4cba168ca6518e61ea29757e390a9eee4759075d (diff)
parent9f5b39070c6c1ea062cea18ecd3e767e1dc3671a (diff)
Merge branch 'akpm-current/current'
Conflicts: arch/x86/syscalls/syscall_64.tbl
-rw-r--r--CREDITS7
-rw-r--r--Documentation/ABI/testing/sysfs-fs-nilfs2269
-rw-r--r--Documentation/RCU/whatisRCU.txt2
-rw-r--r--Documentation/cgroups/memcg_test.txt160
-rw-r--r--Documentation/kernel-parameters.txt8
-rw-r--r--Documentation/leds/leds-class.txt3
-rw-r--r--Documentation/oops-tracing.txt2
-rw-r--r--Documentation/printk-formats.txt6
-rw-r--r--Documentation/sysctl/kernel.txt1
-rw-r--r--Documentation/trace/postprocess/trace-vmscan-postprocess.pl53
-rw-r--r--MAINTAINERS21
-rw-r--r--Makefile19
-rw-r--r--arch/alpha/include/asm/Kbuild1
-rw-r--r--arch/alpha/include/asm/scatterlist.h6
-rw-r--r--arch/arm/Kconfig3
-rw-r--r--arch/arm/include/asm/Kbuild1
-rw-r--r--arch/arm/include/asm/scatterlist.h12
-rw-r--r--arch/arm/mm/dma-mapping.c1
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/cris/include/asm/Kbuild1
-rw-r--r--arch/cris/include/asm/scatterlist.h6
-rw-r--r--arch/frv/include/asm/Kbuild1
-rw-r--r--arch/frv/include/asm/scatterlist.h6
-rw-r--r--arch/ia64/Kconfig3
-rw-r--r--arch/ia64/include/asm/Kbuild1
-rw-r--r--arch/ia64/include/asm/scatterlist.h7
-rw-r--r--arch/ia64/kernel/time.c15
-rw-r--r--arch/m32r/include/asm/Kbuild1
-rw-r--r--arch/m32r/include/asm/scatterlist.h6
-rw-r--r--arch/m68k/Kconfig2
-rw-r--r--arch/m68k/kernel/sys_m68k.c18
-rw-r--r--arch/microblaze/include/asm/Kbuild1
-rw-r--r--arch/microblaze/include/asm/scatterlist.h1
-rw-r--r--arch/mips/Kconfig2
-rw-r--r--arch/mn10300/include/asm/Kbuild1
-rw-r--r--arch/mn10300/include/asm/scatterlist.h16
-rw-r--r--arch/powerpc/Kconfig3
-rw-r--r--arch/powerpc/include/asm/Kbuild1
-rw-r--r--arch/powerpc/include/asm/scatterlist.h17
-rw-r--r--arch/powerpc/kvm/Makefile1
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c19
-rw-r--r--arch/powerpc/kvm/book3s_hv_cma.c240
-rw-r--r--arch/powerpc/kvm/book3s_hv_cma.h27
-rw-r--r--arch/powerpc/mm/dma-noncoherent.c1
-rw-r--r--arch/powerpc/platforms/44x/warp.c1
-rw-r--r--arch/powerpc/platforms/52xx/efika.c1
-rw-r--r--arch/powerpc/platforms/amigaone/setup.c1
-rw-r--r--arch/s390/Kconfig3
-rw-r--r--arch/s390/include/asm/Kbuild1
-rw-r--r--arch/s390/include/asm/scatterlist.h3
-rw-r--r--arch/score/include/asm/Kbuild1
-rw-r--r--arch/score/include/asm/scatterlist.h6
-rw-r--r--arch/sh/Kconfig2
-rw-r--r--arch/sh/Makefile3
-rw-r--r--arch/sh/drivers/dma/Kconfig5
-rw-r--r--arch/sh/include/cpu-sh4/cpu/dma-register.h1
-rw-r--r--arch/sh/include/cpu-sh4a/cpu/dma.h3
-rw-r--r--arch/sh/kernel/cpu/sh4a/clock-sh7724.c4
-rw-r--r--arch/sh/kernel/time.c4
-rw-r--r--arch/sh/mm/asids-debugfs.c4
-rw-r--r--arch/sparc/Kconfig1
-rw-r--r--arch/sparc/include/asm/Kbuild1
-rw-r--r--arch/sparc/include/asm/scatterlist.h8
-rw-r--r--arch/tile/Kconfig2
-rw-r--r--arch/tile/kernel/module.c2
-rw-r--r--arch/um/include/asm/Kbuild1
-rw-r--r--arch/x86/Kbuild4
-rw-r--r--arch/x86/Kconfig4
-rw-r--r--arch/x86/Makefile8
-rw-r--r--arch/x86/include/asm/Kbuild3
-rw-r--r--arch/x86/include/asm/crash.h9
-rw-r--r--arch/x86/include/asm/kexec-bzimage64.h6
-rw-r--r--arch/x86/include/asm/kexec.h45
-rw-r--r--arch/x86/include/asm/numa.h1
-rw-r--r--arch/x86/include/asm/pgtable_64.h3
-rw-r--r--arch/x86/include/asm/scatterlist.h8
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/crash.c563
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c532
-rw-r--r--arch/x86/kernel/machine_kexec_64.c228
-rw-r--r--arch/x86/mm/fault.c5
-rw-r--r--arch/x86/mm/init_64.c36
-rw-r--r--arch/x86/mm/numa.c34
-rw-r--r--arch/x86/purgatory/Makefile30
-rw-r--r--arch/x86/purgatory/entry64.S101
-rw-r--r--arch/x86/purgatory/purgatory.c72
-rw-r--r--arch/x86/purgatory/setup-x86_64.S58
-rw-r--r--arch/x86/purgatory/sha256.c283
-rw-r--r--arch/x86/purgatory/sha256.h22
-rw-r--r--arch/x86/purgatory/stack.S19
-rw-r--r--arch/x86/purgatory/string.c13
-rw-r--r--arch/x86/syscalls/syscall_64.tbl1
-rw-r--r--block/bio-integrity.c2
-rw-r--r--block/genhd.c2
-rw-r--r--crypto/zlib.c8
-rw-r--r--drivers/ata/Kconfig1
-rw-r--r--drivers/ata/libata-core.c72
-rw-r--r--drivers/base/Kconfig10
-rw-r--r--drivers/base/dma-contiguous.c220
-rw-r--r--drivers/base/memory.c30
-rw-r--r--drivers/base/node.c2
-rw-r--r--drivers/block/zram/zram_drv.c93
-rw-r--r--drivers/block/zram/zram_drv.h29
-rw-r--r--drivers/firmware/efi/runtime-map.c21
-rw-r--r--drivers/firmware/memmap.c6
-rw-r--r--drivers/gpu/drm/drm_hashtab.c2
-rw-r--r--drivers/hwmon/asus_atk0110.c2
-rw-r--r--drivers/input/Kconfig9
-rw-r--r--drivers/input/Makefile3
-rw-r--r--drivers/input/input.c6
-rw-r--r--drivers/input/leds.c249
-rw-r--r--drivers/leds/Kconfig3
-rw-r--r--drivers/lguest/core.c7
-rw-r--r--drivers/misc/ti-st/st_core.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c2
-rw-r--r--drivers/net/irda/donauboe.c15
-rw-r--r--drivers/parport/parport_ip32.c2
-rw-r--r--drivers/rtc/Kconfig21
-rw-r--r--drivers/rtc/Makefile4
-rw-r--r--drivers/rtc/interface.c2
-rw-r--r--drivers/rtc/rtc-ds1343.c75
-rw-r--r--drivers/rtc/rtc-ds1742.c2
-rw-r--r--drivers/rtc/rtc-efi-platform.c30
-rw-r--r--drivers/rtc/rtc-efi.c32
-rw-r--r--drivers/staging/android/binder.c4
-rw-r--r--drivers/staging/lustre/lustre/libcfs/hash.c4
-rw-r--r--drivers/tty/Kconfig4
-rw-r--r--drivers/tty/vt/keyboard.c110
-rw-r--r--drivers/video/backlight/backlight.c2
-rw-r--r--fs/adfs/adfs.h1
-rw-r--r--fs/adfs/dir.c2
-rw-r--r--fs/adfs/dir_fplus.c9
-rw-r--r--fs/bfs/bfs.h1
-rw-r--r--fs/bfs/dir.c4
-rw-r--r--fs/bfs/inode.c8
-rw-r--r--fs/binfmt_elf.c21
-rw-r--r--fs/cachefiles/bind.c6
-rw-r--r--fs/cachefiles/daemon.c6
-rw-r--r--fs/cifs/cifsacl.c2
-rw-r--r--fs/cifs/cifssmb.c20
-rw-r--r--fs/cifs/file.c4
-rw-r--r--fs/cifs/sess.c2
-rw-r--r--fs/cifs/smb2file.c4
-rw-r--r--fs/cifs/smb2misc.c38
-rw-r--r--fs/cifs/smb2ops.c2
-rw-r--r--fs/cifs/smb2pdu.c2
-rw-r--r--fs/cifs/smb2pdu.h28
-rw-r--r--fs/coda/cache.c2
-rw-r--r--fs/coda/coda_linux.c2
-rw-r--r--fs/coda/dir.c3
-rw-r--r--fs/coda/file.c2
-rw-r--r--fs/coda/inode.c4
-rw-r--r--fs/coda/pioctl.c2
-rw-r--r--fs/coda/psdev.c2
-rw-r--r--fs/coda/upcall.c2
-rw-r--r--fs/compat.c2
-rw-r--r--fs/coredump.c2
-rw-r--r--fs/cramfs/inode.c45
-rw-r--r--fs/cramfs/uncompress.c10
-rw-r--r--fs/dlm/debug_fs.c15
-rw-r--r--fs/efs/namei.c11
-rw-r--r--fs/exec.c4
-rw-r--r--fs/exofs/ore_raid.c2
-rw-r--r--fs/ext4/fsync.c5
-rw-r--r--fs/fscache/main.c4
-rw-r--r--fs/hfsplus/catalog.c89
-rw-r--r--fs/hfsplus/dir.c11
-rw-r--r--fs/hfsplus/hfsplus_fs.h4
-rw-r--r--fs/hfsplus/super.c4
-rw-r--r--fs/hpfs/dnode.c17
-rw-r--r--fs/isofs/Makefile2
-rw-r--r--fs/isofs/compress.c10
-rw-r--r--fs/isofs/export.c11
-rw-r--r--fs/isofs/inode.c88
-rw-r--r--fs/isofs/namei.c5
-rw-r--r--fs/isofs/rock.c44
-rw-r--r--fs/jffs2/compr_zlib.c7
-rw-r--r--fs/logfs/readwrite.c15
-rw-r--r--fs/mpage.c23
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nilfs2/Makefile2
-rw-r--r--fs/nilfs2/nilfs.h8
-rw-r--r--fs/nilfs2/super.c11
-rw-r--r--fs/nilfs2/sysfs.c1137
-rw-r--r--fs/nilfs2/sysfs.h176
-rw-r--r--fs/nilfs2/the_nilfs.c17
-rw-r--r--fs/nilfs2/the_nilfs.h20
-rw-r--r--fs/notify/inode_mark.c2
-rw-r--r--fs/notify/vfsmount_mark.c2
-rw-r--r--fs/ntfs/file.c3
-rw-r--r--fs/ocfs2/alloc.c15
-rw-r--r--fs/ocfs2/aops.c21
-rw-r--r--fs/ocfs2/cluster/quorum.c13
-rw-r--r--fs/ocfs2/cluster/tcp.c45
-rw-r--r--fs/ocfs2/cluster/tcp.h1
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c5
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c12
-rw-r--r--fs/ocfs2/inode.c10
-rw-r--r--fs/ocfs2/ioctl.c129
-rw-r--r--fs/ocfs2/move_extents.c2
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/ocfs2/slot_map.c2
-rw-r--r--fs/omfs/inode.c2
-rw-r--r--fs/proc/base.c8
-rw-r--r--fs/proc/fd.c2
-rw-r--r--fs/proc/generic.c32
-rw-r--r--fs/proc/internal.h8
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--fs/proc/meminfo.c2
-rw-r--r--fs/proc/proc_sysctl.c2
-rw-r--r--fs/proc/proc_tty.c4
-rw-r--r--fs/proc/root.c4
-rw-r--r--fs/proc/vmcore.c82
-rw-r--r--fs/pstore/ram_core.c2
-rw-r--r--fs/qnx6/Makefile1
-rw-r--r--fs/qnx6/dir.c26
-rw-r--r--fs/qnx6/inode.c99
-rw-r--r--fs/qnx6/namei.c6
-rw-r--r--fs/qnx6/qnx6.h12
-rw-r--r--fs/qnx6/super_mmi.c22
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/reiserfs/dir.c2
-rw-r--r--fs/reiserfs/do_balan.c2
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/reiserfs/ibalance.c2
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/reiserfs/ioctl.c2
-rw-r--r--fs/reiserfs/item_ops.c4
-rw-r--r--fs/reiserfs/lbalance.c2
-rw-r--r--fs/reiserfs/prints.c4
-rw-r--r--fs/reiserfs/procfs.c2
-rw-r--r--fs/reiserfs/stree.c2
-rw-r--r--fs/reiserfs/super.c9
-rw-r--r--fs/reiserfs/xattr.c22
-rw-r--r--fs/reiserfs/xattr_acl.c2
-rw-r--r--fs/reiserfs/xattr_security.c2
-rw-r--r--fs/reiserfs/xattr_trusted.c2
-rw-r--r--fs/reiserfs/xattr_user.c2
-rw-r--r--fs/romfs/super.c23
-rw-r--r--fs/seq_file.c30
-rw-r--r--fs/squashfs/file_direct.c2
-rw-r--r--fs/squashfs/super.c5
-rw-r--r--fs/ubifs/io.c2
-rw-r--r--fs/ubifs/recovery.c1
-rw-r--r--fs/ubifs/super.c12
-rw-r--r--fs/ufs/Makefile1
-rw-r--r--fs/ufs/inode.c32
-rw-r--r--fs/ufs/super.c304
-rw-r--r--fs/ufs/ufs.h10
-rw-r--r--fs/xattr.c2
-rw-r--r--include/linux/bitmap.h62
-rw-r--r--include/linux/byteorder/generic.h2
-rw-r--r--include/linux/cma.h27
-rw-r--r--include/linux/crc64_ecma.h56
-rw-r--r--include/linux/decompress/bunzip2.h8
-rw-r--r--include/linux/decompress/generic.h10
-rw-r--r--include/linux/decompress/inflate.h8
-rw-r--r--include/linux/decompress/unlz4.h8
-rw-r--r--include/linux/decompress/unlzma.h8
-rw-r--r--include/linux/decompress/unlzo.h8
-rw-r--r--include/linux/decompress/unxz.h8
-rw-r--r--include/linux/dma-contiguous.h11
-rw-r--r--include/linux/efi.h19
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/gfp.h2
-rw-r--r--include/linux/glob.h10
-rw-r--r--include/linux/huge_mm.h4
-rw-r--r--include/linux/hugetlb.h1
-rw-r--r--include/linux/input.h21
-rw-r--r--include/linux/ioport.h6
-rw-r--r--include/linux/kernel.h38
-rw-r--r--include/linux/kexec.h101
-rw-r--r--include/linux/klist.h2
-rw-r--r--include/linux/list.h14
-rw-r--r--include/linux/memblock.h4
-rw-r--r--include/linux/memcontrol.h98
-rw-r--r--include/linux/memory_hotplug.h9
-rw-r--r--include/linux/mm_types.h1
-rw-r--r--include/linux/mmdebug.h2
-rw-r--r--include/linux/mmzone.h219
-rw-r--r--include/linux/page-flags.h21
-rw-r--r--include/linux/page_cgroup.h70
-rw-r--r--include/linux/pagemap.h15
-rw-r--r--include/linux/printk.h2
-rw-r--r--include/linux/rculist.h8
-rw-r--r--include/linux/scatterlist.h2
-rw-r--r--include/linux/sched.h13
-rw-r--r--include/linux/shm.h18
-rw-r--r--include/linux/string.h1
-rw-r--r--include/linux/swap.h16
-rw-r--r--include/linux/syscalls.h4
-rw-r--r--include/linux/sysctl.h2
-rw-r--r--include/linux/user_namespace.h6
-rw-r--r--include/linux/vmalloc.h2
-rw-r--r--include/linux/zbud.h2
-rw-r--r--include/linux/zlib.h122
-rw-r--r--include/linux/zpool.h106
-rw-r--r--include/scsi/scsi.h2
-rw-r--r--include/trace/events/migrate.h1
-rw-r--r--include/trace/events/pagemap.h16
-rw-r--r--include/uapi/linux/kexec.h11
-rw-r--r--init/Kconfig51
-rw-r--r--init/do_mounts.c12
-rw-r--r--init/do_mounts_rd.c10
-rw-r--r--init/initramfs.c58
-rw-r--r--ipc/shm.c75
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/acct.c30
-rw-r--r--kernel/auditfilter.c4
-rw-r--r--kernel/bounds.c2
-rw-r--r--kernel/events/uprobes.c15
-rw-r--r--kernel/exit.c49
-rw-r--r--kernel/fork.c77
-rw-r--r--kernel/gcov/fs.c3
-rw-r--r--kernel/kallsyms.c2
-rw-r--r--kernel/kexec.c1276
-rw-r--r--kernel/panic.c1
-rw-r--r--kernel/printk/printk.c120
-rw-r--r--kernel/resource.c101
-rw-r--r--kernel/sys_ni.c1
-rw-r--r--kernel/sysctl.c9
-rw-r--r--kernel/test_kprobes.c87
-rw-r--r--kernel/user_namespace.c6
-rw-r--r--kernel/watchdog.c11
-rw-r--r--lib/Kconfig47
-rw-r--r--lib/Kconfig.debug2
-rw-r--r--lib/Makefile3
-rw-r--r--lib/bitmap.c111
-rw-r--r--lib/cmdline.c15
-rw-r--r--lib/crc64_ecma.c341
-rw-r--r--lib/decompress.c2
-rw-r--r--lib/decompress_bunzip2.c26
-rw-r--r--lib/decompress_inflate.c12
-rw-r--r--lib/decompress_unlz4.c83
-rw-r--r--lib/decompress_unlzma.c28
-rw-r--r--lib/decompress_unlzo.c12
-rw-r--r--lib/decompress_unxz.c10
-rw-r--r--lib/glob.c287
-rw-r--r--lib/idr.c25
-rw-r--r--lib/kfifo.c6
-rw-r--r--lib/klist.c6
-rw-r--r--lib/list_sort.c71
-rw-r--r--lib/scatterlist.c4
-rw-r--r--lib/string_helpers.c15
-rw-r--r--lib/test-kstrtox.c2
-rw-r--r--lib/vsprintf.c20
-rw-r--r--lib/zlib_deflate/deflate.c143
-rw-r--r--lib/zlib_inflate/inflate.c132
-rw-r--r--mm/Kconfig54
-rw-r--r--mm/Makefile2
-rw-r--r--mm/cma.c335
-rw-r--r--mm/compaction.c17
-rw-r--r--mm/filemap.c40
-rw-r--r--mm/gup.c18
-rw-r--r--mm/huge_memory.c63
-rw-r--r--mm/hugetlb.c126
-rw-r--r--mm/hwpoison-inject.c3
-rw-r--r--mm/internal.h2
-rw-r--r--mm/madvise.c3
-rw-r--r--mm/memcontrol.c1588
-rw-r--r--mm/memory-failure.c14
-rw-r--r--mm/memory.c94
-rw-r--r--mm/memory_hotplug.c17
-rw-r--r--mm/migrate.c44
-rw-r--r--mm/mlock.c9
-rw-r--r--mm/mmap.c5
-rw-r--r--mm/page-writeback.c6
-rw-r--r--mm/page_alloc.c148
-rw-r--r--mm/readahead.c3
-rw-r--r--mm/rmap.c30
-rw-r--r--mm/shmem.c186
-rw-r--r--mm/slab.c505
-rw-r--r--mm/slab.h22
-rw-r--r--mm/slab_common.c101
-rw-r--r--mm/slub.c201
-rw-r--r--mm/swap.c54
-rw-r--r--mm/swap_state.c8
-rw-r--r--mm/swapfile.c21
-rw-r--r--mm/truncate.c20
-rw-r--r--mm/util.c132
-rw-r--r--mm/vmalloc.c30
-rw-r--r--mm/vmscan.c317
-rw-r--r--mm/vmstat.c150
-rw-r--r--mm/zbud.c98
-rw-r--r--mm/zpool.c364
-rw-r--r--mm/zsmalloc.c86
-rw-r--r--mm/zswap.c81
-rw-r--r--net/batman-adv/fragmentation.c2
-rw-r--r--net/bridge/br_multicast.c2
-rw-r--r--net/ipv4/fib_trie.c2
-rw-r--r--net/ipv6/addrlabel.c2
-rw-r--r--net/xfrm/xfrm_policy.c4
-rw-r--r--scripts/.gitignore1
-rw-r--r--scripts/Makefile1
-rw-r--r--scripts/basic/.gitignore1
-rw-r--r--scripts/basic/Makefile1
-rw-r--r--scripts/basic/bin2c.c (renamed from scripts/bin2c.c)7
-rwxr-xr-xscripts/checkpatch.pl499
-rwxr-xr-xscripts/checkstack.pl12
-rw-r--r--scripts/coccinelle/free/ifnullfree.cocci54
-rwxr-xr-xscripts/tags.sh2
403 files changed, 11905 insertions, 5563 deletions
diff --git a/CREDITS b/CREDITS
index 28ee1514b9de..03343bf820e4 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1381,6 +1381,9 @@ S: 17 rue Danton
S: F - 94270 Le Kremlin-Bicêtre
S: France
+N: Jack Hammer
+D: IBM ServeRAID RAID (ips) driver maintenance
+
N: Greg Hankins
E: gregh@cc.gatech.edu
D: fixed keyboard driver to separate LED and locking status
@@ -1691,6 +1694,10 @@ S: Reading
S: RG6 2NU
S: United Kingdom
+N: Dave Jeffery
+E: dhjeffery@gmail.com
+D: SCSI hacks and IBM ServeRAID RAID driver maintenance
+
N: Jakub Jelinek
E: jakub@redhat.com
W: http://sunsite.mff.cuni.cz/~jj
diff --git a/Documentation/ABI/testing/sysfs-fs-nilfs2 b/Documentation/ABI/testing/sysfs-fs-nilfs2
new file mode 100644
index 000000000000..304ba84a973a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-fs-nilfs2
@@ -0,0 +1,269 @@
+
+What: /sys/fs/nilfs2/features/revision
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show current revision of NILFS file system driver.
+ This value informs about file system revision that
+ driver is ready to support.
+
+What: /sys/fs/nilfs2/features/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/features group.
+
+What: /sys/fs/nilfs2/<device>/revision
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show NILFS file system revision on volume.
+ This value informs about metadata structures'
+ revision on mounted volume.
+
+What: /sys/fs/nilfs2/<device>/blocksize
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show volume's block size in bytes.
+
+What: /sys/fs/nilfs2/<device>/device_size
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show volume size in bytes.
+
+What: /sys/fs/nilfs2/<device>/free_blocks
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show count of free blocks on volume.
+
+What: /sys/fs/nilfs2/<device>/uuid
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show volume's UUID (Universally Unique Identifier).
+
+What: /sys/fs/nilfs2/<device>/volume_name
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show volume's label.
+
+What: /sys/fs/nilfs2/<device>/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device> group.
+
+What: /sys/fs/nilfs2/<device>/superblock/sb_write_time
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show last write time of super block in human-readable
+ format.
+
+What: /sys/fs/nilfs2/<device>/superblock/sb_write_time_secs
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show last write time of super block in seconds.
+
+What: /sys/fs/nilfs2/<device>/superblock/sb_write_count
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show current write count of super block.
+
+What: /sys/fs/nilfs2/<device>/superblock/sb_update_frequency
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show/Set interval of periodical update of superblock
+ (in seconds).
+
+What: /sys/fs/nilfs2/<device>/superblock/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/superblock
+ group.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_pseg_block
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show start block number of the latest segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_seg_sequence
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show sequence value of the latest segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_seg_checkpoint
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show checkpoint number of the latest segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/current_seg_sequence
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show segment sequence counter.
+
+What: /sys/fs/nilfs2/<device>/segctor/current_last_full_seg
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show index number of the latest full segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/next_full_seg
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show index number of the full segment index
+ to be used next.
+
+What: /sys/fs/nilfs2/<device>/segctor/next_pseg_offset
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show offset of next partial segment in the current
+ full segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/next_checkpoint
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show next checkpoint number.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_seg_write_time
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show write time of the last segment in
+ human-readable format.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_seg_write_time_secs
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show write time of the last segment in seconds.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_nongc_write_time
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show write time of the last segment not for cleaner
+ operation in human-readable format.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_nongc_write_time_secs
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show write time of the last segment not for cleaner
+ operation in seconds.
+
+What: /sys/fs/nilfs2/<device>/segctor/dirty_data_blocks_count
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of dirty data blocks.
+
+What: /sys/fs/nilfs2/<device>/segctor/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/segctor
+ group.
+
+What: /sys/fs/nilfs2/<device>/segments/segments_number
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of segments on a volume.
+
+What: /sys/fs/nilfs2/<device>/segments/blocks_per_segment
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of blocks in segment.
+
+What: /sys/fs/nilfs2/<device>/segments/clean_segments
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show count of clean segments.
+
+What: /sys/fs/nilfs2/<device>/segments/dirty_segments
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show count of dirty segments.
+
+What: /sys/fs/nilfs2/<device>/segments/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/segments
+ group.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/checkpoints_number
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of checkpoints on volume.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/snapshots_number
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of snapshots on volume.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/last_seg_checkpoint
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show checkpoint number of the latest segment.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/next_checkpoint
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show next checkpoint number.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/checkpoints
+ group.
+
+What: /sys/fs/nilfs2/<device>/mounted_snapshots/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe content of /sys/fs/nilfs2/<device>/mounted_snapshots
+ group.
+
+What: /sys/fs/nilfs2/<device>/mounted_snapshots/<id>/inodes_count
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of inodes for snapshot.
+
+What: /sys/fs/nilfs2/<device>/mounted_snapshots/<id>/blocks_count
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of blocks for snapshot.
+
+What: /sys/fs/nilfs2/<device>/mounted_snapshots/<id>/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/mounted_snapshots/<id>
+ group.
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 49b8551a3b68..e48c57f1943b 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -818,7 +818,7 @@ RCU pointer/list update:
list_add_tail_rcu
list_del_rcu
list_replace_rcu
- hlist_add_after_rcu
+ hlist_add_behind_rcu
hlist_add_before_rcu
hlist_add_head_rcu
hlist_del_rcu
diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt
index 80ac454704b8..8870b0212150 100644
--- a/Documentation/cgroups/memcg_test.txt
+++ b/Documentation/cgroups/memcg_test.txt
@@ -24,64 +24,27 @@ Please note that implementation details can be changed.
a page/swp_entry may be charged (usage += PAGE_SIZE) at
- mem_cgroup_charge_anon()
- Called at new page fault and Copy-On-Write.
-
- mem_cgroup_try_charge_swapin()
- Called at do_swap_page() (page fault on swap entry) and swapoff.
- Followed by charge-commit-cancel protocol. (With swap accounting)
- At commit, a charge recorded in swap_cgroup is removed.
-
- mem_cgroup_charge_file()
- Called at add_to_page_cache()
-
- mem_cgroup_cache_charge_swapin()
- Called at shmem's swapin.
-
- mem_cgroup_prepare_migration()
- Called before migration. "extra" charge is done and followed by
- charge-commit-cancel protocol.
- At commit, charge against oldpage or newpage will be committed.
+ mem_cgroup_try_charge()
2. Uncharge
a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by
- mem_cgroup_uncharge_page()
- Called when an anonymous page is fully unmapped. I.e., mapcount goes
- to 0. If the page is SwapCache, uncharge is delayed until
- mem_cgroup_uncharge_swapcache().
-
- mem_cgroup_uncharge_cache_page()
- Called when a page-cache is deleted from radix-tree. If the page is
- SwapCache, uncharge is delayed until mem_cgroup_uncharge_swapcache().
-
- mem_cgroup_uncharge_swapcache()
- Called when SwapCache is removed from radix-tree. The charge itself
- is moved to swap_cgroup. (If mem+swap controller is disabled, no
- charge to swap occurs.)
+ mem_cgroup_uncharge()
+ Called when a page's refcount goes down to 0.
mem_cgroup_uncharge_swap()
Called when swp_entry's refcnt goes down to 0. A charge against swap
disappears.
- mem_cgroup_end_migration(old, new)
- At success of migration old is uncharged (if necessary), a charge
- to new page is committed. At failure, charge to old page is committed.
-
3. charge-commit-cancel
- In some case, we can't know this "charge" is valid or not at charging
- (because of races).
- To handle such case, there are charge-commit-cancel functions.
- mem_cgroup_try_charge_XXX
- mem_cgroup_commit_charge_XXX
- mem_cgroup_cancel_charge_XXX
- these are used in swap-in and migration.
+ Memcg pages are charged in two steps:
+ mem_cgroup_try_charge()
+ mem_cgroup_commit_charge() or mem_cgroup_cancel_charge()
At try_charge(), there are no flags to say "this page is charged".
at this point, usage += PAGE_SIZE.
- At commit(), the function checks the page should be charged or not
- and set flags or avoid charging.(usage -= PAGE_SIZE)
+ At commit(), the page is associated with the memcg.
At cancel(), simply usage -= PAGE_SIZE.
@@ -91,18 +54,6 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
Anonymous page is newly allocated at
- page fault into MAP_ANONYMOUS mapping.
- Copy-On-Write.
- It is charged right after it's allocated before doing any page table
- related operations. Of course, it's uncharged when another page is used
- for the fault address.
-
- At freeing anonymous page (by exit() or munmap()), zap_pte() is called
- and pages for ptes are freed one by one.(see mm/memory.c). Uncharges
- are done at page_remove_rmap() when page_mapcount() goes down to 0.
-
- Another page freeing is by page-reclaim (vmscan.c) and anonymous
- pages are swapped out. In this case, the page is marked as
- PageSwapCache(). uncharge() routine doesn't uncharge the page marked
- as SwapCache(). It's delayed until __delete_from_swap_cache().
4.1 Swap-in.
At swap-in, the page is taken from swap-cache. There are 2 cases.
@@ -111,41 +62,6 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
(b) If the SwapCache has been mapped by processes, it has been
charged already.
- This swap-in is one of the most complicated work. In do_swap_page(),
- following events occur when pte is unchanged.
-
- (1) the page (SwapCache) is looked up.
- (2) lock_page()
- (3) try_charge_swapin()
- (4) reuse_swap_page() (may call delete_swap_cache())
- (5) commit_charge_swapin()
- (6) swap_free().
-
- Considering following situation for example.
-
- (A) The page has not been charged before (2) and reuse_swap_page()
- doesn't call delete_from_swap_cache().
- (B) The page has not been charged before (2) and reuse_swap_page()
- calls delete_from_swap_cache().
- (C) The page has been charged before (2) and reuse_swap_page() doesn't
- call delete_from_swap_cache().
- (D) The page has been charged before (2) and reuse_swap_page() calls
- delete_from_swap_cache().
-
- memory.usage/memsw.usage changes to this page/swp_entry will be
- Case (A) (B) (C) (D)
- Event
- Before (2) 0/ 1 0/ 1 1/ 1 1/ 1
- ===========================================
- (3) +1/+1 +1/+1 +1/+1 +1/+1
- (4) - 0/ 0 - -1/ 0
- (5) 0/-1 0/ 0 -1/-1 0/ 0
- (6) - 0/-1 - 0/-1
- ===========================================
- Result 1/ 1 1/ 1 1/ 1 1/ 1
-
- In any cases, charges to this page should be 1/ 1.
-
4.2 Swap-out.
At swap-out, typical state transition is below.
@@ -158,28 +74,20 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
swp_entry's refcnt -= 1.
- At (b), the page is marked as SwapCache and not uncharged.
- At (d), the page is removed from SwapCache and a charge in page_cgroup
- is moved to swap_cgroup.
-
Finally, at task exit,
(e) zap_pte() is called and swp_entry's refcnt -=1 -> 0.
- Here, a charge in swap_cgroup disappears.
5. Page Cache
Page Cache is charged at
- add_to_page_cache_locked().
- uncharged at
- - __remove_from_page_cache().
-
The logic is very clear. (About migration, see below)
Note: __remove_from_page_cache() is called by remove_from_page_cache()
and __remove_mapping().
6. Shmem(tmpfs) Page Cache
- Memcg's charge/uncharge have special handlers of shmem. The best way
- to understand shmem's page state transition is to read mm/shmem.c.
+ The best way to understand shmem's page state transition is to read
+ mm/shmem.c.
But brief explanation of the behavior of memcg around shmem will be
helpful to understand the logic.
@@ -192,56 +100,10 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
It's charged when...
- A new page is added to shmem's radix-tree.
- A swp page is read. (move a charge from swap_cgroup to page_cgroup)
- It's uncharged when
- - A page is removed from radix-tree and not SwapCache.
- - When SwapCache is removed, a charge is moved to swap_cgroup.
- - When swp_entry's refcnt goes down to 0, a charge in swap_cgroup
- disappears.
7. Page Migration
- One of the most complicated functions is page-migration-handler.
- Memcg has 2 routines. Assume that we are migrating a page's contents
- from OLDPAGE to NEWPAGE.
-
- Usual migration logic is..
- (a) remove the page from LRU.
- (b) allocate NEWPAGE (migration target)
- (c) lock by lock_page().
- (d) unmap all mappings.
- (e-1) If necessary, replace entry in radix-tree.
- (e-2) move contents of a page.
- (f) map all mappings again.
- (g) pushback the page to LRU.
- (-) OLDPAGE will be freed.
-
- Before (g), memcg should complete all necessary charge/uncharge to
- NEWPAGE/OLDPAGE.
-
- The point is....
- - If OLDPAGE is anonymous, all charges will be dropped at (d) because
- try_to_unmap() drops all mapcount and the page will not be
- SwapCache.
-
- - If OLDPAGE is SwapCache, charges will be kept at (g) because
- __delete_from_swap_cache() isn't called at (e-1)
-
- - If OLDPAGE is page-cache, charges will be kept at (g) because
- remove_from_swap_cache() isn't called at (e-1)
-
- memcg provides following hooks.
-
- - mem_cgroup_prepare_migration(OLDPAGE)
- Called after (b) to account a charge (usage += PAGE_SIZE) against
- memcg which OLDPAGE belongs to.
-
- - mem_cgroup_end_migration(OLDPAGE, NEWPAGE)
- Called after (f) before (g).
- If OLDPAGE is used, commit OLDPAGE again. If OLDPAGE is already
- charged, a charge by prepare_migration() is automatically canceled.
- If NEWPAGE is used, commit NEWPAGE and uncharge OLDPAGE.
-
- But zap_pte() (by exit or munmap) can be called while migration,
- we have to check if OLDPAGE/NEWPAGE is a valid page after commit().
+
+ mem_cgroup_migrate()
8. LRU
Each memcg has its own private LRU. Now, its handling is under global
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 427b1dd12709..7d02a8b077fd 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1716,8 +1716,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
7 (KERN_DEBUG) debug-level messages
log_buf_len=n[KMG] Sets the size of the printk ring buffer,
- in bytes. n must be a power of two. The default
- size is set in the kernel config file.
+ in bytes. n must be a power of two and greater
+ than the minimal size. The minimal size is defined
+ by LOG_BUF_SHIFT kernel config parameter. There is
+ also CONFIG_LOG_CPU_MAX_BUF_SHIFT config parameter
+ that allows to increase the default size depending on
+ the number of CPUs. See init/Kconfig for more details.
logo.nologo [FB] Disables display of the built-in Linux logo.
This may be used to provide more screen space for
diff --git a/Documentation/leds/leds-class.txt b/Documentation/leds/leds-class.txt
index 79699c200766..62261c04060a 100644
--- a/Documentation/leds/leds-class.txt
+++ b/Documentation/leds/leds-class.txt
@@ -2,9 +2,6 @@
LED handling under Linux
========================
-If you're reading this and thinking about keyboard leds, these are
-handled by the input subsystem and the led class is *not* needed.
-
In its simplest form, the LED class just allows control of LEDs from
userspace. LEDs appear in /sys/class/leds/. The maximum brightness of the
LED is defined in max_brightness file. The brightness file will set the brightness
diff --git a/Documentation/oops-tracing.txt b/Documentation/oops-tracing.txt
index e3155995ddd8..beefb9f82902 100644
--- a/Documentation/oops-tracing.txt
+++ b/Documentation/oops-tracing.txt
@@ -268,6 +268,8 @@ characters, each representing a particular tainted value.
14: 'E' if an unsigned module has been loaded in a kernel supporting
module signature.
+ 15: 'L' if a soft lockup has previously occurred on the system.
+
The primary reason for the 'Tainted: ' string is to tell kernel
debuggers if this is a clean kernel or if anything unusual has
occurred. Tainting is permanent: even if an offending module is
diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt
index b4498218c474..3b56a991f52e 100644
--- a/Documentation/printk-formats.txt
+++ b/Documentation/printk-formats.txt
@@ -184,6 +184,12 @@ dentry names:
equivalent of %s dentry->d_name.name we used to use, %pd<n> prints
n last components. %pD does the same thing for struct file.
+task_struct comm name:
+
+ %pT
+
+ For printing task_struct->comm.
+
struct va_format:
%pV
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index c14374e71775..f79eb9666379 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -826,6 +826,7 @@ can be ORed together:
4096 - An out-of-tree module has been loaded.
8192 - An unsigned module has been loaded in a kernel supporting module
signature.
+16384 - A soft lockup has previously occurred on the system.
==============================================================
diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
index 78c9a7b2b58f..8f961ef2b457 100644
--- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
+++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
@@ -47,6 +47,10 @@ use constant HIGH_KSWAPD_REWAKEUP => 21;
use constant HIGH_NR_SCANNED => 22;
use constant HIGH_NR_TAKEN => 23;
use constant HIGH_NR_RECLAIMED => 24;
+use constant HIGH_NR_FILE_SCANNED => 25;
+use constant HIGH_NR_ANON_SCANNED => 26;
+use constant HIGH_NR_FILE_RECLAIMED => 27;
+use constant HIGH_NR_ANON_RECLAIMED => 28;
my %perprocesspid;
my %perprocess;
@@ -56,14 +60,18 @@ my $opt_read_procstat;
my $total_wakeup_kswapd;
my ($total_direct_reclaim, $total_direct_nr_scanned);
+my ($total_direct_nr_file_scanned, $total_direct_nr_anon_scanned);
my ($total_direct_latency, $total_kswapd_latency);
my ($total_direct_nr_reclaimed);
+my ($total_direct_nr_file_reclaimed, $total_direct_nr_anon_reclaimed);
my ($total_direct_writepage_file_sync, $total_direct_writepage_file_async);
my ($total_direct_writepage_anon_sync, $total_direct_writepage_anon_async);
my ($total_kswapd_nr_scanned, $total_kswapd_wake);
+my ($total_kswapd_nr_file_scanned, $total_kswapd_nr_anon_scanned);
my ($total_kswapd_writepage_file_sync, $total_kswapd_writepage_file_async);
my ($total_kswapd_writepage_anon_sync, $total_kswapd_writepage_anon_async);
my ($total_kswapd_nr_reclaimed);
+my ($total_kswapd_nr_file_reclaimed, $total_kswapd_nr_anon_reclaimed);
# Catch sigint and exit on request
my $sigint_report = 0;
@@ -374,6 +382,7 @@ EVENT_PROCESS:
}
my $isolate_mode = $1;
my $nr_scanned = $4;
+ my $file = $6;
# To closer match vmstat scanning statistics, only count isolate_both
# and isolate_inactive as scanning. isolate_active is rotation
@@ -382,6 +391,11 @@ EVENT_PROCESS:
# isolate_both == 3
if ($isolate_mode != 2) {
$perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned;
+ if ($file == 1) {
+ $perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED} += $nr_scanned;
+ } else {
+ $perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED} += $nr_scanned;
+ }
}
} elsif ($tracepoint eq "mm_vmscan_lru_shrink_inactive") {
$details = $6;
@@ -391,8 +405,19 @@ EVENT_PROCESS:
print " $regex_lru_shrink_inactive/o\n";
next;
}
+
my $nr_reclaimed = $4;
+ my $flags = $6;
+ my $file = 0;
+ if ($flags =~ /RECLAIM_WB_FILE/) {
+ $file = 1;
+ }
$perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED} += $nr_reclaimed;
+ if ($file) {
+ $perprocesspid{$process_pid}->{HIGH_NR_FILE_RECLAIMED} += $nr_reclaimed;
+ } else {
+ $perprocesspid{$process_pid}->{HIGH_NR_ANON_RECLAIMED} += $nr_reclaimed;
+ }
} elsif ($tracepoint eq "mm_vmscan_writepage") {
$details = $6;
if ($details !~ /$regex_writepage/o) {
@@ -493,7 +518,11 @@ sub dump_stats {
$total_direct_reclaim += $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN};
$total_wakeup_kswapd += $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD};
$total_direct_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED};
+ $total_direct_nr_file_scanned += $stats{$process_pid}->{HIGH_NR_FILE_SCANNED};
+ $total_direct_nr_anon_scanned += $stats{$process_pid}->{HIGH_NR_ANON_SCANNED};
$total_direct_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED};
+ $total_direct_nr_file_reclaimed += $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
+ $total_direct_nr_anon_reclaimed += $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
$total_direct_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
$total_direct_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
$total_direct_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
@@ -513,7 +542,11 @@ sub dump_stats {
$stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN},
$stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD},
$stats{$process_pid}->{HIGH_NR_SCANNED},
+ $stats{$process_pid}->{HIGH_NR_FILE_SCANNED},
+ $stats{$process_pid}->{HIGH_NR_ANON_SCANNED},
$stats{$process_pid}->{HIGH_NR_RECLAIMED},
+ $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED},
+ $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED},
$stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC},
$stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC},
$this_reclaim_delay / 1000);
@@ -552,7 +585,11 @@ sub dump_stats {
$total_kswapd_wake += $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE};
$total_kswapd_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED};
+ $total_kswapd_nr_file_scanned += $stats{$process_pid}->{HIGH_NR_FILE_SCANNED};
+ $total_kswapd_nr_anon_scanned += $stats{$process_pid}->{HIGH_NR_ANON_SCANNED};
$total_kswapd_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED};
+ $total_kswapd_nr_file_reclaimed += $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
+ $total_kswapd_nr_anon_reclaimed += $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
$total_kswapd_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
$total_kswapd_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
$total_kswapd_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
@@ -563,7 +600,11 @@ sub dump_stats {
$stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE},
$stats{$process_pid}->{HIGH_KSWAPD_REWAKEUP},
$stats{$process_pid}->{HIGH_NR_SCANNED},
+ $stats{$process_pid}->{HIGH_NR_FILE_SCANNED},
+ $stats{$process_pid}->{HIGH_NR_ANON_SCANNED},
$stats{$process_pid}->{HIGH_NR_RECLAIMED},
+ $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED},
+ $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED},
$stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC},
$stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC});
@@ -594,7 +635,11 @@ sub dump_stats {
print "\nSummary\n";
print "Direct reclaims: $total_direct_reclaim\n";
print "Direct reclaim pages scanned: $total_direct_nr_scanned\n";
+ print "Direct reclaim file pages scanned: $total_direct_nr_file_scanned\n";
+ print "Direct reclaim anon pages scanned: $total_direct_nr_anon_scanned\n";
print "Direct reclaim pages reclaimed: $total_direct_nr_reclaimed\n";
+ print "Direct reclaim file pages reclaimed: $total_direct_nr_file_reclaimed\n";
+ print "Direct reclaim anon pages reclaimed: $total_direct_nr_anon_reclaimed\n";
print "Direct reclaim write file sync I/O: $total_direct_writepage_file_sync\n";
print "Direct reclaim write anon sync I/O: $total_direct_writepage_anon_sync\n";
print "Direct reclaim write file async I/O: $total_direct_writepage_file_async\n";
@@ -604,7 +649,11 @@ sub dump_stats {
print "\n";
print "Kswapd wakeups: $total_kswapd_wake\n";
print "Kswapd pages scanned: $total_kswapd_nr_scanned\n";
+ print "Kswapd file pages scanned: $total_kswapd_nr_file_scanned\n";
+ print "Kswapd anon pages scanned: $total_kswapd_nr_anon_scanned\n";
print "Kswapd pages reclaimed: $total_kswapd_nr_reclaimed\n";
+ print "Kswapd file pages reclaimed: $total_kswapd_nr_file_reclaimed\n";
+ print "Kswapd anon pages reclaimed: $total_kswapd_nr_anon_reclaimed\n";
print "Kswapd reclaim write file sync I/O: $total_kswapd_writepage_file_sync\n";
print "Kswapd reclaim write anon sync I/O: $total_kswapd_writepage_anon_sync\n";
print "Kswapd reclaim write file async I/O: $total_kswapd_writepage_file_async\n";
@@ -629,7 +678,11 @@ sub aggregate_perprocesspid() {
$perprocess{$process}->{MM_VMSCAN_WAKEUP_KSWAPD} += $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD};
$perprocess{$process}->{HIGH_KSWAPD_REWAKEUP} += $perprocesspid{$process_pid}->{HIGH_KSWAPD_REWAKEUP};
$perprocess{$process}->{HIGH_NR_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_SCANNED};
+ $perprocess{$process}->{HIGH_NR_FILE_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED};
+ $perprocess{$process}->{HIGH_NR_ANON_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED};
$perprocess{$process}->{HIGH_NR_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED};
+ $perprocess{$process}->{HIGH_NR_FILE_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
+ $perprocess{$process}->{HIGH_NR_ANON_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
$perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
$perprocess{$process}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
$perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
diff --git a/MAINTAINERS b/MAINTAINERS
index fd97e0c8a47c..78e1cb2f5dc3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3221,26 +3221,12 @@ T: git git://linuxtv.org/anttip/media_tree.git
S: Maintained
F: drivers/media/tuners/e4000*
-EATA-DMA SCSI DRIVER
-M: Michael Neuffer <mike@i-Connect.Net>
-L: linux-eata@i-connect.net
-L: linux-scsi@vger.kernel.org
-S: Maintained
-F: drivers/scsi/eata*
-
EATA ISA/EISA/PCI SCSI DRIVER
M: Dario Ballabio <ballabio_dario@emc.com>
L: linux-scsi@vger.kernel.org
S: Maintained
F: drivers/scsi/eata.c
-EATA-PIO SCSI DRIVER
-M: Michael Neuffer <mike@i-Connect.Net>
-L: linux-eata@i-connect.net
-L: linux-scsi@vger.kernel.org
-S: Maintained
-F: drivers/scsi/eata_pio.*
-
EC100 MEDIA DRIVER
M: Antti Palosaari <crope@iki.fi>
L: linux-media@vger.kernel.org
@@ -4485,10 +4471,7 @@ S: Supported
F: drivers/scsi/ibmvscsi/ibmvfc*
IBM ServeRAID RAID DRIVER
-P: Jack Hammer
-M: Dave Jeffery <ipslinux@adaptec.com>
-W: http://www.developer.ibm.com/welcome/netfinity/serveraid.html
-S: Supported
+S: Orphan
F: drivers/scsi/ips.*
ICH LPC AND GPIO DRIVER
@@ -7174,7 +7157,7 @@ F: drivers/ptp/*
F: include/linux/ptp_cl*
PTRACE SUPPORT
-M: Roland McGrath <roland@redhat.com>
+M: Roland McGrath <roland@hack.frob.com>
M: Oleg Nesterov <oleg@redhat.com>
S: Maintained
F: include/asm-generic/syscall.h
diff --git a/Makefile b/Makefile
index 2639babed462..d84f5cd3513a 100644
--- a/Makefile
+++ b/Makefile
@@ -637,6 +637,9 @@ else
KBUILD_CFLAGS += -O2
endif
+# Tell gcc to never replace conditional load with a non-conditional one
+KBUILD_CFLAGS += $(call cc-option,--param=allow-store-data-races=0)
+
ifdef CONFIG_READABLE_ASM
# Disable optimizations that make assembler listings hard to read.
# reorder blocks reorders the control in the function
@@ -652,6 +655,22 @@ KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN})
endif
# Handle stack protector mode.
+#
+# Since kbuild can potentially perform two passes (first with the old
+# .config values and then with updated .config values), we cannot error out
+# if a desired compiler option is unsupported. If we were to error, kbuild
+# could never get to the second pass and actually notice that we changed
+# the option to something that was supported.
+#
+# Additionally, we don't want to fallback and/or silently change which compiler
+# flags will be used, since that leads to producing kernels with different
+# security feature characteristics depending on the compiler used. ("But I
+# selected CC_STACKPROTECTOR_STRONG! Why did it build with _REGULAR?!")
+#
+# The middle ground is to warn here so that the failed option is obvious, but
+# to let the build fail with bad compiler flags so that we can't produce a
+# kernel when there is a CONFIG and compiler mismatch.
+#
ifdef CONFIG_CC_STACKPROTECTOR_REGULAR
stackp-flag := -fstack-protector
ifeq ($(call cc-option, $(stackp-flag)),)
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index 96e54bed5088..e858aa0ad8af 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -6,4 +6,5 @@ generic-y += exec.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/alpha/include/asm/scatterlist.h b/arch/alpha/include/asm/scatterlist.h
deleted file mode 100644
index 017d7471c3c4..000000000000
--- a/arch/alpha/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ALPHA_SCATTERLIST_H
-#define _ALPHA_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* !(_ALPHA_SCATTERLIST_H) */
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 1d610c823c5e..77632c416bd9 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -84,6 +84,7 @@ config ARM
<http://www.arm.linux.org.uk/>.
config ARM_HAS_SG_CHAIN
+ select ARCH_HAS_SG_CHAIN
bool
config NEED_SG_DMA_LENGTH
@@ -1968,6 +1969,8 @@ config XIP_PHYS_ADDR
config KEXEC
bool "Kexec system call (EXPERIMENTAL)"
depends on (!SMP || PM_SLEEP_SMP)
+ select CRYPTO
+ select CRYPTO_SHA256
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index f5a357601983..70cd84eb7fda 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -22,6 +22,7 @@ generic-y += poll.h
generic-y += preempt.h
generic-y += resource.h
generic-y += rwsem.h
+generic-y += scatterlist.h
generic-y += sections.h
generic-y += segment.h
generic-y += sembuf.h
diff --git a/arch/arm/include/asm/scatterlist.h b/arch/arm/include/asm/scatterlist.h
deleted file mode 100644
index cefdb8f898a1..000000000000
--- a/arch/arm/include/asm/scatterlist.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASMARM_SCATTERLIST_H
-#define _ASMARM_SCATTERLIST_H
-
-#ifdef CONFIG_ARM_HAS_SG_CHAIN
-#define ARCH_HAS_SG_CHAIN
-#endif
-
-#include <asm/memory.h>
-#include <asm/types.h>
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASMARM_SCATTERLIST_H */
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 1f88db06b133..7a996aaa061e 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -26,6 +26,7 @@
#include <linux/io.h>
#include <linux/vmalloc.h>
#include <linux/sizes.h>
+#include <linux/cma.h>
#include <asm/memory.h>
#include <asm/highmem.h>
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 736a42a1242b..4bcd435b6fd4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2,6 +2,7 @@ config ARM64
def_bool y
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_HAS_OPP
+ select ARCH_HAS_SG_CHAIN
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_SUPPORTS_ATOMIC_RMW
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index afff5105909d..31742dfadff9 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -13,6 +13,7 @@ generic-y += linkage.h
generic-y += mcs_spinlock.h
generic-y += module.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
generic-y += vga.h
generic-y += xor.h
diff --git a/arch/cris/include/asm/scatterlist.h b/arch/cris/include/asm/scatterlist.h
deleted file mode 100644
index f11f8f40ec4a..000000000000
--- a/arch/cris/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_CRIS_SCATTERLIST_H
-#define __ASM_CRIS_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* !(__ASM_CRIS_SCATTERLIST_H) */
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 87b95eb8aee5..5b73921b6e9d 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -5,4 +5,5 @@ generic-y += exec.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/frv/include/asm/scatterlist.h b/arch/frv/include/asm/scatterlist.h
deleted file mode 100644
index 0e5eb3018468..000000000000
--- a/arch/frv/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_SCATTERLIST_H
-#define _ASM_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* !_ASM_SCATTERLIST_H */
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 2f3abcf8f6bc..8db0b5f02034 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -27,6 +27,7 @@ config IA64
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_VIRT_CPU_ACCOUNTING
+ select ARCH_HAS_SG_CHAIN
select VIRT_TO_BUS
select ARCH_DISCARD_MEMBLOCK
select GENERIC_IRQ_PROBE
@@ -547,6 +548,8 @@ source "drivers/sn/Kconfig"
config KEXEC
bool "kexec system call"
depends on !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
+ select CRYPTO
+ select CRYPTO_SHA256
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 0da4aa2602ae..e8317d2d6c8d 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -5,5 +5,6 @@ generic-y += hash.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
generic-y += vtime.h
diff --git a/arch/ia64/include/asm/scatterlist.h b/arch/ia64/include/asm/scatterlist.h
deleted file mode 100644
index 08fd93bff1db..000000000000
--- a/arch/ia64/include/asm/scatterlist.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _ASM_IA64_SCATTERLIST_H
-#define _ASM_IA64_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* _ASM_IA64_SCATTERLIST_H */
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 71c52bc7c28d..a149c6731d65 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -384,21 +384,6 @@ static struct irqaction timer_irqaction = {
.name = "timer"
};
-static struct platform_device rtc_efi_dev = {
- .name = "rtc-efi",
- .id = -1,
-};
-
-static int __init rtc_init(void)
-{
- if (platform_device_register(&rtc_efi_dev) < 0)
- printk(KERN_ERR "unable to register rtc device...\n");
-
- /* not necessarily an error */
- return 0;
-}
-module_init(rtc_init);
-
void read_persistent_clock(struct timespec *ts)
{
efi_gettimeofday(ts);
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index 67779a74b62d..accc10a3dc78 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -6,4 +6,5 @@ generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += module.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/m32r/include/asm/scatterlist.h b/arch/m32r/include/asm/scatterlist.h
deleted file mode 100644
index 7370b8b6243e..000000000000
--- a/arch/m32r/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_M32R_SCATTERLIST_H
-#define _ASM_M32R_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASM_M32R_SCATTERLIST_H */
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 87b7c7581b1d..3ff8c9a25335 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -91,6 +91,8 @@ config MMU_SUN3
config KEXEC
bool "kexec system call"
depends on M68KCLASSIC
+ select CRYPTO
+ select CRYPTO_SHA256
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index 3a480b3df0d6..d2263a0bdc3d 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -376,7 +376,6 @@ cache_flush_060 (unsigned long addr, int scope, int cache, unsigned long len)
asmlinkage int
sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len)
{
- struct vm_area_struct *vma;
int ret = -EINVAL;
if (scope < FLUSH_SCOPE_LINE || scope > FLUSH_SCOPE_ALL ||
@@ -389,16 +388,23 @@ sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len)
if (!capable(CAP_SYS_ADMIN))
goto out;
} else {
+ struct vm_area_struct *vma;
+ bool invalid;
+
+ /* Check for overflow. */
+ if (addr + len < addr)
+ goto out;
+
/*
* Verify that the specified address region actually belongs
* to this process.
*/
- vma = find_vma (current->mm, addr);
ret = -EINVAL;
- /* Check for overflow. */
- if (addr + len < addr)
- goto out;
- if (vma == NULL || addr < vma->vm_start || addr + len > vma->vm_end)
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma(current->mm, addr);
+ invalid = !vma || addr < vma->vm_start || addr + len > vma->vm_end;
+ up_read(&current->mm->mmap_sem);
+ if (invalid)
goto out;
}
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 35b3ecaf25d5..27a3acda6c19 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -7,5 +7,6 @@ generic-y += exec.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += syscalls.h
generic-y += trace_clock.h
diff --git a/arch/microblaze/include/asm/scatterlist.h b/arch/microblaze/include/asm/scatterlist.h
deleted file mode 100644
index 35d786fe93ae..000000000000
--- a/arch/microblaze/include/asm/scatterlist.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/scatterlist.h>
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 4e238e6e661c..1f7bd3269efa 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2392,6 +2392,8 @@ source "kernel/Kconfig.preempt"
config KEXEC
bool "Kexec system call"
+ select CRYPTO
+ select CRYPTO_SHA256
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index 654d5ba6e310..ecbd6676bd33 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -6,4 +6,5 @@ generic-y += exec.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/mn10300/include/asm/scatterlist.h b/arch/mn10300/include/asm/scatterlist.h
deleted file mode 100644
index 7baa4006008a..000000000000
--- a/arch/mn10300/include/asm/scatterlist.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* MN10300 Scatterlist definitions
- *
- * Copyright (C) 2007 Matsushita Electric Industrial Co., Ltd.
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-#ifndef _ASM_SCATTERLIST_H
-#define _ASM_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASM_SCATTERLIST_H */
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 80b94b0add1f..a577609f8ed6 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -111,6 +111,7 @@ config PPC
select HAVE_DMA_API_DEBUG
select HAVE_OPROFILE
select HAVE_DEBUG_KMEMLEAK
+ select ARCH_HAS_SG_CHAIN
select GENERIC_ATOMIC64 if PPC32
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_PERF_EVENTS
@@ -398,6 +399,8 @@ config PPC64_SUPPORTS_MEMORY_FAILURE
config KEXEC
bool "kexec system call"
depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP))
+ select CRYPTO
+ select CRYPTO_SHA256
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 3fb1bc432f4f..7f23f162ce9c 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -4,5 +4,6 @@ generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
generic-y += rwsem.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
generic-y += vtime.h
diff --git a/arch/powerpc/include/asm/scatterlist.h b/arch/powerpc/include/asm/scatterlist.h
deleted file mode 100644
index de1f620bd5c9..000000000000
--- a/arch/powerpc/include/asm/scatterlist.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef _ASM_POWERPC_SCATTERLIST_H
-#define _ASM_POWERPC_SCATTERLIST_H
-/*
- * Copyright (C) 2001 PPC64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/dma.h>
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* _ASM_POWERPC_SCATTERLIST_H */
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index ce569b6bf4d8..72905c30082e 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -90,7 +90,6 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
book3s_hv_rm_mmu.o \
book3s_hv_ras.o \
book3s_hv_builtin.o \
- book3s_hv_cma.o \
$(kvm-book3s_64-builtin-xics-objs-y)
endif
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 2d154d9319b3..93218ae41f23 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,8 +37,6 @@
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
-#include "book3s_hv_cma.h"
-
/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
#define MAX_LPID_970 63
@@ -64,10 +62,10 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
}
kvm->arch.hpt_cma_alloc = 0;
- VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
if (page) {
hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+ memset((void *)hpt, 0, (1 << order));
kvm->arch.hpt_cma_alloc = 1;
}
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 3b41447482e5..329d7fdd0a6a 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -16,12 +16,14 @@
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/sizes.h>
+#include <linux/cma.h>
#include <asm/cputable.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include "book3s_hv_cma.h"
+#define KVM_CMA_CHUNK_ORDER 18
+
/*
* Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
* should be power of 2.
@@ -43,6 +45,8 @@ static unsigned long kvm_cma_resv_ratio = 5;
unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */
EXPORT_SYMBOL_GPL(kvm_rma_pages);
+static struct cma *kvm_cma;
+
/* Work out RMLS (real mode limit selector) field value for a given RMA size.
Assumes POWER7 or PPC970. */
static inline int lpcr_rmls(unsigned long rma_size)
@@ -97,7 +101,7 @@ struct kvm_rma_info *kvm_alloc_rma()
ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
if (!ri)
return NULL;
- page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
+ page = cma_alloc(kvm_cma, kvm_rma_pages, get_order(kvm_rma_pages));
if (!page)
goto err_out;
atomic_set(&ri->use_count, 1);
@@ -112,7 +116,7 @@ EXPORT_SYMBOL_GPL(kvm_alloc_rma);
void kvm_release_rma(struct kvm_rma_info *ri)
{
if (atomic_dec_and_test(&ri->use_count)) {
- kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
+ cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages);
kfree(ri);
}
}
@@ -131,16 +135,18 @@ struct page *kvm_alloc_hpt(unsigned long nr_pages)
{
unsigned long align_pages = HPT_ALIGN_PAGES;
+ VM_BUG_ON(get_order(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+
/* Old CPUs require HPT aligned on a multiple of its size */
if (!cpu_has_feature(CPU_FTR_ARCH_206))
align_pages = nr_pages;
- return kvm_alloc_cma(nr_pages, align_pages);
+ return cma_alloc(kvm_cma, nr_pages, get_order(align_pages));
}
EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
void kvm_release_hpt(struct page *page, unsigned long nr_pages)
{
- kvm_release_cma(page, nr_pages);
+ cma_release(kvm_cma, page, nr_pages);
}
EXPORT_SYMBOL_GPL(kvm_release_hpt);
@@ -179,7 +185,8 @@ void __init kvm_cma_reserve(void)
align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
- kvm_cma_declare_contiguous(selected_size, align_size);
+ cma_declare_contiguous(0, selected_size, 0, align_size,
+ KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma);
}
}
diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c
deleted file mode 100644
index d9d3d8553d51..000000000000
--- a/arch/powerpc/kvm/book3s_hv_cma.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
- * for DMA mapping framework
- *
- * Copyright IBM Corporation, 2013
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License or (at your optional) any later version of the license.
- *
- */
-#define pr_fmt(fmt) "kvm_cma: " fmt
-
-#ifdef CONFIG_CMA_DEBUG
-#ifndef DEBUG
-# define DEBUG
-#endif
-#endif
-
-#include <linux/memblock.h>
-#include <linux/mutex.h>
-#include <linux/sizes.h>
-#include <linux/slab.h>
-
-#include "book3s_hv_cma.h"
-
-struct kvm_cma {
- unsigned long base_pfn;
- unsigned long count;
- unsigned long *bitmap;
-};
-
-static DEFINE_MUTEX(kvm_cma_mutex);
-static struct kvm_cma kvm_cma_area;
-
-/**
- * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling
- * for kvm hash pagetable
- * @size: Size of the reserved memory.
- * @alignment: Alignment for the contiguous memory area
- *
- * This function reserves memory for kvm cma area. It should be
- * called by arch code when early allocator (memblock or bootmem)
- * is still activate.
- */
-long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment)
-{
- long base_pfn;
- phys_addr_t addr;
- struct kvm_cma *cma = &kvm_cma_area;
-
- pr_debug("%s(size %lx)\n", __func__, (unsigned long)size);
-
- if (!size)
- return -EINVAL;
- /*
- * Sanitise input arguments.
- * We should be pageblock aligned for CMA.
- */
- alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order));
- size = ALIGN(size, alignment);
- /*
- * Reserve memory
- * Use __memblock_alloc_base() since
- * memblock_alloc_base() panic()s.
- */
- addr = __memblock_alloc_base(size, alignment, 0);
- if (!addr) {
- base_pfn = -ENOMEM;
- goto err;
- } else
- base_pfn = PFN_DOWN(addr);
-
- /*
- * Each reserved area must be initialised later, when more kernel
- * subsystems (like slab allocator) are available.
- */
- cma->base_pfn = base_pfn;
- cma->count = size >> PAGE_SHIFT;
- pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M);
- return 0;
-err:
- pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
- return base_pfn;
-}
-
-/**
- * kvm_alloc_cma() - allocate pages from contiguous area
- * @nr_pages: Requested number of pages.
- * @align_pages: Requested alignment in number of pages
- *
- * This function allocates memory buffer for hash pagetable.
- */
-struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
-{
- int ret;
- struct page *page = NULL;
- struct kvm_cma *cma = &kvm_cma_area;
- unsigned long chunk_count, nr_chunk;
- unsigned long mask, pfn, pageno, start = 0;
-
-
- if (!cma || !cma->count)
- return NULL;
-
- pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__,
- (void *)cma, nr_pages, align_pages);
-
- if (!nr_pages)
- return NULL;
- /*
- * align mask with chunk size. The bit tracks pages in chunk size
- */
- VM_BUG_ON(!is_power_of_2(align_pages));
- mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1;
- BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER);
-
- chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
- nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
-
- mutex_lock(&kvm_cma_mutex);
- for (;;) {
- pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count,
- start, nr_chunk, mask);
- if (pageno >= chunk_count)
- break;
-
- pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT));
- ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA);
- if (ret == 0) {
- bitmap_set(cma->bitmap, pageno, nr_chunk);
- page = pfn_to_page(pfn);
- memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT);
- break;
- } else if (ret != -EBUSY) {
- break;
- }
- pr_debug("%s(): memory range at %p is busy, retrying\n",
- __func__, pfn_to_page(pfn));
- /* try again with a bit different memory target */
- start = pageno + mask + 1;
- }
- mutex_unlock(&kvm_cma_mutex);
- pr_debug("%s(): returned %p\n", __func__, page);
- return page;
-}
-
-/**
- * kvm_release_cma() - release allocated pages for hash pagetable
- * @pages: Allocated pages.
- * @nr_pages: Number of allocated pages.
- *
- * This function releases memory allocated by kvm_alloc_cma().
- * It returns false when provided pages do not belong to contiguous area and
- * true otherwise.
- */
-bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
-{
- unsigned long pfn;
- unsigned long nr_chunk;
- struct kvm_cma *cma = &kvm_cma_area;
-
- if (!cma || !pages)
- return false;
-
- pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages);
-
- pfn = page_to_pfn(pages);
-
- if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
- return false;
-
- VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count);
- nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
-
- mutex_lock(&kvm_cma_mutex);
- bitmap_clear(cma->bitmap,
- (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT),
- nr_chunk);
- free_contig_range(pfn, nr_pages);
- mutex_unlock(&kvm_cma_mutex);
-
- return true;
-}
-
-static int __init kvm_cma_activate_area(unsigned long base_pfn,
- unsigned long count)
-{
- unsigned long pfn = base_pfn;
- unsigned i = count >> pageblock_order;
- struct zone *zone;
-
- WARN_ON_ONCE(!pfn_valid(pfn));
- zone = page_zone(pfn_to_page(pfn));
- do {
- unsigned j;
- base_pfn = pfn;
- for (j = pageblock_nr_pages; j; --j, pfn++) {
- WARN_ON_ONCE(!pfn_valid(pfn));
- /*
- * alloc_contig_range requires the pfn range
- * specified to be in the same zone. Make this
- * simple by forcing the entire CMA resv range
- * to be in the same zone.
- */
- if (page_zone(pfn_to_page(pfn)) != zone)
- return -EINVAL;
- }
- init_cma_reserved_pageblock(pfn_to_page(base_pfn));
- } while (--i);
- return 0;
-}
-
-static int __init kvm_cma_init_reserved_areas(void)
-{
- int bitmap_size, ret;
- unsigned long chunk_count;
- struct kvm_cma *cma = &kvm_cma_area;
-
- pr_debug("%s()\n", __func__);
- if (!cma->count)
- return 0;
- chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
- bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long);
- cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
- if (!cma->bitmap)
- return -ENOMEM;
-
- ret = kvm_cma_activate_area(cma->base_pfn, cma->count);
- if (ret)
- goto error;
- return 0;
-
-error:
- kfree(cma->bitmap);
- return ret;
-}
-core_initcall(kvm_cma_init_reserved_areas);
diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h
deleted file mode 100644
index 655144f75fa5..000000000000
--- a/arch/powerpc/kvm/book3s_hv_cma.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA
- * for DMA mapping framework
- *
- * Copyright IBM Corporation, 2013
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License or (at your optional) any later version of the license.
- *
- */
-
-#ifndef __POWERPC_KVM_CMA_ALLOC_H__
-#define __POWERPC_KVM_CMA_ALLOC_H__
-/*
- * Both RMA and Hash page allocation will be multiple of 256K.
- */
-#define KVM_CMA_CHUNK_ORDER 18
-
-extern struct page *kvm_alloc_cma(unsigned long nr_pages,
- unsigned long align_pages);
-extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages);
-extern long kvm_cma_declare_contiguous(phys_addr_t size,
- phys_addr_t alignment) __init;
-#endif
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 7b6c10750179..d85e86aac7fb 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -33,6 +33,7 @@
#include <linux/export.h>
#include <asm/tlbflush.h>
+#include <asm/dma.h>
#include "mmu_decl.h"
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
index 534574a97ec9..3a104284b338 100644
--- a/arch/powerpc/platforms/44x/warp.c
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -25,6 +25,7 @@
#include <asm/time.h>
#include <asm/uic.h>
#include <asm/ppc4xx.h>
+#include <asm/dma.h>
static __initdata struct of_device_id warp_of_bus[] = {
diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c
index 6e19b0ad5d26..3feffde9128d 100644
--- a/arch/powerpc/platforms/52xx/efika.c
+++ b/arch/powerpc/platforms/52xx/efika.c
@@ -13,6 +13,7 @@
#include <generated/utsrelease.h>
#include <linux/pci.h>
#include <linux/of.h>
+#include <asm/dma.h>
#include <asm/prom.h>
#include <asm/time.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c
index 03aabc0e16ac..2fe12046279e 100644
--- a/arch/powerpc/platforms/amigaone/setup.c
+++ b/arch/powerpc/platforms/amigaone/setup.c
@@ -24,6 +24,7 @@
#include <asm/i8259.h>
#include <asm/time.h>
#include <asm/udbg.h>
+#include <asm/dma.h>
extern void __flush_disable_L1(void);
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index bb63499fc5d3..4ae4af600d7d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -48,6 +48,8 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
config KEXEC
def_bool y
+ select CRYPTO
+ select CRYPTO_SHA256
config AUDIT_ARCH
def_bool y
@@ -146,6 +148,7 @@ config S390
select TTY
select VIRT_CPU_ACCOUNTING
select VIRT_TO_BUS
+ select ARCH_HAS_SG_CHAIN
config SCHED_OMIT_FRAME_POINTER
def_bool y
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 57892a8a9055..b3fea0722ff1 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -4,4 +4,5 @@ generic-y += clkdev.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h
deleted file mode 100644
index 6d45ef6c12a7..000000000000
--- a/arch/s390/include/asm/scatterlist.h
+++ /dev/null
@@ -1,3 +0,0 @@
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index 2f947aba4bd4..aad209199f7e 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -8,5 +8,6 @@ generic-y += cputime.h
generic-y += hash.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
generic-y += xor.h
diff --git a/arch/score/include/asm/scatterlist.h b/arch/score/include/asm/scatterlist.h
deleted file mode 100644
index 9f533b8362c7..000000000000
--- a/arch/score/include/asm/scatterlist.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_SCORE_SCATTERLIST_H
-#define _ASM_SCORE_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* _ASM_SCORE_SCATTERLIST_H */
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 834b67c4db5a..09e12e1908a2 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -596,6 +596,8 @@ source kernel/Kconfig.hz
config KEXEC
bool "kexec system call (EXPERIMENTAL)"
depends on SUPERH32 && MMU
+ select CRYPTO
+ select CRYPTO_SHA256
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index d4d16e4be07c..bf5b3f5f4962 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -32,7 +32,8 @@ endif
cflags-$(CONFIG_CPU_SH2) := $(call cc-option,-m2,)
cflags-$(CONFIG_CPU_SH2A) += $(call cc-option,-m2a,) \
- $(call cc-option,-m2a-nofpu,)
+ $(call cc-option,-m2a-nofpu,) \
+ $(call cc-option,-m4-nofpu,)
cflags-$(CONFIG_CPU_SH3) := $(call cc-option,-m3,)
cflags-$(CONFIG_CPU_SH4) := $(call cc-option,-m4,) \
$(call cc-option,-mno-implicit-fp,-m4-nofpu)
diff --git a/arch/sh/drivers/dma/Kconfig b/arch/sh/drivers/dma/Kconfig
index cfd5b90a8628..78bc97b1d027 100644
--- a/arch/sh/drivers/dma/Kconfig
+++ b/arch/sh/drivers/dma/Kconfig
@@ -12,9 +12,8 @@ config SH_DMA_IRQ_MULTI
default y if CPU_SUBTYPE_SH7750 || CPU_SUBTYPE_SH7751 || \
CPU_SUBTYPE_SH7750S || CPU_SUBTYPE_SH7750R || \
CPU_SUBTYPE_SH7751R || CPU_SUBTYPE_SH7091 || \
- CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7764 || \
- CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \
- CPU_SUBTYPE_SH7760
+ CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7780 || \
+ CPU_SUBTYPE_SH7785 || CPU_SUBTYPE_SH7760
config SH_DMA_API
depends on SH_DMA
diff --git a/arch/sh/include/cpu-sh4/cpu/dma-register.h b/arch/sh/include/cpu-sh4/cpu/dma-register.h
index 02788b6a03b7..9cd81e54056a 100644
--- a/arch/sh/include/cpu-sh4/cpu/dma-register.h
+++ b/arch/sh/include/cpu-sh4/cpu/dma-register.h
@@ -32,7 +32,6 @@
#define CHCR_TS_HIGH_SHIFT (20 - 2) /* 2 bits for shifted low TS */
#elif defined(CONFIG_CPU_SUBTYPE_SH7757) || \
defined(CONFIG_CPU_SUBTYPE_SH7763) || \
- defined(CONFIG_CPU_SUBTYPE_SH7764) || \
defined(CONFIG_CPU_SUBTYPE_SH7780) || \
defined(CONFIG_CPU_SUBTYPE_SH7785)
#define CHCR_TS_LOW_MASK 0x00000018
diff --git a/arch/sh/include/cpu-sh4a/cpu/dma.h b/arch/sh/include/cpu-sh4a/cpu/dma.h
index 89afb650ce25..8ceccceae844 100644
--- a/arch/sh/include/cpu-sh4a/cpu/dma.h
+++ b/arch/sh/include/cpu-sh4a/cpu/dma.h
@@ -14,8 +14,7 @@
#define DMTE4_IRQ evt2irq(0xb80)
#define DMAE0_IRQ evt2irq(0xbc0) /* DMA Error IRQ*/
#define SH_DMAC_BASE0 0xFE008020
-#elif defined(CONFIG_CPU_SUBTYPE_SH7763) || \
- defined(CONFIG_CPU_SUBTYPE_SH7764)
+#elif defined(CONFIG_CPU_SUBTYPE_SH7763)
#define DMTE0_IRQ evt2irq(0x640)
#define DMTE4_IRQ evt2irq(0x780)
#define DMAE0_IRQ evt2irq(0x6c0)
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
index f579dd528198..c187b9579c21 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
@@ -307,7 +307,7 @@ static struct clk_lookup lookups[] = {
CLKDEV_ICK_ID("fck", "sh-tmu.0", &mstp_clks[HWBLK_TMU0]),
CLKDEV_ICK_ID("fck", "sh-tmu.1", &mstp_clks[HWBLK_TMU1]),
- CLKDEV_ICK_ID("fck", "sh-cmt-16.0", &mstp_clks[HWBLK_CMT]),
+ CLKDEV_ICK_ID("fck", "sh-cmt-32.0", &mstp_clks[HWBLK_CMT]),
CLKDEV_DEV_ID("sh-wdt.0", &mstp_clks[HWBLK_RWDT]),
CLKDEV_DEV_ID("sh-dma-engine.1", &mstp_clks[HWBLK_DMAC1]),
@@ -332,6 +332,8 @@ static struct clk_lookup lookups[] = {
CLKDEV_CON_ID("tsif0", &mstp_clks[HWBLK_TSIF]),
CLKDEV_DEV_ID("renesas_usbhs.1", &mstp_clks[HWBLK_USB1]),
CLKDEV_DEV_ID("renesas_usbhs.0", &mstp_clks[HWBLK_USB0]),
+ CLKDEV_CON_ID("usb1", &mstp_clks[HWBLK_USB1]),
+ CLKDEV_CON_ID("usb0", &mstp_clks[HWBLK_USB0]),
CLKDEV_CON_ID("2dg0", &mstp_clks[HWBLK_2DG]),
CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[HWBLK_SDHI0]),
CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[HWBLK_SDHI1]),
diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index 552c8fcf9416..d6d0a986c6e9 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -80,10 +80,8 @@ static int __init rtc_generic_init(void)
return -ENODEV;
pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
- if (IS_ERR(pdev))
- return PTR_ERR(pdev);
- return 0;
+ return PTR_ERR_OR_ZERO(pdev);
}
module_init(rtc_generic_init);
diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c
index 74c03ecc4871..ecfc6b0c1da1 100644
--- a/arch/sh/mm/asids-debugfs.c
+++ b/arch/sh/mm/asids-debugfs.c
@@ -67,10 +67,8 @@ static int __init asids_debugfs_init(void)
NULL, &asids_debugfs_fops);
if (!asids_dentry)
return -ENOMEM;
- if (IS_ERR(asids_dentry))
- return PTR_ERR(asids_dentry);
- return 0;
+ return PTR_ERR_OR_ZERO(asids_dentry);
}
module_init(asids_debugfs_init);
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 407c87d9879a..bff3192219df 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -42,6 +42,7 @@ config SPARC
select MODULES_USE_ELF_RELA
select ODD_RT_SIGACTION
select OLD_SIGSUSPEND
+ select ARCH_HAS_SG_CHAIN
config SPARC32
def_bool !64BIT
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index a45821818003..cdd1b447bb6c 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -15,6 +15,7 @@ generic-y += mcs_spinlock.h
generic-y += module.h
generic-y += mutex.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += serial.h
generic-y += trace_clock.h
generic-y += types.h
diff --git a/arch/sparc/include/asm/scatterlist.h b/arch/sparc/include/asm/scatterlist.h
deleted file mode 100644
index 92bb638313f8..000000000000
--- a/arch/sparc/include/asm/scatterlist.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _SPARC_SCATTERLIST_H
-#define _SPARC_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* !(_SPARC_SCATTERLIST_H) */
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 4f3006b600e3..a37cdfb331e9 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -192,6 +192,8 @@ source "kernel/Kconfig.hz"
config KEXEC
bool "kexec system call"
+ select CRYPTO
+ select CRYPTO_SHA256
---help---
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c
index 4918d91bc3a6..d19b13e3a59f 100644
--- a/arch/tile/kernel/module.c
+++ b/arch/tile/kernel/module.c
@@ -58,7 +58,7 @@ void *module_alloc(unsigned long size)
area->nr_pages = npages;
area->pages = pages;
- if (map_vm_area(area, prot_rwx, &pages)) {
+ if (map_vm_area(area, prot_rwx, pages)) {
vunmap(area->addr);
goto error;
}
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index a5e4b6068213..7bd64aa2e94a 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -21,6 +21,7 @@ generic-y += param.h
generic-y += pci.h
generic-y += percpu.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += sections.h
generic-y += switch_to.h
generic-y += topology.h
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index e5287d8517aa..61b6d51866f8 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -16,3 +16,7 @@ obj-$(CONFIG_IA32_EMULATION) += ia32/
obj-y += platform/
obj-y += net/
+
+ifeq ($(CONFIG_X86_64),y)
+obj-$(CONFIG_KEXEC) += purgatory/
+endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 66be0176be29..d520d333a1b5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -96,6 +96,7 @@ config X86
select IRQ_FORCED_THREADING
select HAVE_BPF_JIT if X86_64
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select ARCH_HAS_SG_CHAIN
select CLKEVT_I8253
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_IOMAP
@@ -1579,6 +1580,9 @@ source kernel/Kconfig.hz
config KEXEC
bool "kexec system call"
+ select BUILD_BIN2C
+ select CRYPTO
+ select CRYPTO_SHA256
---help---
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index c65fd9650467..c1aa36887843 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -183,6 +183,14 @@ archscripts: scripts_basic
archheaders:
$(Q)$(MAKE) $(build)=arch/x86/syscalls all
+archprepare:
+ifeq ($(CONFIG_KEXEC),y)
+# Build only for 64bit. No loaders for 32bit yet.
+ ifeq ($(CONFIG_X86_64),y)
+ $(Q)$(MAKE) $(build)=arch/x86/purgatory arch/x86/purgatory/kexec-purgatory.c
+ endif
+endif
+
###
# Kernel objects
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 3ca9762e1649..3bf000fab0ae 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -5,6 +5,7 @@ genhdr-y += unistd_64.h
genhdr-y += unistd_x32.h
generic-y += clkdev.h
-generic-y += early_ioremap.h
generic-y += cputime.h
+generic-y += early_ioremap.h
generic-y += mcs_spinlock.h
+generic-y += scatterlist.h
diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h
new file mode 100644
index 000000000000..f498411f2500
--- /dev/null
+++ b/arch/x86/include/asm/crash.h
@@ -0,0 +1,9 @@
+#ifndef _ASM_X86_CRASH_H
+#define _ASM_X86_CRASH_H
+
+int crash_load_segments(struct kimage *image);
+int crash_copy_backup_region(struct kimage *image);
+int crash_setup_memmap_entries(struct kimage *image,
+ struct boot_params *params);
+
+#endif /* _ASM_X86_CRASH_H */
diff --git a/arch/x86/include/asm/kexec-bzimage64.h b/arch/x86/include/asm/kexec-bzimage64.h
new file mode 100644
index 000000000000..d1b5d194e31d
--- /dev/null
+++ b/arch/x86/include/asm/kexec-bzimage64.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_KEXEC_BZIMAGE64_H
+#define _ASM_KEXEC_BZIMAGE64_H
+
+extern struct kexec_file_ops kexec_bzImage64_ops;
+
+#endif /* _ASM_KEXE_BZIMAGE64_H */
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 17483a492f18..d2434c1cad05 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -23,6 +23,9 @@
#include <asm/page.h>
#include <asm/ptrace.h>
+#include <asm/bootparam.h>
+
+struct kimage;
/*
* KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
@@ -61,6 +64,10 @@
# define KEXEC_ARCH KEXEC_ARCH_X86_64
#endif
+/* Memory to backup during crash kdump */
+#define KEXEC_BACKUP_SRC_START (0UL)
+#define KEXEC_BACKUP_SRC_END (640 * 1024UL) /* 640K */
+
/*
* CPU does not save ss and sp on stack if execution is already
* running in kernel mode at the time of NMI occurrence. This code
@@ -160,6 +167,44 @@ struct kimage_arch {
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
+ /* Details of backup region */
+ unsigned long backup_src_start;
+ unsigned long backup_src_sz;
+
+ /* Physical address of backup segment */
+ unsigned long backup_load_addr;
+
+ /* Core ELF header buffer */
+ void *elf_headers;
+ unsigned long elf_headers_sz;
+ unsigned long elf_load_addr;
+};
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_X86_64
+/*
+ * Number of elements and order of elements in this structure should match
+ * with the ones in arch/x86/purgatory/entry64.S. If you make a change here
+ * make an appropriate change in purgatory too.
+ */
+struct kexec_entry64_regs {
+ uint64_t rax;
+ uint64_t rcx;
+ uint64_t rdx;
+ uint64_t rbx;
+ uint64_t rsp;
+ uint64_t rbp;
+ uint64_t rsi;
+ uint64_t rdi;
+ uint64_t r8;
+ uint64_t r9;
+ uint64_t r10;
+ uint64_t r11;
+ uint64_t r12;
+ uint64_t r13;
+ uint64_t r14;
+ uint64_t r15;
+ uint64_t rip;
};
#endif
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 4064acae625d..01b493e5a99b 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -9,7 +9,6 @@
#ifdef CONFIG_NUMA
#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
-#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
/*
* Too small node sizes may confuse the VM badly. Usually they
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 5be9063545d2..809abb335627 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -115,7 +115,8 @@ static inline void native_pgd_clear(pgd_t *pgd)
native_set_pgd(pgd, native_make_pgd(0));
}
-extern void sync_global_pgds(unsigned long start, unsigned long end);
+extern void sync_global_pgds(unsigned long start, unsigned long end,
+ int removed);
/*
* Conversion functions: convert a page and protection to a page entry,
diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h
deleted file mode 100644
index 4240878b9d76..000000000000
--- a/arch/x86/include/asm/scatterlist.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _ASM_X86_SCATTERLIST_H
-#define _ASM_X86_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
-
-#endif /* _ASM_X86_SCATTERLIST_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 047f9ff2e36c..ece67cbe139d 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -117,4 +117,5 @@ ifeq ($(CONFIG_X86_64),y)
obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
obj-y += vsmp_64.o
+ obj-$(CONFIG_KEXEC) += kexec-bzimage64.o
endif
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 507de8066594..0553a34fa0df 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -4,9 +4,14 @@
* Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
*
* Copyright (C) IBM Corporation, 2004. All rights reserved.
+ * Copyright (C) Red Hat Inc., 2014. All rights reserved.
+ * Authors:
+ * Vivek Goyal <vgoyal@redhat.com>
*
*/
+#define pr_fmt(fmt) "kexec: " fmt
+
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/smp.h>
@@ -16,6 +21,7 @@
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/hardirq.h>
@@ -28,6 +34,45 @@
#include <asm/reboot.h>
#include <asm/virtext.h>
+/* Alignment required for elf header segment */
+#define ELF_CORE_HEADER_ALIGN 4096
+
+/* This primarily represents number of split ranges due to exclusion */
+#define CRASH_MAX_RANGES 16
+
+struct crash_mem_range {
+ u64 start, end;
+};
+
+struct crash_mem {
+ unsigned int nr_ranges;
+ struct crash_mem_range ranges[CRASH_MAX_RANGES];
+};
+
+/* Misc data about ram ranges needed to prepare elf headers */
+struct crash_elf_data {
+ struct kimage *image;
+ /*
+ * Total number of ram ranges we have after various adjustments for
+ * GART, crash reserved region etc.
+ */
+ unsigned int max_nr_ranges;
+ unsigned long gart_start, gart_end;
+
+ /* Pointer to elf header */
+ void *ehdr;
+ /* Pointer to next phdr */
+ void *bufp;
+ struct crash_mem mem;
+};
+
+/* Used while preparing memory map entries for second kernel */
+struct crash_memmap_data {
+ struct boot_params *params;
+ /* Type of memory */
+ unsigned int type;
+};
+
int in_crash_kexec;
/*
@@ -39,6 +84,7 @@ int in_crash_kexec;
*/
crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
+unsigned long crash_zero_bytes;
static inline void cpu_crash_vmclear_loaded_vmcss(void)
{
@@ -135,3 +181,520 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
#endif
crash_save_cpu(regs, safe_smp_processor_id());
}
+
+#ifdef CONFIG_X86_64
+
+static int get_nr_ram_ranges_callback(unsigned long start_pfn,
+ unsigned long nr_pfn, void *arg)
+{
+ int *nr_ranges = arg;
+
+ (*nr_ranges)++;
+ return 0;
+}
+
+static int get_gart_ranges_callback(u64 start, u64 end, void *arg)
+{
+ struct crash_elf_data *ced = arg;
+
+ ced->gart_start = start;
+ ced->gart_end = end;
+
+ /* Not expecting more than 1 gart aperture */
+ return 1;
+}
+
+
+/* Gather all the required information to prepare elf headers for ram regions */
+static void fill_up_crash_elf_data(struct crash_elf_data *ced,
+ struct kimage *image)
+{
+ unsigned int nr_ranges = 0;
+
+ ced->image = image;
+
+ walk_system_ram_range(0, -1, &nr_ranges,
+ get_nr_ram_ranges_callback);
+
+ ced->max_nr_ranges = nr_ranges;
+
+ /*
+ * We don't create ELF headers for GART aperture as an attempt
+ * to dump this memory in second kernel leads to hang/crash.
+ * If gart aperture is present, one needs to exclude that region
+ * and that could lead to need of extra phdr.
+ */
+ walk_iomem_res("GART", IORESOURCE_MEM, 0, -1,
+ ced, get_gart_ranges_callback);
+
+ /*
+ * If we have gart region, excluding that could potentially split
+ * a memory range, resulting in extra header. Account for that.
+ */
+ if (ced->gart_end)
+ ced->max_nr_ranges++;
+
+ /* Exclusion of crash region could split memory ranges */
+ ced->max_nr_ranges++;
+
+ /* If crashk_low_res is not 0, another range split possible */
+ if (crashk_low_res.end != 0)
+ ced->max_nr_ranges++;
+}
+
+static int exclude_mem_range(struct crash_mem *mem,
+ unsigned long long mstart, unsigned long long mend)
+{
+ int i, j;
+ unsigned long long start, end;
+ struct crash_mem_range temp_range = {0, 0};
+
+ for (i = 0; i < mem->nr_ranges; i++) {
+ start = mem->ranges[i].start;
+ end = mem->ranges[i].end;
+
+ if (mstart > end || mend < start)
+ continue;
+
+ /* Truncate any area outside of range */
+ if (mstart < start)
+ mstart = start;
+ if (mend > end)
+ mend = end;
+
+ /* Found completely overlapping range */
+ if (mstart == start && mend == end) {
+ mem->ranges[i].start = 0;
+ mem->ranges[i].end = 0;
+ if (i < mem->nr_ranges - 1) {
+ /* Shift rest of the ranges to left */
+ for (j = i; j < mem->nr_ranges - 1; j++) {
+ mem->ranges[j].start =
+ mem->ranges[j+1].start;
+ mem->ranges[j].end =
+ mem->ranges[j+1].end;
+ }
+ }
+ mem->nr_ranges--;
+ return 0;
+ }
+
+ if (mstart > start && mend < end) {
+ /* Split original range */
+ mem->ranges[i].end = mstart - 1;
+ temp_range.start = mend + 1;
+ temp_range.end = end;
+ } else if (mstart != start)
+ mem->ranges[i].end = mstart - 1;
+ else
+ mem->ranges[i].start = mend + 1;
+ break;
+ }
+
+ /* If a split happend, add the split to array */
+ if (!temp_range.end)
+ return 0;
+
+ /* Split happened */
+ if (i == CRASH_MAX_RANGES - 1) {
+ pr_err("Too many crash ranges after split\n");
+ return -ENOMEM;
+ }
+
+ /* Location where new range should go */
+ j = i + 1;
+ if (j < mem->nr_ranges) {
+ /* Move over all ranges one slot towards the end */
+ for (i = mem->nr_ranges - 1; i >= j; i--)
+ mem->ranges[i + 1] = mem->ranges[i];
+ }
+
+ mem->ranges[j].start = temp_range.start;
+ mem->ranges[j].end = temp_range.end;
+ mem->nr_ranges++;
+ return 0;
+}
+
+/*
+ * Look for any unwanted ranges between mstart, mend and remove them. This
+ * might lead to split and split ranges are put in ced->mem.ranges[] array
+ */
+static int elf_header_exclude_ranges(struct crash_elf_data *ced,
+ unsigned long long mstart, unsigned long long mend)
+{
+ struct crash_mem *cmem = &ced->mem;
+ int ret = 0;
+
+ memset(cmem->ranges, 0, sizeof(cmem->ranges));
+
+ cmem->ranges[0].start = mstart;
+ cmem->ranges[0].end = mend;
+ cmem->nr_ranges = 1;
+
+ /* Exclude crashkernel region */
+ ret = exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+ if (ret)
+ return ret;
+
+ ret = exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
+ if (ret)
+ return ret;
+
+ /* Exclude GART region */
+ if (ced->gart_end) {
+ ret = exclude_mem_range(cmem, ced->gart_start, ced->gart_end);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+static int prepare_elf64_ram_headers_callback(u64 start, u64 end, void *arg)
+{
+ struct crash_elf_data *ced = arg;
+ Elf64_Ehdr *ehdr;
+ Elf64_Phdr *phdr;
+ unsigned long mstart, mend;
+ struct kimage *image = ced->image;
+ struct crash_mem *cmem;
+ int ret, i;
+
+ ehdr = ced->ehdr;
+
+ /* Exclude unwanted mem ranges */
+ ret = elf_header_exclude_ranges(ced, start, end);
+ if (ret)
+ return ret;
+
+ /* Go through all the ranges in ced->mem.ranges[] and prepare phdr */
+ cmem = &ced->mem;
+
+ for (i = 0; i < cmem->nr_ranges; i++) {
+ mstart = cmem->ranges[i].start;
+ mend = cmem->ranges[i].end;
+
+ phdr = ced->bufp;
+ ced->bufp += sizeof(Elf64_Phdr);
+
+ phdr->p_type = PT_LOAD;
+ phdr->p_flags = PF_R|PF_W|PF_X;
+ phdr->p_offset = mstart;
+
+ /*
+ * If a range matches backup region, adjust offset to backup
+ * segment.
+ */
+ if (mstart == image->arch.backup_src_start &&
+ (mend - mstart + 1) == image->arch.backup_src_sz)
+ phdr->p_offset = image->arch.backup_load_addr;
+
+ phdr->p_paddr = mstart;
+ phdr->p_vaddr = (unsigned long long) __va(mstart);
+ phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
+ phdr->p_align = 0;
+ ehdr->e_phnum++;
+ pr_debug("Crash PT_LOAD elf header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
+ phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
+ ehdr->e_phnum, phdr->p_offset);
+ }
+
+ return ret;
+}
+
+static int prepare_elf64_headers(struct crash_elf_data *ced,
+ void **addr, unsigned long *sz)
+{
+ Elf64_Ehdr *ehdr;
+ Elf64_Phdr *phdr;
+ unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz;
+ unsigned char *buf, *bufp;
+ unsigned int cpu;
+ unsigned long long notes_addr;
+ int ret;
+
+ /* extra phdr for vmcoreinfo elf note */
+ nr_phdr = nr_cpus + 1;
+ nr_phdr += ced->max_nr_ranges;
+
+ /*
+ * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping
+ * area on x86_64 (ffffffff80000000 - ffffffffa0000000).
+ * I think this is required by tools like gdb. So same physical
+ * memory will be mapped in two elf headers. One will contain kernel
+ * text virtual addresses and other will have __va(physical) addresses.
+ */
+
+ nr_phdr++;
+ elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr);
+ elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN);
+
+ buf = vzalloc(elf_sz);
+ if (!buf)
+ return -ENOMEM;
+
+ bufp = buf;
+ ehdr = (Elf64_Ehdr *)bufp;
+ bufp += sizeof(Elf64_Ehdr);
+ memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
+ ehdr->e_ident[EI_CLASS] = ELFCLASS64;
+ ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
+ ehdr->e_ident[EI_VERSION] = EV_CURRENT;
+ ehdr->e_ident[EI_OSABI] = ELF_OSABI;
+ memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
+ ehdr->e_type = ET_CORE;
+ ehdr->e_machine = ELF_ARCH;
+ ehdr->e_version = EV_CURRENT;
+ ehdr->e_phoff = sizeof(Elf64_Ehdr);
+ ehdr->e_ehsize = sizeof(Elf64_Ehdr);
+ ehdr->e_phentsize = sizeof(Elf64_Phdr);
+
+ /* Prepare one phdr of type PT_NOTE for each present cpu */
+ for_each_present_cpu(cpu) {
+ phdr = (Elf64_Phdr *)bufp;
+ bufp += sizeof(Elf64_Phdr);
+ phdr->p_type = PT_NOTE;
+ notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
+ phdr->p_offset = phdr->p_paddr = notes_addr;
+ phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t);
+ (ehdr->e_phnum)++;
+ }
+
+ /* Prepare one PT_NOTE header for vmcoreinfo */
+ phdr = (Elf64_Phdr *)bufp;
+ bufp += sizeof(Elf64_Phdr);
+ phdr->p_type = PT_NOTE;
+ phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
+ phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
+ (ehdr->e_phnum)++;
+
+#ifdef CONFIG_X86_64
+ /* Prepare PT_LOAD type program header for kernel text region */
+ phdr = (Elf64_Phdr *)bufp;
+ bufp += sizeof(Elf64_Phdr);
+ phdr->p_type = PT_LOAD;
+ phdr->p_flags = PF_R|PF_W|PF_X;
+ phdr->p_vaddr = (Elf64_Addr)_text;
+ phdr->p_filesz = phdr->p_memsz = _end - _text;
+ phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
+ (ehdr->e_phnum)++;
+#endif
+
+ /* Prepare PT_LOAD headers for system ram chunks. */
+ ced->ehdr = ehdr;
+ ced->bufp = bufp;
+ ret = walk_system_ram_res(0, -1, ced,
+ prepare_elf64_ram_headers_callback);
+ if (ret < 0)
+ return ret;
+
+ *addr = buf;
+ *sz = elf_sz;
+ return 0;
+}
+
+/* Prepare elf headers. Return addr and size */
+static int prepare_elf_headers(struct kimage *image, void **addr,
+ unsigned long *sz)
+{
+ struct crash_elf_data *ced;
+ int ret;
+
+ ced = kzalloc(sizeof(*ced), GFP_KERNEL);
+ if (!ced)
+ return -ENOMEM;
+
+ fill_up_crash_elf_data(ced, image);
+
+ /* By default prepare 64bit headers */
+ ret = prepare_elf64_headers(ced, addr, sz);
+ kfree(ced);
+ return ret;
+}
+
+static int add_e820_entry(struct boot_params *params, struct e820entry *entry)
+{
+ unsigned int nr_e820_entries;
+
+ nr_e820_entries = params->e820_entries;
+ if (nr_e820_entries >= E820MAX)
+ return 1;
+
+ memcpy(&params->e820_map[nr_e820_entries], entry,
+ sizeof(struct e820entry));
+ params->e820_entries++;
+ return 0;
+}
+
+static int memmap_entry_callback(u64 start, u64 end, void *arg)
+{
+ struct crash_memmap_data *cmd = arg;
+ struct boot_params *params = cmd->params;
+ struct e820entry ei;
+
+ ei.addr = start;
+ ei.size = end - start + 1;
+ ei.type = cmd->type;
+ add_e820_entry(params, &ei);
+
+ return 0;
+}
+
+static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem,
+ unsigned long long mstart,
+ unsigned long long mend)
+{
+ unsigned long start, end;
+ int ret = 0;
+
+ cmem->ranges[0].start = mstart;
+ cmem->ranges[0].end = mend;
+ cmem->nr_ranges = 1;
+
+ /* Exclude Backup region */
+ start = image->arch.backup_load_addr;
+ end = start + image->arch.backup_src_sz - 1;
+ ret = exclude_mem_range(cmem, start, end);
+ if (ret)
+ return ret;
+
+ /* Exclude elf header region */
+ start = image->arch.elf_load_addr;
+ end = start + image->arch.elf_headers_sz - 1;
+ return exclude_mem_range(cmem, start, end);
+}
+
+/* Prepare memory map for crash dump kernel */
+int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
+{
+ int i, ret = 0;
+ unsigned long flags;
+ struct e820entry ei;
+ struct crash_memmap_data cmd;
+ struct crash_mem *cmem;
+
+ cmem = vzalloc(sizeof(struct crash_mem));
+ if (!cmem)
+ return -ENOMEM;
+
+ memset(&cmd, 0, sizeof(struct crash_memmap_data));
+ cmd.params = params;
+
+ /* Add first 640K segment */
+ ei.addr = image->arch.backup_src_start;
+ ei.size = image->arch.backup_src_sz;
+ ei.type = E820_RAM;
+ add_e820_entry(params, &ei);
+
+ /* Add ACPI tables */
+ cmd.type = E820_ACPI;
+ flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ walk_iomem_res("ACPI Tables", flags, 0, -1, &cmd,
+ memmap_entry_callback);
+
+ /* Add ACPI Non-volatile Storage */
+ cmd.type = E820_NVS;
+ walk_iomem_res("ACPI Non-volatile Storage", flags, 0, -1, &cmd,
+ memmap_entry_callback);
+
+ /* Add crashk_low_res region */
+ if (crashk_low_res.end) {
+ ei.addr = crashk_low_res.start;
+ ei.size = crashk_low_res.end - crashk_low_res.start + 1;
+ ei.type = E820_RAM;
+ add_e820_entry(params, &ei);
+ }
+
+ /* Exclude some ranges from crashk_res and add rest to memmap */
+ ret = memmap_exclude_ranges(image, cmem, crashk_res.start,
+ crashk_res.end);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < cmem->nr_ranges; i++) {
+ ei.size = cmem->ranges[i].end - cmem->ranges[i].start + 1;
+
+ /* If entry is less than a page, skip it */
+ if (ei.size < PAGE_SIZE)
+ continue;
+ ei.addr = cmem->ranges[i].start;
+ ei.type = E820_RAM;
+ add_e820_entry(params, &ei);
+ }
+
+out:
+ vfree(cmem);
+ return ret;
+}
+
+static int determine_backup_region(u64 start, u64 end, void *arg)
+{
+ struct kimage *image = arg;
+
+ image->arch.backup_src_start = start;
+ image->arch.backup_src_sz = end - start + 1;
+
+ /* Expecting only one range for backup region */
+ return 1;
+}
+
+int crash_load_segments(struct kimage *image)
+{
+ unsigned long src_start, src_sz, elf_sz;
+ void *elf_addr;
+ int ret;
+
+ /*
+ * Determine and load a segment for backup area. First 640K RAM
+ * region is backup source
+ */
+
+ ret = walk_system_ram_res(KEXEC_BACKUP_SRC_START, KEXEC_BACKUP_SRC_END,
+ image, determine_backup_region);
+
+ /* Zero or postive return values are ok */
+ if (ret < 0)
+ return ret;
+
+ src_start = image->arch.backup_src_start;
+ src_sz = image->arch.backup_src_sz;
+
+ /* Add backup segment. */
+ if (src_sz) {
+ /*
+ * Ideally there is no source for backup segment. This is
+ * copied in purgatory after crash. Just add a zero filled
+ * segment for now to make sure checksum logic works fine.
+ */
+ ret = kexec_add_buffer(image, (char *)&crash_zero_bytes,
+ sizeof(crash_zero_bytes), src_sz,
+ PAGE_SIZE, 0, -1, 0,
+ &image->arch.backup_load_addr);
+ if (ret)
+ return ret;
+ pr_debug("Loaded backup region at 0x%lx backup_start=0x%lx memsz=0x%lx\n",
+ image->arch.backup_load_addr, src_start, src_sz);
+ }
+
+ /* Prepare elf headers and add a segment */
+ ret = prepare_elf_headers(image, &elf_addr, &elf_sz);
+ if (ret)
+ return ret;
+
+ image->arch.elf_headers = elf_addr;
+ image->arch.elf_headers_sz = elf_sz;
+
+ ret = kexec_add_buffer(image, (char *)elf_addr, elf_sz, elf_sz,
+ ELF_CORE_HEADER_ALIGN, 0, -1, 0,
+ &image->arch.elf_load_addr);
+ if (ret) {
+ vfree((void *)image->arch.elf_headers);
+ return ret;
+ }
+ pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ image->arch.elf_load_addr, elf_sz, elf_sz);
+
+ return ret;
+}
+
+#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
new file mode 100644
index 000000000000..623e6c58081f
--- /dev/null
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -0,0 +1,532 @@
+/*
+ * Kexec bzImage loader
+ *
+ * Copyright (C) 2014 Red Hat Inc.
+ * Authors:
+ * Vivek Goyal <vgoyal@redhat.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#define pr_fmt(fmt) "kexec-bzImage64: " fmt
+
+#include <linux/string.h>
+#include <linux/printk.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/kexec.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/efi.h>
+
+#include <asm/bootparam.h>
+#include <asm/setup.h>
+#include <asm/crash.h>
+#include <asm/efi.h>
+
+#define MAX_ELFCOREHDR_STR_LEN 30 /* elfcorehdr=0x<64bit-value> */
+
+/*
+ * Defines lowest physical address for various segments. Not sure where
+ * exactly these limits came from. Current bzimage64 loader in kexec-tools
+ * uses these so I am retaining it. It can be changed over time as we gain
+ * more insight.
+ */
+#define MIN_PURGATORY_ADDR 0x3000
+#define MIN_BOOTPARAM_ADDR 0x3000
+#define MIN_KERNEL_LOAD_ADDR 0x100000
+#define MIN_INITRD_LOAD_ADDR 0x1000000
+
+/*
+ * This is a place holder for all boot loader specific data structure which
+ * gets allocated in one call but gets freed much later during cleanup
+ * time. Right now there is only one field but it can grow as need be.
+ */
+struct bzimage64_data {
+ /*
+ * Temporary buffer to hold bootparams buffer. This should be
+ * freed once the bootparam segment has been loaded.
+ */
+ void *bootparams_buf;
+};
+
+static int setup_initrd(struct boot_params *params,
+ unsigned long initrd_load_addr, unsigned long initrd_len)
+{
+ params->hdr.ramdisk_image = initrd_load_addr & 0xffffffffUL;
+ params->hdr.ramdisk_size = initrd_len & 0xffffffffUL;
+
+ params->ext_ramdisk_image = initrd_load_addr >> 32;
+ params->ext_ramdisk_size = initrd_len >> 32;
+
+ return 0;
+}
+
+static int setup_cmdline(struct kimage *image, struct boot_params *params,
+ unsigned long bootparams_load_addr,
+ unsigned long cmdline_offset, char *cmdline,
+ unsigned long cmdline_len)
+{
+ char *cmdline_ptr = ((char *)params) + cmdline_offset;
+ unsigned long cmdline_ptr_phys, len;
+ uint32_t cmdline_low_32, cmdline_ext_32;
+
+ memcpy(cmdline_ptr, cmdline, cmdline_len);
+ if (image->type == KEXEC_TYPE_CRASH) {
+ len = sprintf(cmdline_ptr + cmdline_len - 1,
+ " elfcorehdr=0x%lx", image->arch.elf_load_addr);
+ cmdline_len += len;
+ }
+ cmdline_ptr[cmdline_len - 1] = '\0';
+
+ pr_debug("Final command line is: %s\n", cmdline_ptr);
+ cmdline_ptr_phys = bootparams_load_addr + cmdline_offset;
+ cmdline_low_32 = cmdline_ptr_phys & 0xffffffffUL;
+ cmdline_ext_32 = cmdline_ptr_phys >> 32;
+
+ params->hdr.cmd_line_ptr = cmdline_low_32;
+ if (cmdline_ext_32)
+ params->ext_cmd_line_ptr = cmdline_ext_32;
+
+ return 0;
+}
+
+static int setup_e820_entries(struct boot_params *params)
+{
+ unsigned int nr_e820_entries;
+
+ nr_e820_entries = e820_saved.nr_map;
+
+ /* TODO: Pass entries more than E820MAX in bootparams setup data */
+ if (nr_e820_entries > E820MAX)
+ nr_e820_entries = E820MAX;
+
+ params->e820_entries = nr_e820_entries;
+ memcpy(&params->e820_map, &e820_saved.map,
+ nr_e820_entries * sizeof(struct e820entry));
+
+ return 0;
+}
+
+#ifdef CONFIG_EFI
+static int setup_efi_info_memmap(struct boot_params *params,
+ unsigned long params_load_addr,
+ unsigned int efi_map_offset,
+ unsigned int efi_map_sz)
+{
+ void *efi_map = (void *)params + efi_map_offset;
+ unsigned long efi_map_phys_addr = params_load_addr + efi_map_offset;
+ struct efi_info *ei = &params->efi_info;
+
+ if (!efi_map_sz)
+ return 0;
+
+ efi_runtime_map_copy(efi_map, efi_map_sz);
+
+ ei->efi_memmap = efi_map_phys_addr & 0xffffffff;
+ ei->efi_memmap_hi = efi_map_phys_addr >> 32;
+ ei->efi_memmap_size = efi_map_sz;
+
+ return 0;
+}
+
+static int
+prepare_add_efi_setup_data(struct boot_params *params,
+ unsigned long params_load_addr,
+ unsigned int efi_setup_data_offset)
+{
+ unsigned long setup_data_phys;
+ struct setup_data *sd = (void *)params + efi_setup_data_offset;
+ struct efi_setup_data *esd = (void *)sd + sizeof(struct setup_data);
+
+ esd->fw_vendor = efi.fw_vendor;
+ esd->runtime = efi.runtime;
+ esd->tables = efi.config_table;
+ esd->smbios = efi.smbios;
+
+ sd->type = SETUP_EFI;
+ sd->len = sizeof(struct efi_setup_data);
+
+ /* Add setup data */
+ setup_data_phys = params_load_addr + efi_setup_data_offset;
+ sd->next = params->hdr.setup_data;
+ params->hdr.setup_data = setup_data_phys;
+
+ return 0;
+}
+
+static int
+setup_efi_state(struct boot_params *params, unsigned long params_load_addr,
+ unsigned int efi_map_offset, unsigned int efi_map_sz,
+ unsigned int efi_setup_data_offset)
+{
+ struct efi_info *current_ei = &boot_params.efi_info;
+ struct efi_info *ei = &params->efi_info;
+
+ if (!current_ei->efi_memmap_size)
+ return 0;
+
+ /*
+ * If 1:1 mapping is not enabled, second kernel can not setup EFI
+ * and use EFI run time services. User space will have to pass
+ * acpi_rsdp=<addr> on kernel command line to make second kernel boot
+ * without efi.
+ */
+ if (efi_enabled(EFI_OLD_MEMMAP))
+ return 0;
+
+ ei->efi_loader_signature = current_ei->efi_loader_signature;
+ ei->efi_systab = current_ei->efi_systab;
+ ei->efi_systab_hi = current_ei->efi_systab_hi;
+
+ ei->efi_memdesc_version = current_ei->efi_memdesc_version;
+ ei->efi_memdesc_size = efi_get_runtime_map_desc_size();
+
+ setup_efi_info_memmap(params, params_load_addr, efi_map_offset,
+ efi_map_sz);
+ prepare_add_efi_setup_data(params, params_load_addr,
+ efi_setup_data_offset);
+ return 0;
+}
+#endif /* CONFIG_EFI */
+
+static int
+setup_boot_parameters(struct kimage *image, struct boot_params *params,
+ unsigned long params_load_addr,
+ unsigned int efi_map_offset, unsigned int efi_map_sz,
+ unsigned int efi_setup_data_offset)
+{
+ unsigned int nr_e820_entries;
+ unsigned long long mem_k, start, end;
+ int i, ret = 0;
+
+ /* Get subarch from existing bootparams */
+ params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch;
+
+ /* Copying screen_info will do? */
+ memcpy(&params->screen_info, &boot_params.screen_info,
+ sizeof(struct screen_info));
+
+ /* Fill in memsize later */
+ params->screen_info.ext_mem_k = 0;
+ params->alt_mem_k = 0;
+
+ /* Default APM info */
+ memset(&params->apm_bios_info, 0, sizeof(params->apm_bios_info));
+
+ /* Default drive info */
+ memset(&params->hd0_info, 0, sizeof(params->hd0_info));
+ memset(&params->hd1_info, 0, sizeof(params->hd1_info));
+
+ /* Default sysdesc table */
+ params->sys_desc_table.length = 0;
+
+ if (image->type == KEXEC_TYPE_CRASH) {
+ ret = crash_setup_memmap_entries(image, params);
+ if (ret)
+ return ret;
+ } else
+ setup_e820_entries(params);
+
+ nr_e820_entries = params->e820_entries;
+
+ for (i = 0; i < nr_e820_entries; i++) {
+ if (params->e820_map[i].type != E820_RAM)
+ continue;
+ start = params->e820_map[i].addr;
+ end = params->e820_map[i].addr + params->e820_map[i].size - 1;
+
+ if ((start <= 0x100000) && end > 0x100000) {
+ mem_k = (end >> 10) - (0x100000 >> 10);
+ params->screen_info.ext_mem_k = mem_k;
+ params->alt_mem_k = mem_k;
+ if (mem_k > 0xfc00)
+ params->screen_info.ext_mem_k = 0xfc00; /* 64M*/
+ if (mem_k > 0xffffffff)
+ params->alt_mem_k = 0xffffffff;
+ }
+ }
+
+#ifdef CONFIG_EFI
+ /* Setup EFI state */
+ setup_efi_state(params, params_load_addr, efi_map_offset, efi_map_sz,
+ efi_setup_data_offset);
+#endif
+
+ /* Setup EDD info */
+ memcpy(params->eddbuf, boot_params.eddbuf,
+ EDDMAXNR * sizeof(struct edd_info));
+ params->eddbuf_entries = boot_params.eddbuf_entries;
+
+ memcpy(params->edd_mbr_sig_buffer, boot_params.edd_mbr_sig_buffer,
+ EDD_MBR_SIG_MAX * sizeof(unsigned int));
+
+ return ret;
+}
+
+int bzImage64_probe(const char *buf, unsigned long len)
+{
+ int ret = -ENOEXEC;
+ struct setup_header *header;
+
+ /* kernel should be atleast two sectors long */
+ if (len < 2 * 512) {
+ pr_err("File is too short to be a bzImage\n");
+ return ret;
+ }
+
+ header = (struct setup_header *)(buf + offsetof(struct boot_params, hdr));
+ if (memcmp((char *)&header->header, "HdrS", 4) != 0) {
+ pr_err("Not a bzImage\n");
+ return ret;
+ }
+
+ if (header->boot_flag != 0xAA55) {
+ pr_err("No x86 boot sector present\n");
+ return ret;
+ }
+
+ if (header->version < 0x020C) {
+ pr_err("Must be at least protocol version 2.12\n");
+ return ret;
+ }
+
+ if (!(header->loadflags & LOADED_HIGH)) {
+ pr_err("zImage not a bzImage\n");
+ return ret;
+ }
+
+ if (!(header->xloadflags & XLF_KERNEL_64)) {
+ pr_err("Not a bzImage64. XLF_KERNEL_64 is not set.\n");
+ return ret;
+ }
+
+ if (!(header->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G)) {
+ pr_err("XLF_CAN_BE_LOADED_ABOVE_4G is not set.\n");
+ return ret;
+ }
+
+ /*
+ * Can't handle 32bit EFI as it does not allow loading kernel
+ * above 4G. This should be handled by 32bit bzImage loader
+ */
+ if (efi_enabled(EFI_RUNTIME_SERVICES) && !efi_enabled(EFI_64BIT)) {
+ pr_debug("EFI is 32 bit. Can't load kernel above 4G.\n");
+ return ret;
+ }
+
+ /* I've got a bzImage */
+ pr_debug("It's a relocatable bzImage64\n");
+ ret = 0;
+
+ return ret;
+}
+
+void *bzImage64_load(struct kimage *image, char *kernel,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len)
+{
+
+ struct setup_header *header;
+ int setup_sects, kern16_size, ret = 0;
+ unsigned long setup_header_size, params_cmdline_sz, params_misc_sz;
+ struct boot_params *params;
+ unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr;
+ unsigned long purgatory_load_addr;
+ unsigned long kernel_bufsz, kernel_memsz, kernel_align;
+ char *kernel_buf;
+ struct bzimage64_data *ldata;
+ struct kexec_entry64_regs regs64;
+ void *stack;
+ unsigned int setup_hdr_offset = offsetof(struct boot_params, hdr);
+ unsigned int efi_map_offset, efi_map_sz, efi_setup_data_offset;
+
+ header = (struct setup_header *)(kernel + setup_hdr_offset);
+ setup_sects = header->setup_sects;
+ if (setup_sects == 0)
+ setup_sects = 4;
+
+ kern16_size = (setup_sects + 1) * 512;
+ if (kernel_len < kern16_size) {
+ pr_err("bzImage truncated\n");
+ return ERR_PTR(-ENOEXEC);
+ }
+
+ if (cmdline_len > header->cmdline_size) {
+ pr_err("Kernel command line too long\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ /*
+ * In case of crash dump, we will append elfcorehdr=<addr> to
+ * command line. Make sure it does not overflow
+ */
+ if (cmdline_len + MAX_ELFCOREHDR_STR_LEN > header->cmdline_size) {
+ pr_debug("Appending elfcorehdr=<addr> to command line exceeds maximum allowed length\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* Allocate and load backup region */
+ if (image->type == KEXEC_TYPE_CRASH) {
+ ret = crash_load_segments(image);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
+ /*
+ * Load purgatory. For 64bit entry point, purgatory code can be
+ * anywhere.
+ */
+ ret = kexec_load_purgatory(image, MIN_PURGATORY_ADDR, ULONG_MAX, 1,
+ &purgatory_load_addr);
+ if (ret) {
+ pr_err("Loading purgatory failed\n");
+ return ERR_PTR(ret);
+ }
+
+ pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr);
+
+
+ /*
+ * Load Bootparams and cmdline and space for efi stuff.
+ *
+ * Allocate memory together for multiple data structures so
+ * that they all can go in single area/segment and we don't
+ * have to create separate segment for each. Keeps things
+ * little bit simple
+ */
+ efi_map_sz = efi_get_runtime_map_size();
+ efi_map_sz = ALIGN(efi_map_sz, 16);
+ params_cmdline_sz = sizeof(struct boot_params) + cmdline_len +
+ MAX_ELFCOREHDR_STR_LEN;
+ params_cmdline_sz = ALIGN(params_cmdline_sz, 16);
+ params_misc_sz = params_cmdline_sz + efi_map_sz +
+ sizeof(struct setup_data) +
+ sizeof(struct efi_setup_data);
+
+ params = kzalloc(params_misc_sz, GFP_KERNEL);
+ if (!params)
+ return ERR_PTR(-ENOMEM);
+ efi_map_offset = params_cmdline_sz;
+ efi_setup_data_offset = efi_map_offset + efi_map_sz;
+
+ /* Copy setup header onto bootparams. Documentation/x86/boot.txt */
+ setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset;
+
+ /* Is there a limit on setup header size? */
+ memcpy(&params->hdr, (kernel + setup_hdr_offset), setup_header_size);
+
+ ret = kexec_add_buffer(image, (char *)params, params_misc_sz,
+ params_misc_sz, 16, MIN_BOOTPARAM_ADDR,
+ ULONG_MAX, 1, &bootparam_load_addr);
+ if (ret)
+ goto out_free_params;
+ pr_debug("Loaded boot_param, command line and misc at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ bootparam_load_addr, params_misc_sz, params_misc_sz);
+
+ /* Load kernel */
+ kernel_buf = kernel + kern16_size;
+ kernel_bufsz = kernel_len - kern16_size;
+ kernel_memsz = PAGE_ALIGN(header->init_size);
+ kernel_align = header->kernel_alignment;
+
+ ret = kexec_add_buffer(image, kernel_buf,
+ kernel_bufsz, kernel_memsz, kernel_align,
+ MIN_KERNEL_LOAD_ADDR, ULONG_MAX, 1,
+ &kernel_load_addr);
+ if (ret)
+ goto out_free_params;
+
+ pr_debug("Loaded 64bit kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ kernel_load_addr, kernel_memsz, kernel_memsz);
+
+ /* Load initrd high */
+ if (initrd) {
+ ret = kexec_add_buffer(image, initrd, initrd_len, initrd_len,
+ PAGE_SIZE, MIN_INITRD_LOAD_ADDR,
+ ULONG_MAX, 1, &initrd_load_addr);
+ if (ret)
+ goto out_free_params;
+
+ pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ initrd_load_addr, initrd_len, initrd_len);
+
+ setup_initrd(params, initrd_load_addr, initrd_len);
+ }
+
+ setup_cmdline(image, params, bootparam_load_addr,
+ sizeof(struct boot_params), cmdline, cmdline_len);
+
+ /* bootloader info. Do we need a separate ID for kexec kernel loader? */
+ params->hdr.type_of_loader = 0x0D << 4;
+ params->hdr.loadflags = 0;
+
+ /* Setup purgatory regs for entry */
+ ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", &regs64,
+ sizeof(regs64), 1);
+ if (ret)
+ goto out_free_params;
+
+ regs64.rbx = 0; /* Bootstrap Processor */
+ regs64.rsi = bootparam_load_addr;
+ regs64.rip = kernel_load_addr + 0x200;
+ stack = kexec_purgatory_get_symbol_addr(image, "stack_end");
+ if (IS_ERR(stack)) {
+ pr_err("Could not find address of symbol stack_end\n");
+ ret = -EINVAL;
+ goto out_free_params;
+ }
+
+ regs64.rsp = (unsigned long)stack;
+ ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", &regs64,
+ sizeof(regs64), 0);
+ if (ret)
+ goto out_free_params;
+
+ ret = setup_boot_parameters(image, params, bootparam_load_addr,
+ efi_map_offset, efi_map_sz,
+ efi_setup_data_offset);
+ if (ret)
+ goto out_free_params;
+
+ /* Allocate loader specific data */
+ ldata = kzalloc(sizeof(struct bzimage64_data), GFP_KERNEL);
+ if (!ldata) {
+ ret = -ENOMEM;
+ goto out_free_params;
+ }
+
+ /*
+ * Store pointer to params so that it could be freed after loading
+ * params segment has been loaded and contents have been copied
+ * somewhere else.
+ */
+ ldata->bootparams_buf = params;
+ return ldata;
+
+out_free_params:
+ kfree(params);
+ return ERR_PTR(ret);
+}
+
+/* This cleanup function is called after various segments have been loaded */
+int bzImage64_cleanup(void *loader_data)
+{
+ struct bzimage64_data *ldata = loader_data;
+
+ if (!ldata)
+ return 0;
+
+ kfree(ldata->bootparams_buf);
+ ldata->bootparams_buf = NULL;
+
+ return 0;
+}
+
+struct kexec_file_ops kexec_bzImage64_ops = {
+ .probe = bzImage64_probe,
+ .load = bzImage64_load,
+ .cleanup = bzImage64_cleanup,
+};
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 679cef0791cd..9330434da777 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -6,6 +6,8 @@
* Version 2. See the file COPYING for more details.
*/
+#define pr_fmt(fmt) "kexec: " fmt
+
#include <linux/mm.h>
#include <linux/kexec.h>
#include <linux/string.h>
@@ -21,6 +23,11 @@
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/debugreg.h>
+#include <asm/kexec-bzimage64.h>
+
+static struct kexec_file_ops *kexec_file_loaders[] = {
+ &kexec_bzImage64_ops,
+};
static void free_transition_pgtable(struct kimage *image)
{
@@ -171,6 +178,38 @@ static void load_segments(void)
);
}
+/* Update purgatory as needed after various image segments have been prepared */
+static int arch_update_purgatory(struct kimage *image)
+{
+ int ret = 0;
+
+ if (!image->file_mode)
+ return 0;
+
+ /* Setup copying of backup region */
+ if (image->type == KEXEC_TYPE_CRASH) {
+ ret = kexec_purgatory_get_set_symbol(image, "backup_dest",
+ &image->arch.backup_load_addr,
+ sizeof(image->arch.backup_load_addr), 0);
+ if (ret)
+ return ret;
+
+ ret = kexec_purgatory_get_set_symbol(image, "backup_src",
+ &image->arch.backup_src_start,
+ sizeof(image->arch.backup_src_start), 0);
+ if (ret)
+ return ret;
+
+ ret = kexec_purgatory_get_set_symbol(image, "backup_sz",
+ &image->arch.backup_src_sz,
+ sizeof(image->arch.backup_src_sz), 0);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
int machine_kexec_prepare(struct kimage *image)
{
unsigned long start_pgtable;
@@ -184,6 +223,11 @@ int machine_kexec_prepare(struct kimage *image)
if (result)
return result;
+ /* update purgatory as needed */
+ result = arch_update_purgatory(image);
+ if (result)
+ return result;
+
return 0;
}
@@ -283,3 +327,187 @@ void arch_crash_save_vmcoreinfo(void)
(unsigned long)&_text - __START_KERNEL);
}
+/* arch-dependent functionality related to kexec file-based syscall */
+
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+ unsigned long buf_len)
+{
+ int i, ret = -ENOEXEC;
+ struct kexec_file_ops *fops;
+
+ for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
+ fops = kexec_file_loaders[i];
+ if (!fops || !fops->probe)
+ continue;
+
+ ret = fops->probe(buf, buf_len);
+ if (!ret) {
+ image->fops = fops;
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+void *arch_kexec_kernel_image_load(struct kimage *image)
+{
+ vfree(image->arch.elf_headers);
+ image->arch.elf_headers = NULL;
+
+ if (!image->fops || !image->fops->load)
+ return ERR_PTR(-ENOEXEC);
+
+ return image->fops->load(image, image->kernel_buf,
+ image->kernel_buf_len, image->initrd_buf,
+ image->initrd_buf_len, image->cmdline_buf,
+ image->cmdline_buf_len);
+}
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+ if (!image->fops || !image->fops->cleanup)
+ return 0;
+
+ return image->fops->cleanup(image->image_loader_data);
+}
+
+/*
+ * Apply purgatory relocations.
+ *
+ * ehdr: Pointer to elf headers
+ * sechdrs: Pointer to section headers.
+ * relsec: section index of SHT_RELA section.
+ *
+ * TODO: Some of the code belongs to generic code. Move that in kexec.c.
+ */
+int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
+ Elf64_Shdr *sechdrs, unsigned int relsec)
+{
+ unsigned int i;
+ Elf64_Rela *rel;
+ Elf64_Sym *sym;
+ void *location;
+ Elf64_Shdr *section, *symtabsec;
+ unsigned long address, sec_base, value;
+ const char *strtab, *name, *shstrtab;
+
+ /*
+ * ->sh_offset has been modified to keep the pointer to section
+ * contents in memory
+ */
+ rel = (void *)sechdrs[relsec].sh_offset;
+
+ /* Section to which relocations apply */
+ section = &sechdrs[sechdrs[relsec].sh_info];
+
+ pr_debug("Applying relocate section %u to %u\n", relsec,
+ sechdrs[relsec].sh_info);
+
+ /* Associated symbol table */
+ symtabsec = &sechdrs[sechdrs[relsec].sh_link];
+
+ /* String table */
+ if (symtabsec->sh_link >= ehdr->e_shnum) {
+ /* Invalid strtab section number */
+ pr_err("Invalid string table section index %d\n",
+ symtabsec->sh_link);
+ return -ENOEXEC;
+ }
+
+ strtab = (char *)sechdrs[symtabsec->sh_link].sh_offset;
+
+ /* section header string table */
+ shstrtab = (char *)sechdrs[ehdr->e_shstrndx].sh_offset;
+
+ for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+
+ /*
+ * rel[i].r_offset contains byte offset from beginning
+ * of section to the storage unit affected.
+ *
+ * This is location to update (->sh_offset). This is temporary
+ * buffer where section is currently loaded. This will finally
+ * be loaded to a different address later, pointed to by
+ * ->sh_addr. kexec takes care of moving it
+ * (kexec_load_segment()).
+ */
+ location = (void *)(section->sh_offset + rel[i].r_offset);
+
+ /* Final address of the location */
+ address = section->sh_addr + rel[i].r_offset;
+
+ /*
+ * rel[i].r_info contains information about symbol table index
+ * w.r.t which relocation must be made and type of relocation
+ * to apply. ELF64_R_SYM() and ELF64_R_TYPE() macros get
+ * these respectively.
+ */
+ sym = (Elf64_Sym *)symtabsec->sh_offset +
+ ELF64_R_SYM(rel[i].r_info);
+
+ if (sym->st_name)
+ name = strtab + sym->st_name;
+ else
+ name = shstrtab + sechdrs[sym->st_shndx].sh_name;
+
+ pr_debug("Symbol: %s info: %02x shndx: %02x value=%llx size: %llx\n",
+ name, sym->st_info, sym->st_shndx, sym->st_value,
+ sym->st_size);
+
+ if (sym->st_shndx == SHN_UNDEF) {
+ pr_err("Undefined symbol: %s\n", name);
+ return -ENOEXEC;
+ }
+
+ if (sym->st_shndx == SHN_COMMON) {
+ pr_err("symbol '%s' in common section\n", name);
+ return -ENOEXEC;
+ }
+
+ if (sym->st_shndx == SHN_ABS)
+ sec_base = 0;
+ else if (sym->st_shndx >= ehdr->e_shnum) {
+ pr_err("Invalid section %d for symbol %s\n",
+ sym->st_shndx, name);
+ return -ENOEXEC;
+ } else
+ sec_base = sechdrs[sym->st_shndx].sh_addr;
+
+ value = sym->st_value;
+ value += sec_base;
+ value += rel[i].r_addend;
+
+ switch (ELF64_R_TYPE(rel[i].r_info)) {
+ case R_X86_64_NONE:
+ break;
+ case R_X86_64_64:
+ *(u64 *)location = value;
+ break;
+ case R_X86_64_32:
+ *(u32 *)location = value;
+ if (value != *(u32 *)location)
+ goto overflow;
+ break;
+ case R_X86_64_32S:
+ *(s32 *)location = value;
+ if ((s64)value != *(s32 *)location)
+ goto overflow;
+ break;
+ case R_X86_64_PC32:
+ value -= (u64)address;
+ *(u32 *)location = value;
+ break;
+ default:
+ pr_err("Unknown rela relocation: %llu\n",
+ ELF64_R_TYPE(rel[i].r_info));
+ return -ENOEXEC;
+ }
+ }
+ return 0;
+
+overflow:
+ pr_err("Overflow in relocation type %d value 0x%lx\n",
+ (int)ELF64_R_TYPE(rel[i].r_info), value);
+ return -ENOEXEC;
+}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1dbade870f90..d393ac669cc0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -350,7 +350,7 @@ out:
void vmalloc_sync_all(void)
{
- sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
+ sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END, 0);
}
/*
@@ -1218,7 +1218,8 @@ good_area:
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
- * the fault:
+ * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if
+ * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked.
*/
fault = handle_mm_fault(mm, vma, address, flags);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index df1a9927ad29..8f6803290083 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -178,7 +178,7 @@ __setup("noexec32=", nonx32_setup);
* When memory was added/removed make sure all the processes MM have
* suitable PGD entries in the local PGD level page.
*/
-void sync_global_pgds(unsigned long start, unsigned long end)
+void sync_global_pgds(unsigned long start, unsigned long end, int removed)
{
unsigned long address;
@@ -186,7 +186,12 @@ void sync_global_pgds(unsigned long start, unsigned long end)
const pgd_t *pgd_ref = pgd_offset_k(address);
struct page *page;
- if (pgd_none(*pgd_ref))
+ /*
+ * When it is called after memory hot remove, pgd_none()
+ * returns true. In this case (removed == 1), we must clear
+ * the PGD entries in the local PGD level page.
+ */
+ if (pgd_none(*pgd_ref) && !removed)
continue;
spin_lock(&pgd_lock);
@@ -199,12 +204,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
- else
+ if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
BUG_ON(pgd_page_vaddr(*pgd)
!= pgd_page_vaddr(*pgd_ref));
+ if (removed) {
+ if (pgd_none(*pgd_ref) && !pgd_none(*pgd))
+ pgd_clear(pgd);
+ } else {
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ }
+
spin_unlock(pgt_lock);
}
spin_unlock(&pgd_lock);
@@ -633,7 +644,7 @@ kernel_physical_mapping_init(unsigned long start,
}
if (pgd_changed)
- sync_global_pgds(addr, end - 1);
+ sync_global_pgds(addr, end - 1, 0);
__flush_tlb_all();
@@ -975,25 +986,26 @@ static void __meminit
remove_pagetable(unsigned long start, unsigned long end, bool direct)
{
unsigned long next;
+ unsigned long addr;
pgd_t *pgd;
pud_t *pud;
bool pgd_changed = false;
- for (; start < end; start = next) {
- next = pgd_addr_end(start, end);
+ for (addr = start; addr < end; addr = next) {
+ next = pgd_addr_end(addr, end);
- pgd = pgd_offset_k(start);
+ pgd = pgd_offset_k(addr);
if (!pgd_present(*pgd))
continue;
pud = (pud_t *)pgd_page_vaddr(*pgd);
- remove_pud_table(pud, start, next, direct);
+ remove_pud_table(pud, addr, next, direct);
if (free_pud_table(pud, pgd))
pgd_changed = true;
}
if (pgd_changed)
- sync_global_pgds(start, end - 1);
+ sync_global_pgds(start, end - 1, 1);
flush_tlb_all();
}
@@ -1340,7 +1352,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
else
err = vmemmap_populate_basepages(start, end, node);
if (!err)
- sync_global_pgds(start, end - 1);
+ sync_global_pgds(start, end - 1, 0);
return err;
}
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index a32b706c401a..d221374d5ce8 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -185,8 +185,8 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
return numa_add_memblk_to(nid, start, end, &numa_meminfo);
}
-/* Initialize NODE_DATA for a node on the local memory */
-static void __init setup_node_data(int nid, u64 start, u64 end)
+/* Allocate NODE_DATA for a node on the local memory */
+static void __init alloc_node_data(int nid)
{
const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
u64 nd_pa;
@@ -194,18 +194,6 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
int tnid;
/*
- * Don't confuse VM with a node that doesn't have the
- * minimum amount of memory:
- */
- if (end && (end - start) < NODE_MIN_SIZE)
- return;
-
- start = roundup(start, ZONE_ALIGN);
-
- printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
- nid, start, end - 1);
-
- /*
* Allocate node data. Try node-local memory and then any node.
* Never allocate in DMA zone.
*/
@@ -222,7 +210,7 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
nd = __va(nd_pa);
/* report and initialize */
- printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]\n",
+ printk(KERN_INFO "NODE_DATA(%d) allocated [mem %#010Lx-%#010Lx]\n", nid,
nd_pa, nd_pa + nd_size - 1);
tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
if (tnid != nid)
@@ -230,9 +218,6 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
node_data[nid] = nd;
memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
- NODE_DATA(nid)->node_id = nid;
- NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT;
- NODE_DATA(nid)->node_spanned_pages = (end - start) >> PAGE_SHIFT;
node_set_online(nid);
}
@@ -523,8 +508,17 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
end = max(mi->blk[i].end, end);
}
- if (start < end)
- setup_node_data(nid, start, end);
+ if (start >= end)
+ continue;
+
+ /*
+ * Don't confuse VM with a node that doesn't have the
+ * minimum amount of memory:
+ */
+ if (end && (end - start) < NODE_MIN_SIZE)
+ continue;
+
+ alloc_node_data(nid);
}
/* Dump memblock with node info and return. */
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
new file mode 100644
index 000000000000..7fde9ee438a4
--- /dev/null
+++ b/arch/x86/purgatory/Makefile
@@ -0,0 +1,30 @@
+purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string.o
+
+targets += $(purgatory-y)
+PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
+
+LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
+targets += purgatory.ro
+
+# Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
+# in turn leaves some undefined symbols like __fentry__ in purgatory and not
+# sure how to relocate those. Like kexec-tools, use custom flags.
+
+KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large
+
+$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
+ $(call if_changed,ld)
+
+targets += kexec-purgatory.c
+
+quiet_cmd_bin2c = BIN2C $@
+ cmd_bin2c = cat $(obj)/purgatory.ro | $(objtree)/scripts/basic/bin2c kexec_purgatory > $(obj)/kexec-purgatory.c
+
+$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE
+ $(call if_changed,bin2c)
+
+
+# No loaders for 32bits yet.
+ifeq ($(CONFIG_X86_64),y)
+ obj-$(CONFIG_KEXEC) += kexec-purgatory.o
+endif
diff --git a/arch/x86/purgatory/entry64.S b/arch/x86/purgatory/entry64.S
new file mode 100644
index 000000000000..d1a4291d3568
--- /dev/null
+++ b/arch/x86/purgatory/entry64.S
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com)
+ * Copyright (C) 2014 Red Hat Inc.
+
+ * Author(s): Vivek Goyal <vgoyal@redhat.com>
+ *
+ * This code has been taken from kexec-tools.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+ .text
+ .balign 16
+ .code64
+ .globl entry64, entry64_regs
+
+
+entry64:
+ /* Setup a gdt that should be preserved */
+ lgdt gdt(%rip)
+
+ /* load the data segments */
+ movl $0x18, %eax /* data segment */
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %ss
+ movl %eax, %fs
+ movl %eax, %gs
+
+ /* Setup new stack */
+ leaq stack_init(%rip), %rsp
+ pushq $0x10 /* CS */
+ leaq new_cs_exit(%rip), %rax
+ pushq %rax
+ lretq
+new_cs_exit:
+
+ /* Load the registers */
+ movq rax(%rip), %rax
+ movq rbx(%rip), %rbx
+ movq rcx(%rip), %rcx
+ movq rdx(%rip), %rdx
+ movq rsi(%rip), %rsi
+ movq rdi(%rip), %rdi
+ movq rsp(%rip), %rsp
+ movq rbp(%rip), %rbp
+ movq r8(%rip), %r8
+ movq r9(%rip), %r9
+ movq r10(%rip), %r10
+ movq r11(%rip), %r11
+ movq r12(%rip), %r12
+ movq r13(%rip), %r13
+ movq r14(%rip), %r14
+ movq r15(%rip), %r15
+
+ /* Jump to the new code... */
+ jmpq *rip(%rip)
+
+ .section ".rodata"
+ .balign 4
+entry64_regs:
+rax: .quad 0x0
+rcx: .quad 0x0
+rdx: .quad 0x0
+rbx: .quad 0x0
+rsp: .quad 0x0
+rbp: .quad 0x0
+rsi: .quad 0x0
+rdi: .quad 0x0
+r8: .quad 0x0
+r9: .quad 0x0
+r10: .quad 0x0
+r11: .quad 0x0
+r12: .quad 0x0
+r13: .quad 0x0
+r14: .quad 0x0
+r15: .quad 0x0
+rip: .quad 0x0
+ .size entry64_regs, . - entry64_regs
+
+ /* GDT */
+ .section ".rodata"
+ .balign 16
+gdt:
+ /* 0x00 unusable segment
+ * 0x08 unused
+ * so use them as gdt ptr
+ */
+ .word gdt_end - gdt - 1
+ .quad gdt
+ .word 0, 0, 0
+
+ /* 0x10 4GB flat code segment */
+ .word 0xFFFF, 0x0000, 0x9A00, 0x00AF
+
+ /* 0x18 4GB flat data segment */
+ .word 0xFFFF, 0x0000, 0x9200, 0x00CF
+gdt_end:
+stack: .quad 0, 0
+stack_init:
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
new file mode 100644
index 000000000000..25e068ba3382
--- /dev/null
+++ b/arch/x86/purgatory/purgatory.c
@@ -0,0 +1,72 @@
+/*
+ * purgatory: Runs between two kernels
+ *
+ * Copyright (C) 2014 Red Hat Inc.
+ *
+ * Author:
+ * Vivek Goyal <vgoyal@redhat.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include "sha256.h"
+#include "../boot/string.h"
+
+struct sha_region {
+ unsigned long start;
+ unsigned long len;
+};
+
+unsigned long backup_dest = 0;
+unsigned long backup_src = 0;
+unsigned long backup_sz = 0;
+
+u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 };
+
+struct sha_region sha_regions[16] = {};
+
+/*
+ * On x86, second kernel requries first 640K of memory to boot. Copy
+ * first 640K to a backup region in reserved memory range so that second
+ * kernel can use first 640K.
+ */
+static int copy_backup_region(void)
+{
+ if (backup_dest)
+ memcpy((void *)backup_dest, (void *)backup_src, backup_sz);
+
+ return 0;
+}
+
+int verify_sha256_digest(void)
+{
+ struct sha_region *ptr, *end;
+ u8 digest[SHA256_DIGEST_SIZE];
+ struct sha256_state sctx;
+
+ sha256_init(&sctx);
+ end = &sha_regions[sizeof(sha_regions)/sizeof(sha_regions[0])];
+ for (ptr = sha_regions; ptr < end; ptr++)
+ sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len);
+
+ sha256_final(&sctx, digest);
+
+ if (memcmp(digest, sha256_digest, sizeof(digest)))
+ return 1;
+
+ return 0;
+}
+
+void purgatory(void)
+{
+ int ret;
+
+ ret = verify_sha256_digest();
+ if (ret) {
+ /* loop forever */
+ for (;;)
+ ;
+ }
+ copy_backup_region();
+}
diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S
new file mode 100644
index 000000000000..fe3c91ba1bd0
--- /dev/null
+++ b/arch/x86/purgatory/setup-x86_64.S
@@ -0,0 +1,58 @@
+/*
+ * purgatory: setup code
+ *
+ * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com)
+ * Copyright (C) 2014 Red Hat Inc.
+ *
+ * This code has been taken from kexec-tools.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+ .text
+ .globl purgatory_start
+ .balign 16
+purgatory_start:
+ .code64
+
+ /* Load a gdt so I know what the segment registers are */
+ lgdt gdt(%rip)
+
+ /* load the data segments */
+ movl $0x18, %eax /* data segment */
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %ss
+ movl %eax, %fs
+ movl %eax, %gs
+
+ /* Setup a stack */
+ leaq lstack_end(%rip), %rsp
+
+ /* Call the C code */
+ call purgatory
+ jmp entry64
+
+ .section ".rodata"
+ .balign 16
+gdt: /* 0x00 unusable segment
+ * 0x08 unused
+ * so use them as the gdt ptr
+ */
+ .word gdt_end - gdt - 1
+ .quad gdt
+ .word 0, 0, 0
+
+ /* 0x10 4GB flat code segment */
+ .word 0xFFFF, 0x0000, 0x9A00, 0x00AF
+
+ /* 0x18 4GB flat data segment */
+ .word 0xFFFF, 0x0000, 0x9200, 0x00CF
+gdt_end:
+
+ .bss
+ .balign 4096
+lstack:
+ .skip 4096
+lstack_end:
diff --git a/arch/x86/purgatory/sha256.c b/arch/x86/purgatory/sha256.c
new file mode 100644
index 000000000000..548ca675a14a
--- /dev/null
+++ b/arch/x86/purgatory/sha256.c
@@ -0,0 +1,283 @@
+/*
+ * SHA-256, as specified in
+ * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
+ *
+ * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>.
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * Copyright (c) 2014 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/bitops.h>
+#include <asm/byteorder.h>
+#include "sha256.h"
+#include "../boot/string.h"
+
+static inline u32 Ch(u32 x, u32 y, u32 z)
+{
+ return z ^ (x & (y ^ z));
+}
+
+static inline u32 Maj(u32 x, u32 y, u32 z)
+{
+ return (x & y) | (z & (x | y));
+}
+
+#define e0(x) (ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22))
+#define e1(x) (ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25))
+#define s0(x) (ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3))
+#define s1(x) (ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10))
+
+static inline void LOAD_OP(int I, u32 *W, const u8 *input)
+{
+ W[I] = __be32_to_cpu(((__be32 *)(input))[I]);
+}
+
+static inline void BLEND_OP(int I, u32 *W)
+{
+ W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16];
+}
+
+static void sha256_transform(u32 *state, const u8 *input)
+{
+ u32 a, b, c, d, e, f, g, h, t1, t2;
+ u32 W[64];
+ int i;
+
+ /* load the input */
+ for (i = 0; i < 16; i++)
+ LOAD_OP(i, W, input);
+
+ /* now blend */
+ for (i = 16; i < 64; i++)
+ BLEND_OP(i, W);
+
+ /* load the state into our registers */
+ a = state[0]; b = state[1]; c = state[2]; d = state[3];
+ e = state[4]; f = state[5]; g = state[6]; h = state[7];
+
+ /* now iterate */
+ t1 = h + e1(e) + Ch(e, f, g) + 0x428a2f98 + W[0];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0x71374491 + W[1];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0xb5c0fbcf + W[2];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0xe9b5dba5 + W[3];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0x3956c25b + W[4];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0x59f111f1 + W[5];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0x923f82a4 + W[6];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0xab1c5ed5 + W[7];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0xd807aa98 + W[8];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0x12835b01 + W[9];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0x243185be + W[10];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0x550c7dc3 + W[11];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0x72be5d74 + W[12];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0x80deb1fe + W[13];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0x9bdc06a7 + W[14];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0xc19bf174 + W[15];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0xe49b69c1 + W[16];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0xefbe4786 + W[17];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0x0fc19dc6 + W[18];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0x240ca1cc + W[19];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0x2de92c6f + W[20];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0x4a7484aa + W[21];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0x5cb0a9dc + W[22];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0x76f988da + W[23];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0x983e5152 + W[24];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0xa831c66d + W[25];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0xb00327c8 + W[26];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0xbf597fc7 + W[27];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0xc6e00bf3 + W[28];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0xd5a79147 + W[29];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0x06ca6351 + W[30];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0x14292967 + W[31];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0x27b70a85 + W[32];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0x2e1b2138 + W[33];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0x4d2c6dfc + W[34];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0x53380d13 + W[35];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0x650a7354 + W[36];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0x766a0abb + W[37];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0x81c2c92e + W[38];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0x92722c85 + W[39];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0xa2bfe8a1 + W[40];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0xa81a664b + W[41];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0xc24b8b70 + W[42];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0xc76c51a3 + W[43];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0xd192e819 + W[44];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0xd6990624 + W[45];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0xf40e3585 + W[46];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0x106aa070 + W[47];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0x19a4c116 + W[48];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0x1e376c08 + W[49];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0x2748774c + W[50];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0x34b0bcb5 + W[51];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0x391c0cb3 + W[52];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0x4ed8aa4a + W[53];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0x5b9cca4f + W[54];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0x682e6ff3 + W[55];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0x748f82ee + W[56];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0x78a5636f + W[57];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0x84c87814 + W[58];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0x8cc70208 + W[59];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0x90befffa + W[60];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0xa4506ceb + W[61];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0xbef9a3f7 + W[62];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0xc67178f2 + W[63];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ state[0] += a; state[1] += b; state[2] += c; state[3] += d;
+ state[4] += e; state[5] += f; state[6] += g; state[7] += h;
+
+ /* clear any sensitive info... */
+ a = b = c = d = e = f = g = h = t1 = t2 = 0;
+ memset(W, 0, 64 * sizeof(u32));
+}
+
+int sha256_init(struct sha256_state *sctx)
+{
+ sctx->state[0] = SHA256_H0;
+ sctx->state[1] = SHA256_H1;
+ sctx->state[2] = SHA256_H2;
+ sctx->state[3] = SHA256_H3;
+ sctx->state[4] = SHA256_H4;
+ sctx->state[5] = SHA256_H5;
+ sctx->state[6] = SHA256_H6;
+ sctx->state[7] = SHA256_H7;
+ sctx->count = 0;
+
+ return 0;
+}
+
+int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
+{
+ unsigned int partial, done;
+ const u8 *src;
+
+ partial = sctx->count & 0x3f;
+ sctx->count += len;
+ done = 0;
+ src = data;
+
+ if ((partial + len) > 63) {
+ if (partial) {
+ done = -partial;
+ memcpy(sctx->buf + partial, data, done + 64);
+ src = sctx->buf;
+ }
+
+ do {
+ sha256_transform(sctx->state, src);
+ done += 64;
+ src = data + done;
+ } while (done + 63 < len);
+
+ partial = 0;
+ }
+ memcpy(sctx->buf + partial, src, len - done);
+
+ return 0;
+}
+
+int sha256_final(struct sha256_state *sctx, u8 *out)
+{
+ __be32 *dst = (__be32 *)out;
+ __be64 bits;
+ unsigned int index, pad_len;
+ int i;
+ static const u8 padding[64] = { 0x80, };
+
+ /* Save number of bits */
+ bits = cpu_to_be64(sctx->count << 3);
+
+ /* Pad out to 56 mod 64. */
+ index = sctx->count & 0x3f;
+ pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
+ sha256_update(sctx, padding, pad_len);
+
+ /* Append length (before padding) */
+ sha256_update(sctx, (const u8 *)&bits, sizeof(bits));
+
+ /* Store state in digest */
+ for (i = 0; i < 8; i++)
+ dst[i] = cpu_to_be32(sctx->state[i]);
+
+ /* Zeroize sensitive information. */
+ memset(sctx, 0, sizeof(*sctx));
+
+ return 0;
+}
diff --git a/arch/x86/purgatory/sha256.h b/arch/x86/purgatory/sha256.h
new file mode 100644
index 000000000000..bd15a4127735
--- /dev/null
+++ b/arch/x86/purgatory/sha256.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2014 Red Hat Inc.
+ *
+ * Author: Vivek Goyal <vgoyal@redhat.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#ifndef SHA256_H
+#define SHA256_H
+
+
+#include <linux/types.h>
+#include <crypto/sha.h>
+
+extern int sha256_init(struct sha256_state *sctx);
+extern int sha256_update(struct sha256_state *sctx, const u8 *input,
+ unsigned int length);
+extern int sha256_final(struct sha256_state *sctx, u8 *hash);
+
+#endif /* SHA256_H */
diff --git a/arch/x86/purgatory/stack.S b/arch/x86/purgatory/stack.S
new file mode 100644
index 000000000000..3cefba1fefc8
--- /dev/null
+++ b/arch/x86/purgatory/stack.S
@@ -0,0 +1,19 @@
+/*
+ * purgatory: stack
+ *
+ * Copyright (C) 2014 Red Hat Inc.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+ /* A stack for the loaded kernel.
+ * Seperate and in the data section so it can be prepopulated.
+ */
+ .data
+ .balign 4096
+ .globl stack, stack_end
+
+stack:
+ .skip 4096
+stack_end:
diff --git a/arch/x86/purgatory/string.c b/arch/x86/purgatory/string.c
new file mode 100644
index 000000000000..d886b1fa36f0
--- /dev/null
+++ b/arch/x86/purgatory/string.c
@@ -0,0 +1,13 @@
+/*
+ * Simple string functions.
+ *
+ * Copyright (C) 2014 Red Hat Inc.
+ *
+ * Author:
+ * Vivek Goyal <vgoyal@redhat.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include "../boot/string.c"
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 6705032c0520..17df6122b1e7 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -324,6 +324,7 @@
315 common sched_getattr sys_sched_getattr
316 common renameat2 sys_renameat2
317 common getrandom sys_getrandom
+318 common kexec_file_load sys_kexec_file_load
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index bc423f7b02da..38c8ac2970ac 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -646,6 +646,4 @@ void __init bio_integrity_init(void)
sizeof(struct bio_integrity_payload) +
sizeof(struct bio_vec) * BIP_INLINE_VECS,
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
- if (!bip_slab)
- panic("Failed to create slab\n");
}
diff --git a/block/genhd.c b/block/genhd.c
index 791f41943132..7bd4372e8b6f 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -849,7 +849,7 @@ static int show_partition(struct seq_file *seqf, void *v)
char buf[BDEVNAME_SIZE];
/* Don't show non-partitionable removeable devices or empty devices */
- if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
+ if (!get_capacity(sgp) || (!(disk_max_parts(sgp) > 1) &&
(sgp->flags & GENHD_FL_REMOVABLE)))
return 0;
if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
diff --git a/crypto/zlib.c b/crypto/zlib.c
index 06b62e5cdcc7..c9ee681d57fd 100644
--- a/crypto/zlib.c
+++ b/crypto/zlib.c
@@ -168,7 +168,7 @@ static int zlib_compress_update(struct crypto_pcomp *tfm,
}
ret = req->avail_out - stream->avail_out;
- pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+ pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
stream->avail_in, stream->avail_out,
req->avail_in - stream->avail_in, ret);
req->next_in = stream->next_in;
@@ -198,7 +198,7 @@ static int zlib_compress_final(struct crypto_pcomp *tfm,
}
ret = req->avail_out - stream->avail_out;
- pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+ pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
stream->avail_in, stream->avail_out,
req->avail_in - stream->avail_in, ret);
req->next_in = stream->next_in;
@@ -283,7 +283,7 @@ static int zlib_decompress_update(struct crypto_pcomp *tfm,
}
ret = req->avail_out - stream->avail_out;
- pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+ pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
stream->avail_in, stream->avail_out,
req->avail_in - stream->avail_in, ret);
req->next_in = stream->next_in;
@@ -331,7 +331,7 @@ static int zlib_decompress_final(struct crypto_pcomp *tfm,
}
ret = req->avail_out - stream->avail_out;
- pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
+ pr_debug("avail_in %lu, avail_out %lu (consumed %lu, produced %u)\n",
stream->avail_in, stream->avail_out,
req->avail_in - stream->avail_in, ret);
req->next_in = stream->next_in;
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 7671dbac6015..b0d5b5a12147 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -16,6 +16,7 @@ menuconfig ATA
depends on BLOCK
depends on !(M32R || M68K || S390) || BROKEN
select SCSI
+ select GLOB
---help---
If you want to use an ATA hard disk, ATA tape drive, ATA CD-ROM or
any other ATA device under Linux, say Y and make sure that you know
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index d19c37a7abc9..259d87937ce7 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -59,6 +59,7 @@
#include <linux/async.h>
#include <linux/log2.h>
#include <linux/slab.h>
+#include <linux/glob.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_host.h>
@@ -4250,73 +4251,6 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
{ }
};
-/**
- * glob_match - match a text string against a glob-style pattern
- * @text: the string to be examined
- * @pattern: the glob-style pattern to be matched against
- *
- * Either/both of text and pattern can be empty strings.
- *
- * Match text against a glob-style pattern, with wildcards and simple sets:
- *
- * ? matches any single character.
- * * matches any run of characters.
- * [xyz] matches a single character from the set: x, y, or z.
- * [a-d] matches a single character from the range: a, b, c, or d.
- * [a-d0-9] matches a single character from either range.
- *
- * The special characters ?, [, -, or *, can be matched using a set, eg. [*]
- * Behaviour with malformed patterns is undefined, though generally reasonable.
- *
- * Sample patterns: "SD1?", "SD1[0-5]", "*R0", "SD*1?[012]*xx"
- *
- * This function uses one level of recursion per '*' in pattern.
- * Since it calls _nothing_ else, and has _no_ explicit local variables,
- * this will not cause stack problems for any reasonable use here.
- *
- * RETURNS:
- * 0 on match, 1 otherwise.
- */
-static int glob_match (const char *text, const char *pattern)
-{
- do {
- /* Match single character or a '?' wildcard */
- if (*text == *pattern || *pattern == '?') {
- if (!*pattern++)
- return 0; /* End of both strings: match */
- } else {
- /* Match single char against a '[' bracketed ']' pattern set */
- if (!*text || *pattern != '[')
- break; /* Not a pattern set */
- while (*++pattern && *pattern != ']' && *text != *pattern) {
- if (*pattern == '-' && *(pattern - 1) != '[')
- if (*text > *(pattern - 1) && *text < *(pattern + 1)) {
- ++pattern;
- break;
- }
- }
- if (!*pattern || *pattern == ']')
- return 1; /* No match */
- while (*pattern && *pattern++ != ']');
- }
- } while (*++text && *pattern);
-
- /* Match any run of chars against a '*' wildcard */
- if (*pattern == '*') {
- if (!*++pattern)
- return 0; /* Match: avoid recursion at end of pattern */
- /* Loop to handle additional pattern chars after the wildcard */
- while (*text) {
- if (glob_match(text, pattern) == 0)
- return 0; /* Remainder matched */
- ++text; /* Absorb (match) this char and try again */
- }
- }
- if (!*text && !*pattern)
- return 0; /* End of both strings: match */
- return 1; /* No match */
-}
-
static unsigned long ata_dev_blacklisted(const struct ata_device *dev)
{
unsigned char model_num[ATA_ID_PROD_LEN + 1];
@@ -4327,10 +4261,10 @@ static unsigned long ata_dev_blacklisted(const struct ata_device *dev)
ata_id_c_string(dev->id, model_rev, ATA_ID_FW_REV, sizeof(model_rev));
while (ad->model_num) {
- if (!glob_match(model_num, ad->model_num)) {
+ if (glob_match(model_num, ad->model_num)) {
if (ad->model_rev == NULL)
return ad->horkage;
- if (!glob_match(model_rev, ad->model_rev))
+ if (glob_match(model_rev, ad->model_rev))
return ad->horkage;
}
ad++;
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 88500fed3c7a..4e7f0ff83ae7 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -289,16 +289,6 @@ config CMA_ALIGNMENT
If unsure, leave the default value "8".
-config CMA_AREAS
- int "Maximum count of the CMA device-private areas"
- default 7
- help
- CMA allows to create CMA areas for particular devices. This parameter
- sets the maximum number of such device private CMA areas in the
- system.
-
- If unsure, leave the default value "7".
-
endif
endmenu
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
index 6467c919c509..6606abdf880c 100644
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
@@ -24,23 +24,9 @@
#include <linux/memblock.h>
#include <linux/err.h>
-#include <linux/mm.h>
-#include <linux/mutex.h>
-#include <linux/page-isolation.h>
#include <linux/sizes.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/mm_types.h>
#include <linux/dma-contiguous.h>
-
-struct cma {
- unsigned long base_pfn;
- unsigned long count;
- unsigned long *bitmap;
- struct mutex lock;
-};
-
-struct cma *dma_contiguous_default_area;
+#include <linux/cma.h>
#ifdef CONFIG_CMA_SIZE_MBYTES
#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
@@ -48,6 +34,8 @@ struct cma *dma_contiguous_default_area;
#define CMA_SIZE_MBYTES 0
#endif
+struct cma *dma_contiguous_default_area;
+
/*
* Default global CMA area size can be defined in kernel's .config.
* This is useful mainly for distro maintainers to create a kernel
@@ -154,65 +142,6 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
}
}
-static DEFINE_MUTEX(cma_mutex);
-
-static int __init cma_activate_area(struct cma *cma)
-{
- int bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long);
- unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
- unsigned i = cma->count >> pageblock_order;
- struct zone *zone;
-
- cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
-
- if (!cma->bitmap)
- return -ENOMEM;
-
- WARN_ON_ONCE(!pfn_valid(pfn));
- zone = page_zone(pfn_to_page(pfn));
-
- do {
- unsigned j;
- base_pfn = pfn;
- for (j = pageblock_nr_pages; j; --j, pfn++) {
- WARN_ON_ONCE(!pfn_valid(pfn));
- /*
- * alloc_contig_range requires the pfn range
- * specified to be in the same zone. Make this
- * simple by forcing the entire CMA resv range
- * to be in the same zone.
- */
- if (page_zone(pfn_to_page(pfn)) != zone)
- goto err;
- }
- init_cma_reserved_pageblock(pfn_to_page(base_pfn));
- } while (--i);
-
- mutex_init(&cma->lock);
- return 0;
-
-err:
- kfree(cma->bitmap);
- return -EINVAL;
-}
-
-static struct cma cma_areas[MAX_CMA_AREAS];
-static unsigned cma_area_count;
-
-static int __init cma_init_reserved_areas(void)
-{
- int i;
-
- for (i = 0; i < cma_area_count; i++) {
- int ret = cma_activate_area(&cma_areas[i]);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-core_initcall(cma_init_reserved_areas);
-
/**
* dma_contiguous_reserve_area() - reserve custom contiguous area
* @size: Size of the reserved area (in bytes),
@@ -234,72 +163,17 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
phys_addr_t limit, struct cma **res_cma,
bool fixed)
{
- struct cma *cma = &cma_areas[cma_area_count];
- phys_addr_t alignment;
- int ret = 0;
-
- pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__,
- (unsigned long)size, (unsigned long)base,
- (unsigned long)limit);
-
- /* Sanity checks */
- if (cma_area_count == ARRAY_SIZE(cma_areas)) {
- pr_err("Not enough slots for CMA reserved regions!\n");
- return -ENOSPC;
- }
-
- if (!size)
- return -EINVAL;
-
- /* Sanitise input arguments */
- alignment = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
- base = ALIGN(base, alignment);
- size = ALIGN(size, alignment);
- limit &= ~(alignment - 1);
-
- /* Reserve memory */
- if (base && fixed) {
- if (memblock_is_region_reserved(base, size) ||
- memblock_reserve(base, size) < 0) {
- ret = -EBUSY;
- goto err;
- }
- } else {
- phys_addr_t addr = memblock_alloc_range(size, alignment, base,
- limit);
- if (!addr) {
- ret = -ENOMEM;
- goto err;
- } else {
- base = addr;
- }
- }
-
- /*
- * Each reserved area must be initialised later, when more kernel
- * subsystems (like slab allocator) are available.
- */
- cma->base_pfn = PFN_DOWN(base);
- cma->count = size >> PAGE_SHIFT;
- *res_cma = cma;
- cma_area_count++;
+ int ret;
- pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
- (unsigned long)base);
+ ret = cma_declare_contiguous(base, size, limit, 0, 0, fixed, res_cma);
+ if (ret)
+ return ret;
/* Architecture specific contiguous memory fixup. */
- dma_contiguous_early_fixup(base, size);
- return 0;
-err:
- pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
- return ret;
-}
+ dma_contiguous_early_fixup(cma_get_base(*res_cma),
+ cma_get_size(*res_cma));
-static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count)
-{
- mutex_lock(&cma->lock);
- bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count);
- mutex_unlock(&cma->lock);
+ return 0;
}
/**
@@ -316,62 +190,10 @@ static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count)
struct page *dma_alloc_from_contiguous(struct device *dev, int count,
unsigned int align)
{
- unsigned long mask, pfn, pageno, start = 0;
- struct cma *cma = dev_get_cma_area(dev);
- struct page *page = NULL;
- int ret;
-
- if (!cma || !cma->count)
- return NULL;
-
if (align > CONFIG_CMA_ALIGNMENT)
align = CONFIG_CMA_ALIGNMENT;
- pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
- count, align);
-
- if (!count)
- return NULL;
-
- mask = (1 << align) - 1;
-
-
- for (;;) {
- mutex_lock(&cma->lock);
- pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
- start, count, mask);
- if (pageno >= cma->count) {
- mutex_unlock(&cma->lock);
- break;
- }
- bitmap_set(cma->bitmap, pageno, count);
- /*
- * It's safe to drop the lock here. We've marked this region for
- * our exclusive use. If the migration fails we will take the
- * lock again and unmark it.
- */
- mutex_unlock(&cma->lock);
-
- pfn = cma->base_pfn + pageno;
- mutex_lock(&cma_mutex);
- ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
- mutex_unlock(&cma_mutex);
- if (ret == 0) {
- page = pfn_to_page(pfn);
- break;
- } else if (ret != -EBUSY) {
- clear_cma_bitmap(cma, pfn, count);
- break;
- }
- clear_cma_bitmap(cma, pfn, count);
- pr_debug("%s(): memory range at %p is busy, retrying\n",
- __func__, pfn_to_page(pfn));
- /* try again with a bit different memory target */
- start = pageno + mask + 1;
- }
-
- pr_debug("%s(): returned %p\n", __func__, page);
- return page;
+ return cma_alloc(dev_get_cma_area(dev), count, align);
}
/**
@@ -387,23 +209,5 @@ struct page *dma_alloc_from_contiguous(struct device *dev, int count,
bool dma_release_from_contiguous(struct device *dev, struct page *pages,
int count)
{
- struct cma *cma = dev_get_cma_area(dev);
- unsigned long pfn;
-
- if (!cma || !pages)
- return false;
-
- pr_debug("%s(page %p)\n", __func__, (void *)pages);
-
- pfn = page_to_pfn(pages);
-
- if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
- return false;
-
- VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
-
- free_contig_range(pfn, count);
- clear_cma_bitmap(cma, pfn, count);
-
- return true;
+ return cma_release(dev_get_cma_area(dev), pages, count);
}
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 89f752dd8465..a2e13e250bba 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -284,7 +284,7 @@ static int memory_subsys_online(struct device *dev)
* attribute and need to set the online_type.
*/
if (mem->online_type < 0)
- mem->online_type = ONLINE_KEEP;
+ mem->online_type = MMOP_ONLINE_KEEP;
ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
@@ -315,23 +315,23 @@ store_mem_state(struct device *dev,
if (ret)
return ret;
- if (!strncmp(buf, "online_kernel", min_t(int, count, 13)))
- online_type = ONLINE_KERNEL;
- else if (!strncmp(buf, "online_movable", min_t(int, count, 14)))
- online_type = ONLINE_MOVABLE;
- else if (!strncmp(buf, "online", min_t(int, count, 6)))
- online_type = ONLINE_KEEP;
- else if (!strncmp(buf, "offline", min_t(int, count, 7)))
- online_type = -1;
+ if (sysfs_streq(buf, "online_kernel"))
+ online_type = MMOP_ONLINE_KERNEL;
+ else if (sysfs_streq(buf, "online_movable"))
+ online_type = MMOP_ONLINE_MOVABLE;
+ else if (sysfs_streq(buf, "online"))
+ online_type = MMOP_ONLINE_KEEP;
+ else if (sysfs_streq(buf, "offline"))
+ online_type = MMOP_OFFLINE;
else {
ret = -EINVAL;
goto err;
}
switch (online_type) {
- case ONLINE_KERNEL:
- case ONLINE_MOVABLE:
- case ONLINE_KEEP:
+ case MMOP_ONLINE_KERNEL:
+ case MMOP_ONLINE_MOVABLE:
+ case MMOP_ONLINE_KEEP:
/*
* mem->online_type is not protected so there can be a
* race here. However, when racing online, the first
@@ -342,7 +342,7 @@ store_mem_state(struct device *dev,
mem->online_type = online_type;
ret = device_online(&mem->dev);
break;
- case -1:
+ case MMOP_OFFLINE:
ret = device_offline(&mem->dev);
break;
default:
@@ -406,7 +406,9 @@ memory_probe_store(struct device *dev, struct device_attribute *attr,
int i, ret;
unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;
- phys_addr = simple_strtoull(buf, NULL, 0);
+ ret = kstrtoull(buf, 0, &phys_addr);
+ if (ret)
+ return ret;
if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
return -EINVAL;
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 8f7ed9933a7c..c6d3ae05f1ca 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -126,7 +126,7 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(nid, NR_FILE_PAGES)),
nid, K(node_page_state(nid, NR_FILE_MAPPED)),
nid, K(node_page_state(nid, NR_ANON_PAGES)),
- nid, K(node_page_state(nid, NR_SHMEM)),
+ nid, K(i.sharedram),
nid, node_page_state(nid, NR_KERNEL_STACK) *
THREAD_SIZE / 1024,
nid, K(node_page_state(nid, NR_PAGETABLE)),
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 089e72cd37be..dfa4024c448a 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -183,19 +183,32 @@ static ssize_t comp_algorithm_store(struct device *dev,
static int zram_test_flag(struct zram_meta *meta, u32 index,
enum zram_pageflags flag)
{
- return meta->table[index].flags & BIT(flag);
+ return meta->table[index].value & BIT(flag);
}
static void zram_set_flag(struct zram_meta *meta, u32 index,
enum zram_pageflags flag)
{
- meta->table[index].flags |= BIT(flag);
+ meta->table[index].value |= BIT(flag);
}
static void zram_clear_flag(struct zram_meta *meta, u32 index,
enum zram_pageflags flag)
{
- meta->table[index].flags &= ~BIT(flag);
+ meta->table[index].value &= ~BIT(flag);
+}
+
+static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
+{
+ return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
+}
+
+static void zram_set_obj_size(struct zram_meta *meta,
+ u32 index, size_t size)
+{
+ unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
+
+ meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
}
static inline int is_partial_io(struct bio_vec *bvec)
@@ -255,7 +268,6 @@ static struct zram_meta *zram_meta_alloc(u64 disksize)
goto free_table;
}
- rwlock_init(&meta->tb_lock);
return meta;
free_table:
@@ -304,7 +316,12 @@ static void handle_zero_page(struct bio_vec *bvec)
flush_dcache_page(page);
}
-/* NOTE: caller should hold meta->tb_lock with write-side */
+
+/*
+ * To protect concurrent access to the same index entry,
+ * caller should hold this table index entry's bit_spinlock to
+ * indicate this index entry is accessing.
+ */
static void zram_free_page(struct zram *zram, size_t index)
{
struct zram_meta *meta = zram->meta;
@@ -324,11 +341,12 @@ static void zram_free_page(struct zram *zram, size_t index)
zs_free(meta->mem_pool, handle);
- atomic64_sub(meta->table[index].size, &zram->stats.compr_data_size);
+ atomic64_sub(zram_get_obj_size(meta, index),
+ &zram->stats.compr_data_size);
atomic64_dec(&zram->stats.pages_stored);
meta->table[index].handle = 0;
- meta->table[index].size = 0;
+ zram_set_obj_size(meta, index, 0);
}
static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
@@ -337,14 +355,14 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
unsigned char *cmem;
struct zram_meta *meta = zram->meta;
unsigned long handle;
- u16 size;
+ size_t size;
- read_lock(&meta->tb_lock);
+ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
handle = meta->table[index].handle;
- size = meta->table[index].size;
+ size = zram_get_obj_size(meta, index);
if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
- read_unlock(&meta->tb_lock);
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
clear_page(mem);
return 0;
}
@@ -355,7 +373,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
else
ret = zcomp_decompress(zram->comp, cmem, size, mem);
zs_unmap_object(meta->mem_pool, handle);
- read_unlock(&meta->tb_lock);
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
/* Should NEVER happen. Return bio error if it does. */
if (unlikely(ret)) {
@@ -376,14 +394,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
struct zram_meta *meta = zram->meta;
page = bvec->bv_page;
- read_lock(&meta->tb_lock);
+ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
if (unlikely(!meta->table[index].handle) ||
zram_test_flag(meta, index, ZRAM_ZERO)) {
- read_unlock(&meta->tb_lock);
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
handle_zero_page(bvec);
return 0;
}
- read_unlock(&meta->tb_lock);
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
if (is_partial_io(bvec))
/* Use a temporary buffer to decompress the page */
@@ -461,10 +479,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
if (page_zero_filled(uncmem)) {
kunmap_atomic(user_mem);
/* Free memory associated with this sector now. */
- write_lock(&zram->meta->tb_lock);
+ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
zram_free_page(zram, index);
zram_set_flag(meta, index, ZRAM_ZERO);
- write_unlock(&zram->meta->tb_lock);
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
atomic64_inc(&zram->stats.zero_pages);
ret = 0;
@@ -514,12 +532,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
* Free memory associated with this sector
* before overwriting unused sectors.
*/
- write_lock(&zram->meta->tb_lock);
+ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
zram_free_page(zram, index);
meta->table[index].handle = handle;
- meta->table[index].size = clen;
- write_unlock(&zram->meta->tb_lock);
+ zram_set_obj_size(meta, index, clen);
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
/* Update stats */
atomic64_add(clen, &zram->stats.compr_data_size);
@@ -560,6 +578,7 @@ static void zram_bio_discard(struct zram *zram, u32 index,
int offset, struct bio *bio)
{
size_t n = bio->bi_iter.bi_size;
+ struct zram_meta *meta = zram->meta;
/*
* zram manages data in physical block size units. Because logical block
@@ -580,13 +599,9 @@ static void zram_bio_discard(struct zram *zram, u32 index,
}
while (n >= PAGE_SIZE) {
- /*
- * Discard request can be large so the lock hold times could be
- * lengthy. So take the lock once per page.
- */
- write_lock(&zram->meta->tb_lock);
+ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
zram_free_page(zram, index);
- write_unlock(&zram->meta->tb_lock);
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
index++;
n -= PAGE_SIZE;
}
@@ -622,11 +637,18 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)
memset(&zram->stats, 0, sizeof(zram->stats));
zram->disksize = 0;
- if (reset_capacity) {
+ if (reset_capacity)
set_capacity(zram->disk, 0);
- revalidate_disk(zram->disk);
- }
+
up_write(&zram->init_lock);
+
+ /*
+ * Revalidate disk out of the init_lock to avoid lockdep splat.
+ * It's okay because disk's capacity is protected by init_lock
+ * so that revalidate_disk always sees up-to-date capacity.
+ */
+ if (reset_capacity)
+ revalidate_disk(zram->disk);
}
static ssize_t disksize_store(struct device *dev,
@@ -666,8 +688,15 @@ static ssize_t disksize_store(struct device *dev,
zram->comp = comp;
zram->disksize = disksize;
set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
- revalidate_disk(zram->disk);
up_write(&zram->init_lock);
+
+ /*
+ * Revalidate disk out of the init_lock to avoid lockdep splat.
+ * It's okay because disk's capacity is protected by init_lock
+ * so that revalidate_disk always sees up-to-date capacity.
+ */
+ revalidate_disk(zram->disk);
+
return len;
out_destroy_comp:
@@ -807,9 +836,9 @@ static void zram_slot_free_notify(struct block_device *bdev,
zram = bdev->bd_disk->private_data;
meta = zram->meta;
- write_lock(&meta->tb_lock);
+ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
zram_free_page(zram, index);
- write_unlock(&meta->tb_lock);
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
atomic64_inc(&zram->stats.notify_free);
}
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 7f21c145e317..5b0afde729cd 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -43,7 +43,6 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
/*-- End of configurable params */
#define SECTOR_SHIFT 9
-#define SECTOR_SIZE (1 << SECTOR_SHIFT)
#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT)
#define ZRAM_LOGICAL_BLOCK_SHIFT 12
@@ -51,10 +50,24 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
#define ZRAM_SECTOR_PER_LOGICAL_BLOCK \
(1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT))
-/* Flags for zram pages (table[page_no].flags) */
+
+/*
+ * The lower ZRAM_FLAG_SHIFT bits of table.value is for
+ * object size (excluding header), the higher bits is for
+ * zram_pageflags.
+ *
+ * zram is mainly used for memory efficiency so we want to keep memory
+ * footprint small so we can squeeze size and flags into a field.
+ * The lower ZRAM_FLAG_SHIFT bits is for object size (excluding header),
+ * the higher bits is for zram_pageflags.
+ */
+#define ZRAM_FLAG_SHIFT 24
+
+/* Flags for zram pages (table[page_no].value) */
enum zram_pageflags {
/* Page consists entirely of zeros */
- ZRAM_ZERO,
+ ZRAM_ZERO = ZRAM_FLAG_SHIFT + 1,
+ ZRAM_ACCESS, /* page in now accessed */
__NR_ZRAM_PAGEFLAGS,
};
@@ -62,11 +75,10 @@ enum zram_pageflags {
/*-- Data structures */
/* Allocated for each disk page */
-struct table {
+struct zram_table_entry {
unsigned long handle;
- u16 size; /* object size (excluding header) */
- u8 flags;
-} __aligned(4);
+ unsigned long value;
+};
struct zram_stats {
atomic64_t compr_data_size; /* compressed size of pages stored */
@@ -81,8 +93,7 @@ struct zram_stats {
};
struct zram_meta {
- rwlock_t tb_lock; /* protect table */
- struct table *table;
+ struct zram_table_entry *table;
struct zs_pool *mem_pool;
};
diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c
index 97cdd16a2169..018c29a26615 100644
--- a/drivers/firmware/efi/runtime-map.c
+++ b/drivers/firmware/efi/runtime-map.c
@@ -138,6 +138,27 @@ add_sysfs_runtime_map_entry(struct kobject *kobj, int nr)
return entry;
}
+int efi_get_runtime_map_size(void)
+{
+ return nr_efi_runtime_map * efi_memdesc_size;
+}
+
+int efi_get_runtime_map_desc_size(void)
+{
+ return efi_memdesc_size;
+}
+
+int efi_runtime_map_copy(void *buf, size_t bufsz)
+{
+ size_t sz = efi_get_runtime_map_size();
+
+ if (sz > bufsz)
+ sz = bufsz;
+
+ memcpy(buf, efi_runtime_map, sz);
+ return 0;
+}
+
void efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size)
{
efi_runtime_map = map;
diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index 17cf96c45f2b..79f18e6d9c4f 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -286,7 +286,11 @@ int __meminit firmware_map_add_hotplug(u64 start, u64 end, const char *type)
{
struct firmware_map_entry *entry;
- entry = firmware_map_find_entry_bootmem(start, end, type);
+ entry = firmware_map_find_entry(start, end - 1, type);
+ if (entry)
+ return 0;
+
+ entry = firmware_map_find_entry_bootmem(start, end - 1, type);
if (!entry) {
entry = kzalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC);
if (!entry)
diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
index 7e4bae760e27..c3b80fd65d62 100644
--- a/drivers/gpu/drm/drm_hashtab.c
+++ b/drivers/gpu/drm/drm_hashtab.c
@@ -125,7 +125,7 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item)
parent = &entry->head;
}
if (parent) {
- hlist_add_after_rcu(parent, &item->head);
+ hlist_add_behind_rcu(&item->head, parent);
} else {
hlist_add_head_rcu(&item->head, h_list);
}
diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c
index ae208f612198..cccef87963e0 100644
--- a/drivers/hwmon/asus_atk0110.c
+++ b/drivers/hwmon/asus_atk0110.c
@@ -688,7 +688,7 @@ static int atk_debugfs_gitm_get(void *p, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(atk_debugfs_gitm,
atk_debugfs_gitm_get,
NULL,
- "0x%08llx\n")
+ "0x%08llx\n");
static int atk_acpi_print(char *buf, size_t sz, union acpi_object *obj)
{
diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig
index a11ff74a5127..9eac8de9e8b7 100644
--- a/drivers/input/Kconfig
+++ b/drivers/input/Kconfig
@@ -178,6 +178,15 @@ comment "Input Device Drivers"
source "drivers/input/keyboard/Kconfig"
+config INPUT_LEDS
+ bool "LED Support"
+ depends on LEDS_CLASS = INPUT || LEDS_CLASS = y
+ select LEDS_TRIGGERS
+ default y
+ help
+ This option enables support for LEDs on keyboards managed
+ by the input layer.
+
source "drivers/input/mouse/Kconfig"
source "drivers/input/joystick/Kconfig"
diff --git a/drivers/input/Makefile b/drivers/input/Makefile
index 5ca3f631497f..2ab5f3336da5 100644
--- a/drivers/input/Makefile
+++ b/drivers/input/Makefile
@@ -6,6 +6,9 @@
obj-$(CONFIG_INPUT) += input-core.o
input-core-y := input.o input-compat.o input-mt.o ff-core.o
+ifeq ($(CONFIG_INPUT_LEDS),y)
+input-core-y += leds.o
+endif
obj-$(CONFIG_INPUT_FF_MEMLESS) += ff-memless.o
obj-$(CONFIG_INPUT_POLLDEV) += input-polldev.o
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 1c4c0db05550..3b9284b18e70 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -708,6 +708,9 @@ static void input_disconnect_device(struct input_dev *dev)
handle->open = 0;
spin_unlock_irq(&dev->event_lock);
+
+ if (is_event_supported(EV_LED, dev->evbit, EV_MAX))
+ input_led_disconnect(dev);
}
/**
@@ -2134,6 +2137,9 @@ int input_register_device(struct input_dev *dev)
list_add_tail(&dev->node, &input_dev_list);
+ if (is_event_supported(EV_LED, dev->evbit, EV_MAX))
+ input_led_connect(dev);
+
list_for_each_entry(handler, &input_handler_list, node)
input_attach_handler(dev, handler);
diff --git a/drivers/input/leds.c b/drivers/input/leds.c
new file mode 100644
index 000000000000..985fa7ebeec7
--- /dev/null
+++ b/drivers/input/leds.c
@@ -0,0 +1,249 @@
+/*
+ * LED support for the input layer
+ *
+ * Copyright 2010-2014 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/leds.h>
+#include <linux/input.h>
+
+/*
+ * Keyboard LEDs are propagated by default like the following example:
+ *
+ * VT keyboard numlock trigger
+ * -> vt::numl VT LED
+ * -> vt-numl VT trigger
+ * -> per-device inputX::numl LED
+ *
+ * Userland can however choose the trigger for the vt::numl LED, or
+ * independently choose the trigger for any inputx::numl LED.
+ *
+ *
+ * VT LED classes and triggers are registered on-demand according to
+ * existing LED devices
+ */
+
+/* Handler for VT LEDs, just triggers the corresponding VT trigger. */
+static void vt_led_set(struct led_classdev *cdev,
+ enum led_brightness brightness);
+static struct led_classdev vt_leds[LED_CNT] = {
+#define DEFINE_INPUT_LED(vt_led, nam, deftrig) \
+ [vt_led] = { \
+ .name = "vt::"nam, \
+ .max_brightness = 1, \
+ .brightness_set = vt_led_set, \
+ .default_trigger = deftrig, \
+ }
+/* Default triggers for the VT LEDs just correspond to the legacy
+ * usage. */
+ DEFINE_INPUT_LED(LED_NUML, "numl", "kbd-numlock"),
+ DEFINE_INPUT_LED(LED_CAPSL, "capsl", "kbd-capslock"),
+ DEFINE_INPUT_LED(LED_SCROLLL, "scrolll", "kbd-scrollock"),
+ DEFINE_INPUT_LED(LED_COMPOSE, "compose", NULL),
+ DEFINE_INPUT_LED(LED_KANA, "kana", "kbd-kanalock"),
+ DEFINE_INPUT_LED(LED_SLEEP, "sleep", NULL),
+ DEFINE_INPUT_LED(LED_SUSPEND, "suspend", NULL),
+ DEFINE_INPUT_LED(LED_MUTE, "mute", NULL),
+ DEFINE_INPUT_LED(LED_MISC, "misc", NULL),
+ DEFINE_INPUT_LED(LED_MAIL, "mail", NULL),
+ DEFINE_INPUT_LED(LED_CHARGING, "charging", NULL),
+};
+static const char *const vt_led_names[LED_CNT] = {
+ [LED_NUML] = "numl",
+ [LED_CAPSL] = "capsl",
+ [LED_SCROLLL] = "scrolll",
+ [LED_COMPOSE] = "compose",
+ [LED_KANA] = "kana",
+ [LED_SLEEP] = "sleep",
+ [LED_SUSPEND] = "suspend",
+ [LED_MUTE] = "mute",
+ [LED_MISC] = "misc",
+ [LED_MAIL] = "mail",
+ [LED_CHARGING] = "charging",
+};
+/* Handler for hotplug initialization */
+static void vt_led_trigger_activate(struct led_classdev *cdev);
+/* VT triggers */
+static struct led_trigger vt_led_triggers[LED_CNT] = {
+#define DEFINE_INPUT_LED_TRIGGER(vt_led, nam) \
+ [vt_led] = { \
+ .name = "vt-"nam, \
+ .activate = vt_led_trigger_activate, \
+ }
+ DEFINE_INPUT_LED_TRIGGER(LED_NUML, "numl"),
+ DEFINE_INPUT_LED_TRIGGER(LED_CAPSL, "capsl"),
+ DEFINE_INPUT_LED_TRIGGER(LED_SCROLLL, "scrolll"),
+ DEFINE_INPUT_LED_TRIGGER(LED_COMPOSE, "compose"),
+ DEFINE_INPUT_LED_TRIGGER(LED_KANA, "kana"),
+ DEFINE_INPUT_LED_TRIGGER(LED_SLEEP, "sleep"),
+ DEFINE_INPUT_LED_TRIGGER(LED_SUSPEND, "suspend"),
+ DEFINE_INPUT_LED_TRIGGER(LED_MUTE, "mute"),
+ DEFINE_INPUT_LED_TRIGGER(LED_MISC, "misc"),
+ DEFINE_INPUT_LED_TRIGGER(LED_MAIL, "mail"),
+ DEFINE_INPUT_LED_TRIGGER(LED_CHARGING, "charging"),
+};
+
+/* Lock for registration coherency */
+static DEFINE_MUTEX(vt_led_registered_lock);
+
+/* Which VT LED classes and triggers are registered */
+static unsigned long vt_led_registered[BITS_TO_LONGS(LED_CNT)];
+
+/* Number of input devices having each LED */
+static int vt_led_references[LED_CNT];
+
+/* VT LED state change, tell the VT trigger. */
+static void vt_led_set(struct led_classdev *cdev,
+ enum led_brightness brightness)
+{
+ int led = cdev - vt_leds;
+
+ led_trigger_event(&vt_led_triggers[led], !!brightness);
+}
+
+/* LED state change for some keyboard, notify that keyboard. */
+static void perdevice_input_led_set(struct led_classdev *cdev,
+ enum led_brightness brightness)
+{
+ struct input_dev *dev;
+ struct led_classdev *leds;
+ int led;
+
+ dev = cdev->dev->platform_data;
+ if (!dev)
+ /* Still initializing */
+ return;
+ leds = dev->leds;
+ led = cdev - leds;
+
+ input_event(dev, EV_LED, led, !!brightness);
+ input_event(dev, EV_SYN, SYN_REPORT, 0);
+}
+
+/* Keyboard hotplug, initialize its LED status */
+static void vt_led_trigger_activate(struct led_classdev *cdev)
+{
+ struct led_trigger *trigger = cdev->trigger;
+ int led = trigger - vt_led_triggers;
+
+ if (cdev->brightness_set)
+ cdev->brightness_set(cdev, vt_leds[led].brightness);
+}
+
+/* Free led stuff from input device, used at abortion and disconnection. */
+static void input_led_delete(struct input_dev *dev)
+{
+ if (dev) {
+ struct led_classdev *leds = dev->leds;
+ if (leds) {
+ int i;
+ for (i = 0; i < LED_CNT; i++)
+ kfree(leds[i].name);
+ kfree(leds);
+ dev->leds = NULL;
+ }
+ }
+}
+
+/* A new input device with potential LEDs to connect. */
+int input_led_connect(struct input_dev *dev)
+{
+ int i, error = 0;
+ struct led_classdev *leds;
+
+ dev->leds = leds = kcalloc(LED_CNT, sizeof(*leds), GFP_KERNEL);
+ if (!dev->leds)
+ return -ENOMEM;
+
+ /* lazily register missing VT LEDs */
+ mutex_lock(&vt_led_registered_lock);
+ for (i = 0; i < LED_CNT; i++)
+ if (vt_leds[i].name && test_bit(i, dev->ledbit)) {
+ if (!vt_led_references[i]) {
+ led_trigger_register(&vt_led_triggers[i]);
+ /* This keyboard is first to have led i,
+ * try to register it */
+ if (!led_classdev_register(NULL, &vt_leds[i]))
+ vt_led_references[i] = 1;
+ else
+ led_trigger_unregister(&vt_led_triggers[i]);
+ } else
+ vt_led_references[i]++;
+ }
+ mutex_unlock(&vt_led_registered_lock);
+
+ /* and register this device's LEDs */
+ for (i = 0; i < LED_CNT; i++)
+ if (vt_leds[i].name && test_bit(i, dev->ledbit)) {
+ leds[i].name = kasprintf(GFP_KERNEL, "%s::%s",
+ dev_name(&dev->dev),
+ vt_led_names[i]);
+ if (!leds[i].name) {
+ error = -ENOMEM;
+ goto err;
+ }
+ leds[i].max_brightness = 1;
+ leds[i].brightness_set = perdevice_input_led_set;
+ leds[i].default_trigger = vt_led_triggers[i].name;
+ }
+
+ /* No issue so far, we can register for real. */
+ for (i = 0; i < LED_CNT; i++)
+ if (leds[i].name) {
+ led_classdev_register(&dev->dev, &leds[i]);
+ leds[i].dev->platform_data = dev;
+ perdevice_input_led_set(&leds[i],
+ vt_leds[i].brightness);
+ }
+
+ return 0;
+
+err:
+ input_led_delete(dev);
+ return error;
+}
+
+/*
+ * Disconnected input device. Clean it, and deregister now-useless VT LEDs
+ * and triggers.
+ */
+void input_led_disconnect(struct input_dev *dev)
+{
+ int i;
+ struct led_classdev *leds = dev->leds;
+
+ for (i = 0; i < LED_CNT; i++)
+ if (leds[i].name)
+ led_classdev_unregister(&leds[i]);
+
+ input_led_delete(dev);
+
+ mutex_lock(&vt_led_registered_lock);
+ for (i = 0; i < LED_CNT; i++) {
+ if (!vt_leds[i].name || !test_bit(i, dev->ledbit))
+ continue;
+
+ vt_led_references[i]--;
+ if (vt_led_references[i]) {
+ /* Still some devices needing it */
+ continue;
+ }
+
+ led_classdev_unregister(&vt_leds[i]);
+ led_trigger_unregister(&vt_led_triggers[i]);
+ clear_bit(i, vt_led_registered);
+ }
+ mutex_unlock(&vt_led_registered_lock);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("User LED support for input layer");
+MODULE_AUTHOR("Samuel Thibault <samuel.thibault@ens-lyon.org>");
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 8736f69262f9..27cf0cdcdaab 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -11,9 +11,6 @@ menuconfig NEW_LEDS
Say Y to enable Linux LED support. This allows control of supported
LEDs from both userspace and optionally, by kernel events (triggers).
- This is not related to standard keyboard LEDs which are controlled
- via the input system.
-
if NEW_LEDS
config LEDS_CLASS
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 0bf1e4edf04d..6590558d1d31 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -42,7 +42,6 @@ DEFINE_MUTEX(lguest_lock);
static __init int map_switcher(void)
{
int i, err;
- struct page **pagep;
/*
* Map the Switcher in to high memory.
@@ -110,11 +109,9 @@ static __init int map_switcher(void)
* This code actually sets up the pages we've allocated to appear at
* switcher_addr. map_vm_area() takes the vma we allocated above, the
* kind of pages we're mapping (kernel pages), and a pointer to our
- * array of struct pages. It increments that pointer, but we don't
- * care.
+ * array of struct pages.
*/
- pagep = lg_switcher_pages;
- err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep);
+ err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, lg_switcher_pages);
if (err) {
printk("lguest: map_vm_area failed: %i\n", err);
goto free_vma;
diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c
index 1972d57aadb3..e7fbc08a0627 100644
--- a/drivers/misc/ti-st/st_core.c
+++ b/drivers/misc/ti-st/st_core.c
@@ -342,7 +342,7 @@ void st_int_recv(void *disc_data,
/* Unknow packet? */
default:
type = *ptr;
- if (st_gdata->list[type] == NULL) {
+ if (type >= ST_MAX_CHANNELS || st_gdata->list[type] == NULL) {
pr_err("chip/interface misbehavior dropping"
" frame starting with 0x%02x", type);
goto done;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 3abd3cbab75f..053e8506fd97 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1853,7 +1853,7 @@ static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi,
/* add filter to the list */
if (parent)
- hlist_add_after(&parent->fdir_node, &input->fdir_node);
+ hlist_add_behind(&input->fdir_node, &parent->fdir_node);
else
hlist_add_head(&input->fdir_node,
&pf->fdir_filter_list);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index a452730a3278..a6e5bcc12c8f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -2518,7 +2518,7 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
/* add filter to the list */
if (parent)
- hlist_add_after(&parent->fdir_node, &input->fdir_node);
+ hlist_add_behind(&input->fdir_node, &parent->fdir_node);
else
hlist_add_head(&input->fdir_node,
&adapter->fdir_filter_list);
diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c
index 768dfe9a9315..6d3e2093bf7f 100644
--- a/drivers/net/irda/donauboe.c
+++ b/drivers/net/irda/donauboe.c
@@ -1755,17 +1755,4 @@ static struct pci_driver donauboe_pci_driver = {
.resume = toshoboe_wakeup
};
-static int __init
-donauboe_init (void)
-{
- return pci_register_driver(&donauboe_pci_driver);
-}
-
-static void __exit
-donauboe_cleanup (void)
-{
- pci_unregister_driver(&donauboe_pci_driver);
-}
-
-module_init(donauboe_init);
-module_exit(donauboe_cleanup);
+module_pci_driver(donauboe_pci_driver);
diff --git a/drivers/parport/parport_ip32.c b/drivers/parport/parport_ip32.c
index c864f82bd37d..30e981be14c2 100644
--- a/drivers/parport/parport_ip32.c
+++ b/drivers/parport/parport_ip32.c
@@ -2204,7 +2204,7 @@ static int __init parport_ip32_init(void)
{
pr_info(PPIP32 "SGI IP32 built-in parallel port driver v0.6\n");
this_port = parport_ip32_probe_port();
- return IS_ERR(this_port) ? PTR_ERR(this_port) : 0;
+ return PTR_ERR_OR_ZERO(this_port);
}
/**
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 0754f5c7cb3b..83743a1a6731 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -760,6 +760,15 @@ config RTC_DRV_DS1742
This driver can also be built as a module. If so, the module
will be called rtc-ds1742.
+config RTC_DRV_DS2404
+ tristate "Maxim/Dallas DS2404"
+ help
+ If you say yes here you get support for the
+ Dallas DS2404 RTC chip.
+
+ This driver can also be built as a module. If so, the module
+ will be called rtc-ds2404.
+
config RTC_DRV_DA9052
tristate "Dialog DA9052/DA9053 RTC"
depends on PMIC_DA9052
@@ -789,7 +798,7 @@ config RTC_DRV_DA9063
config RTC_DRV_EFI
tristate "EFI RTC"
- depends on IA64
+ depends on EFI
help
If you say yes here you will get support for the EFI
Real Time Clock.
@@ -873,15 +882,6 @@ config RTC_DRV_V3020
This driver can also be built as a module. If so, the module
will be called rtc-v3020.
-config RTC_DRV_DS2404
- tristate "Dallas DS2404"
- help
- If you say yes here you get support for the
- Dallas DS2404 RTC chip.
-
- This driver can also be built as a module. If so, the module
- will be called rtc-ds2404.
-
config RTC_DRV_WM831X
tristate "Wolfson Microelectronics WM831x RTC"
depends on MFD_WM831X
@@ -1349,6 +1349,7 @@ config RTC_DRV_SIRFSOC
config RTC_DRV_MOXART
tristate "MOXA ART RTC"
+ depends on ARCH_MOXART || COMPILE_TEST
help
If you say yes here you get support for the MOXA ART
RTC module.
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 70347d041d10..f1dfc3648ee5 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -10,6 +10,10 @@ obj-$(CONFIG_RTC_SYSTOHC) += systohc.o
obj-$(CONFIG_RTC_CLASS) += rtc-core.o
rtc-core-y := class.o interface.o
+ifdef CONFIG_RTC_DRV_EFI
+rtc-core-y += rtc-efi-platform.o
+endif
+
rtc-core-$(CONFIG_RTC_INTF_DEV) += rtc-dev.o
rtc-core-$(CONFIG_RTC_INTF_PROC) += rtc-proc.o
rtc-core-$(CONFIG_RTC_INTF_SYSFS) += rtc-sysfs.o
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 5813fa52c3d4..5b2717f5dafa 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -348,6 +348,8 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
/* Make sure we're not setting alarms in the past */
err = __rtc_read_time(rtc, &tm);
+ if (err)
+ return err;
rtc_tm_to_time(&tm, &now);
if (scheduled <= now)
return -ETIME;
diff --git a/drivers/rtc/rtc-ds1343.c b/drivers/rtc/rtc-ds1343.c
index c3719189dd96..ae9f997223b1 100644
--- a/drivers/rtc/rtc-ds1343.c
+++ b/drivers/rtc/rtc-ds1343.c
@@ -4,6 +4,7 @@
* Real Time Clock
*
* Author : Raghavendra Chandra Ganiga <ravi23ganiga@gmail.com>
+ * Ankur Srivastava <sankurece@gmail.com> : DS1343 Nvram Support
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -45,6 +46,9 @@
#define DS1343_CONTROL_REG 0x0F
#define DS1343_STATUS_REG 0x10
#define DS1343_TRICKLE_REG 0x11
+#define DS1343_NVRAM 0x20
+
+#define DS1343_NVRAM_LEN 96
/* DS1343 Control Registers bits */
#define DS1343_EOSC 0x80
@@ -149,6 +153,64 @@ static ssize_t ds1343_store_glitchfilter(struct device *dev,
static DEVICE_ATTR(glitch_filter, S_IRUGO | S_IWUSR, ds1343_show_glitchfilter,
ds1343_store_glitchfilter);
+static ssize_t ds1343_nvram_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ int ret;
+ unsigned char address;
+ struct device *dev = kobj_to_dev(kobj);
+ struct ds1343_priv *priv = dev_get_drvdata(dev);
+
+ if (unlikely(!count))
+ return count;
+
+ if ((count + off) > DS1343_NVRAM_LEN)
+ count = DS1343_NVRAM_LEN - off;
+
+ address = DS1343_NVRAM + off;
+
+ ret = regmap_bulk_write(priv->map, address, buf, count);
+ if (ret < 0)
+ dev_err(&priv->spi->dev, "Error in nvram write %d", ret);
+
+ return (ret < 0) ? ret : count;
+}
+
+
+static ssize_t ds1343_nvram_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ int ret;
+ unsigned char address;
+ struct device *dev = kobj_to_dev(kobj);
+ struct ds1343_priv *priv = dev_get_drvdata(dev);
+
+ if (unlikely(!count))
+ return count;
+
+ if ((count + off) > DS1343_NVRAM_LEN)
+ count = DS1343_NVRAM_LEN - off;
+
+ address = DS1343_NVRAM + off;
+
+ ret = regmap_bulk_read(priv->map, address, buf, count);
+ if (ret < 0)
+ dev_err(&priv->spi->dev, "Error in nvram read %d\n", ret);
+
+ return (ret < 0) ? ret : count;
+}
+
+
+static struct bin_attribute nvram_attr = {
+ .attr.name = "nvram",
+ .attr.mode = S_IRUGO | S_IWUSR,
+ .read = ds1343_nvram_read,
+ .write = ds1343_nvram_write,
+ .size = DS1343_NVRAM_LEN,
+};
+
static ssize_t ds1343_show_alarmstatus(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -274,12 +336,16 @@ static int ds1343_sysfs_register(struct device *dev)
if (err)
goto error1;
+ err = device_create_bin_file(dev, &nvram_attr);
+ if (err)
+ goto error2;
+
if (priv->irq <= 0)
return err;
err = device_create_file(dev, &dev_attr_alarm_mode);
if (err)
- goto error2;
+ goto error3;
err = device_create_file(dev, &dev_attr_alarm_status);
if (!err)
@@ -287,6 +353,9 @@ static int ds1343_sysfs_register(struct device *dev)
device_remove_file(dev, &dev_attr_alarm_mode);
+error3:
+ device_remove_bin_file(dev, &nvram_attr);
+
error2:
device_remove_file(dev, &dev_attr_trickle_charger);
@@ -302,6 +371,7 @@ static void ds1343_sysfs_unregister(struct device *dev)
device_remove_file(dev, &dev_attr_glitch_filter);
device_remove_file(dev, &dev_attr_trickle_charger);
+ device_remove_bin_file(dev, &nvram_attr);
if (priv->irq <= 0)
return;
@@ -684,6 +754,7 @@ static struct spi_driver ds1343_driver = {
module_spi_driver(ds1343_driver);
MODULE_DESCRIPTION("DS1343 RTC SPI Driver");
-MODULE_AUTHOR("Raghavendra Chandra Ganiga <ravi23ganiga@gmail.com>");
+MODULE_AUTHOR("Raghavendra Chandra Ganiga <ravi23ganiga@gmail.com>,"
+ "Ankur Srivastava <sankurece@gmail.com>");
MODULE_LICENSE("GPL v2");
MODULE_VERSION(DS1343_DRV_VERSION);
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index c6b2191a4128..9822715db8ba 100644
--- a/drivers/rtc/rtc-ds1742.c
+++ b/drivers/rtc/rtc-ds1742.c
@@ -231,7 +231,7 @@ static struct platform_driver ds1742_rtc_driver = {
.driver = {
.name = "rtc-ds1742",
.owner = THIS_MODULE,
- .of_match_table = ds1742_rtc_of_match,
+ .of_match_table = of_match_ptr(ds1742_rtc_of_match),
},
};
diff --git a/drivers/rtc/rtc-efi-platform.c b/drivers/rtc/rtc-efi-platform.c
new file mode 100644
index 000000000000..1a7f890bff5b
--- /dev/null
+++ b/drivers/rtc/rtc-efi-platform.c
@@ -0,0 +1,30 @@
+/*
+ * Moved from arch/ia64/kernel/time.c
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * David Mosberger <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ * Copyright (C) 1999-2000 VA Linux Systems
+ * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com>
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/efi.h>
+#include <linux/platform_device.h>
+
+static struct platform_device rtc_efi_dev = {
+ .name = "rtc-efi",
+ .id = -1,
+};
+
+static int __init rtc_init(void)
+{
+ if (platform_device_register(&rtc_efi_dev) < 0)
+ pr_err("unable to register rtc device...\n");
+
+ /* not necessarily an error */
+ return 0;
+}
+module_init(rtc_init);
diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c
index c4c38431012e..8225b89de810 100644
--- a/drivers/rtc/rtc-efi.c
+++ b/drivers/rtc/rtc-efi.c
@@ -17,6 +17,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/stringify.h>
#include <linux/time.h>
#include <linux/platform_device.h>
#include <linux/rtc.h>
@@ -48,8 +49,8 @@ compute_wday(efi_time_t *eft)
int y;
int ndays = 0;
- if (eft->year < 1998) {
- pr_err("EFI year < 1998, invalid date\n");
+ if (eft->year < EFI_RTC_EPOCH) {
+ pr_err("EFI year < " __stringify(EFI_RTC_EPOCH) ", invalid date\n");
return -1;
}
@@ -78,19 +79,36 @@ convert_to_efi_time(struct rtc_time *wtime, efi_time_t *eft)
eft->timezone = EFI_UNSPECIFIED_TIMEZONE;
}
-static void
+static bool
convert_from_efi_time(efi_time_t *eft, struct rtc_time *wtime)
{
memset(wtime, 0, sizeof(*wtime));
+
+ if (eft->second >= 60)
+ return false;
wtime->tm_sec = eft->second;
+
+ if (eft->minute >= 60)
+ return false;
wtime->tm_min = eft->minute;
+
+ if (eft->hour >= 24)
+ return false;
wtime->tm_hour = eft->hour;
+
+ if (!eft->day || eft->day > 31)
+ return false;
wtime->tm_mday = eft->day;
+
+ if (!eft->month || eft->month > 12)
+ return false;
wtime->tm_mon = eft->month - 1;
wtime->tm_year = eft->year - 1900;
/* day of the week [0-6], Sunday=0 */
wtime->tm_wday = compute_wday(eft);
+ if (wtime->tm_wday < 0)
+ return false;
/* day in the year [1-365]*/
wtime->tm_yday = compute_yday(eft);
@@ -106,6 +124,8 @@ convert_from_efi_time(efi_time_t *eft, struct rtc_time *wtime)
default:
wtime->tm_isdst = -1;
}
+
+ return true;
}
static int efi_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
@@ -122,7 +142,8 @@ static int efi_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
if (status != EFI_SUCCESS)
return -EINVAL;
- convert_from_efi_time(&eft, &wkalrm->time);
+ if (!convert_from_efi_time(&eft, &wkalrm->time))
+ return -EIO;
return rtc_valid_tm(&wkalrm->time);
}
@@ -163,7 +184,8 @@ static int efi_read_time(struct device *dev, struct rtc_time *tm)
return -EINVAL;
}
- convert_from_efi_time(&eft, tm);
+ if (!convert_from_efi_time(&eft, tm))
+ return -EIO;
return rtc_valid_tm(tm);
}
diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c
index 02b0379ae550..4f34dc0095b5 100644
--- a/drivers/staging/android/binder.c
+++ b/drivers/staging/android/binder.c
@@ -585,7 +585,6 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
int ret;
- struct page **page_array_ptr;
page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE];
@@ -598,8 +597,7 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
}
tmp_area.addr = page_addr;
tmp_area.size = PAGE_SIZE + PAGE_SIZE /* guard page? */;
- page_array_ptr = page;
- ret = map_vm_area(&tmp_area, PAGE_KERNEL, &page_array_ptr);
+ ret = map_vm_area(&tmp_area, PAGE_KERNEL, page);
if (ret) {
pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n",
proc->pid, page_addr);
diff --git a/drivers/staging/lustre/lustre/libcfs/hash.c b/drivers/staging/lustre/lustre/libcfs/hash.c
index 5dde79418297..8ef1deb59d4a 100644
--- a/drivers/staging/lustre/lustre/libcfs/hash.c
+++ b/drivers/staging/lustre/lustre/libcfs/hash.c
@@ -351,7 +351,7 @@ cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
cfs_hash_dhead_t, dh_head);
if (dh->dh_tail != NULL) /* not empty */
- hlist_add_after(dh->dh_tail, hnode);
+ hlist_add_behind(hnode, dh->dh_tail);
else /* empty list */
hlist_add_head(hnode, &dh->dh_head);
dh->dh_tail = hnode;
@@ -406,7 +406,7 @@ cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
cfs_hash_dhead_dep_t, dd_head);
if (dh->dd_tail != NULL) /* not empty */
- hlist_add_after(dh->dd_tail, hnode);
+ hlist_add_behind(hnode, dh->dd_tail);
else /* empty list */
hlist_add_head(hnode, &dh->dd_head);
dh->dd_tail = hnode;
diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig
index b24aa010f68c..65cd80bf9aec 100644
--- a/drivers/tty/Kconfig
+++ b/drivers/tty/Kconfig
@@ -13,6 +13,10 @@ config VT
bool "Virtual terminal" if EXPERT
depends on !S390 && !UML
select INPUT
+ select NEW_LEDS
+ select LEDS_CLASS
+ select LEDS_TRIGGERS
+ select INPUT_LEDS
default y
---help---
If you say Y here, you will get support for terminal devices with
diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
index d0e3a4497707..d6ecfc9e734f 100644
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@ -33,6 +33,7 @@
#include <linux/string.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/leds.h>
#include <linux/kbd_kern.h>
#include <linux/kbd_diacr.h>
@@ -130,6 +131,7 @@ static char rep; /* flag telling character repeat */
static int shift_state = 0;
static unsigned char ledstate = 0xff; /* undefined */
+static unsigned char lockstate = 0xff; /* undefined */
static unsigned char ledioctl;
/*
@@ -961,6 +963,41 @@ static void k_brl(struct vc_data *vc, unsigned char value, char up_flag)
}
}
+/* We route VT keyboard "leds" through triggers */
+static void kbd_ledstate_trigger_activate(struct led_classdev *cdev);
+
+static struct led_trigger ledtrig_ledstate[] = {
+#define DEFINE_LEDSTATE_TRIGGER(kbd_led, nam) \
+ [kbd_led] = { \
+ .name = nam, \
+ .activate = kbd_ledstate_trigger_activate, \
+ }
+ DEFINE_LEDSTATE_TRIGGER(VC_SCROLLOCK, "kbd-scrollock"),
+ DEFINE_LEDSTATE_TRIGGER(VC_NUMLOCK, "kbd-numlock"),
+ DEFINE_LEDSTATE_TRIGGER(VC_CAPSLOCK, "kbd-capslock"),
+ DEFINE_LEDSTATE_TRIGGER(VC_KANALOCK, "kbd-kanalock"),
+#undef DEFINE_LEDSTATE_TRIGGER
+};
+
+static void kbd_lockstate_trigger_activate(struct led_classdev *cdev);
+
+static struct led_trigger ledtrig_lockstate[] = {
+#define DEFINE_LOCKSTATE_TRIGGER(kbd_led, nam) \
+ [kbd_led] = { \
+ .name = nam, \
+ .activate = kbd_lockstate_trigger_activate, \
+ }
+ DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTLOCK, "kbd-shiftlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_ALTGRLOCK, "kbd-altgrlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_CTRLLOCK, "kbd-ctrllock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_ALTLOCK, "kbd-altlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTLLOCK, "kbd-shiftllock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTRLOCK, "kbd-shiftrlock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_CTRLLLOCK, "kbd-ctrlllock"),
+ DEFINE_LOCKSTATE_TRIGGER(VC_CTRLRLOCK, "kbd-ctrlrlock"),
+#undef DEFINE_LOCKSTATE_TRIGGER
+};
+
/*
* The leds display either (i) the status of NumLock, CapsLock, ScrollLock,
* or (ii) whatever pattern of lights people want to show using KDSETLED,
@@ -995,18 +1032,25 @@ static inline unsigned char getleds(void)
return kbd->ledflagstate;
}
-static int kbd_update_leds_helper(struct input_handle *handle, void *data)
+/* Called on trigger connection, to set initial state */
+static void kbd_ledstate_trigger_activate(struct led_classdev *cdev)
{
- unsigned char leds = *(unsigned char *)data;
+ struct led_trigger *trigger = cdev->trigger;
+ int led = trigger - ledtrig_ledstate;
- if (test_bit(EV_LED, handle->dev->evbit)) {
- input_inject_event(handle, EV_LED, LED_SCROLLL, !!(leds & 0x01));
- input_inject_event(handle, EV_LED, LED_NUML, !!(leds & 0x02));
- input_inject_event(handle, EV_LED, LED_CAPSL, !!(leds & 0x04));
- input_inject_event(handle, EV_SYN, SYN_REPORT, 0);
- }
+ tasklet_disable(&keyboard_tasklet);
+ led_trigger_event(trigger, ledstate & (1 << led) ? LED_FULL : LED_OFF);
+ tasklet_enable(&keyboard_tasklet);
+}
- return 0;
+static void kbd_lockstate_trigger_activate(struct led_classdev *cdev)
+{
+ struct led_trigger *trigger = cdev->trigger;
+ int led = trigger - ledtrig_lockstate;
+
+ tasklet_disable(&keyboard_tasklet);
+ led_trigger_event(trigger, lockstate & (1 << led) ? LED_FULL : LED_OFF);
+ tasklet_enable(&keyboard_tasklet);
}
/**
@@ -1095,16 +1139,29 @@ static void kbd_bh(unsigned long dummy)
{
unsigned char leds;
unsigned long flags;
-
+ int i;
+
spin_lock_irqsave(&led_lock, flags);
leds = getleds();
spin_unlock_irqrestore(&led_lock, flags);
if (leds != ledstate) {
- input_handler_for_each_handle(&kbd_handler, &leds,
- kbd_update_leds_helper);
+ for (i = 0; i < ARRAY_SIZE(ledtrig_ledstate); i++)
+ if ((leds ^ ledstate) & (1 << i))
+ led_trigger_event(&ledtrig_ledstate[i],
+ leds & (1 << i)
+ ? LED_FULL : LED_OFF);
ledstate = leds;
}
+
+ if (kbd->lockstate != lockstate) {
+ for (i = 0; i < ARRAY_SIZE(ledtrig_lockstate); i++)
+ if ((kbd->lockstate ^ lockstate) & (1 << i))
+ led_trigger_event(&ledtrig_lockstate[i],
+ kbd->lockstate & (1 << i)
+ ? LED_FULL : LED_OFF);
+ lockstate = kbd->lockstate;
+ }
}
DECLARE_TASKLET_DISABLED(keyboard_tasklet, kbd_bh, 0);
@@ -1442,20 +1499,6 @@ static void kbd_disconnect(struct input_handle *handle)
kfree(handle);
}
-/*
- * Start keyboard handler on the new keyboard by refreshing LED state to
- * match the rest of the system.
- */
-static void kbd_start(struct input_handle *handle)
-{
- tasklet_disable(&keyboard_tasklet);
-
- if (ledstate != 0xff)
- kbd_update_leds_helper(handle, &ledstate);
-
- tasklet_enable(&keyboard_tasklet);
-}
-
static const struct input_device_id kbd_ids[] = {
{
.flags = INPUT_DEVICE_ID_MATCH_EVBIT,
@@ -1477,7 +1520,6 @@ static struct input_handler kbd_handler = {
.match = kbd_match,
.connect = kbd_connect,
.disconnect = kbd_disconnect,
- .start = kbd_start,
.name = "kbd",
.id_table = kbd_ids,
};
@@ -1501,6 +1543,20 @@ int __init kbd_init(void)
if (error)
return error;
+ for (i = 0; i < ARRAY_SIZE(ledtrig_ledstate); i++) {
+ error = led_trigger_register(&ledtrig_ledstate[i]);
+ if (error)
+ pr_err("error %d while registering trigger %s\n",
+ error, ledtrig_ledstate[i].name);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ledtrig_lockstate); i++) {
+ error = led_trigger_register(&ledtrig_lockstate[i]);
+ if (error)
+ pr_err("error %d while registering trigger %s\n",
+ error, ledtrig_lockstate[i].name);
+ }
+
tasklet_enable(&keyboard_tasklet);
tasklet_schedule(&keyboard_tasklet);
diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index 428089009cd5..19b170d9e570 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -190,8 +190,6 @@ static ssize_t brightness_store(struct device *dev,
}
mutex_unlock(&bd->ops_lock);
- backlight_generate_event(bd, BACKLIGHT_UPDATE_SYSFS);
-
return rc;
}
static DEVICE_ATTR_RW(brightness);
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index c770337c4b45..24575d9d882d 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -153,6 +153,7 @@ extern int adfs_map_lookup(struct super_block *sb, unsigned int frag_id, unsigne
extern unsigned int adfs_map_free(struct super_block *sb);
/* Misc */
+__printf(3, 4)
void __adfs_error(struct super_block *sb, const char *function,
const char *fmt, ...);
#define adfs_error(sb, fmt...) __adfs_error(sb, __func__, fmt)
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 0d138c0de293..51c279a29845 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -138,7 +138,7 @@ adfs_dir_lookup_byname(struct inode *inode, struct qstr *name, struct object_inf
goto out;
if (ADFS_I(inode)->parent_id != dir.parent_id) {
- adfs_error(sb, "parent directory changed under me! (%lx but got %lx)\n",
+ adfs_error(sb, "parent directory changed under me! (%lx but got %x)\n",
ADFS_I(inode)->parent_id, dir.parent_id);
ret = -EIO;
goto free_out;
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index d9e3bee4e653..f2ba88ab4aed 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -55,10 +55,10 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
}
size >>= sb->s_blocksize_bits;
- if (size > sizeof(dir->bh)/sizeof(dir->bh[0])) {
+ if (size > ARRAY_SIZE(dir->bh)) {
/* this directory is too big for fixed bh set, must allocate */
struct buffer_head **bh_fplus =
- kzalloc(size * sizeof(struct buffer_head *),
+ kcalloc(size, sizeof(struct buffer_head *),
GFP_KERNEL);
if (!bh_fplus) {
adfs_error(sb, "not enough memory for"
@@ -79,9 +79,8 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
dir->bh_fplus[blk] = sb_bread(sb, block);
if (!dir->bh_fplus[blk]) {
- adfs_error(sb, "dir object %X failed read for"
- " offset %d, mapped block %X",
- id, blk, block);
+ adfs_error(sb, "dir object %x failed read for offset %d, mapped block %lX",
+ id, blk, block);
goto out;
}
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index f7f87e233dd9..f40006db36df 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -46,6 +46,7 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode)
/* inode.c */
extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino);
+extern void bfs_dump_imap(const char *, struct super_block *);
/* file.c */
extern const struct inode_operations bfs_file_inops;
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index a399e6d9dc74..08063ae0a17c 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -75,8 +75,6 @@ const struct file_operations bfs_dir_operations = {
.llseek = generic_file_llseek,
};
-extern void dump_imap(const char *, struct super_block *);
-
static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool excl)
{
@@ -110,7 +108,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
BFS_I(inode)->i_eblock = 0;
insert_inode_hash(inode);
mark_inode_dirty(inode);
- dump_imap("create", s);
+ bfs_dump_imap("create", s);
err = bfs_add_entry(dir, dentry->d_name.name, dentry->d_name.len,
inode->i_ino);
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 7041ac35ace8..90bc079d9982 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -30,8 +30,6 @@ MODULE_LICENSE("GPL");
#define dprintf(x...)
#endif
-void dump_imap(const char *prefix, struct super_block *s);
-
struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
{
struct bfs_inode *di;
@@ -194,7 +192,7 @@ static void bfs_evict_inode(struct inode *inode)
info->si_freeb += bi->i_eblock + 1 - bi->i_sblock;
info->si_freei++;
clear_bit(ino, info->si_imap);
- dump_imap("delete_inode", s);
+ bfs_dump_imap("delete_inode", s);
}
/*
@@ -297,7 +295,7 @@ static const struct super_operations bfs_sops = {
.statfs = bfs_statfs,
};
-void dump_imap(const char *prefix, struct super_block *s)
+void bfs_dump_imap(const char *prefix, struct super_block *s)
{
#ifdef DEBUG
int i;
@@ -443,7 +441,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
}
brelse(bh);
brelse(sbh);
- dump_imap("read_super", s);
+ bfs_dump_imap("read_super", s);
return 0;
out3:
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 3892c1a23241..d94672a1f272 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -145,6 +145,25 @@ static int padzero(unsigned long elf_bss)
#define ELF_BASE_PLATFORM NULL
#endif
+/*
+ * Use get_random_int() to implement AT_RANDOM while avoiding depletion
+ * of the entropy pool.
+ */
+static void get_atrandom_bytes(unsigned char *buf, size_t nbytes)
+{
+ unsigned char *p = buf;
+
+ while (nbytes) {
+ unsigned int random_variable;
+ size_t chunk = min_t(size_t, nbytes, sizeof(random_variable));
+
+ random_variable = get_random_int();
+ memcpy(p, &random_variable, chunk);
+ p += chunk;
+ nbytes -= chunk;
+ }
+}
+
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
unsigned long load_addr, unsigned long interp_load_addr)
@@ -206,7 +225,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
/*
* Generate 16 random bytes for userspace PRNG seeding.
*/
- get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
+ get_atrandom_bytes(k_rand_bytes, sizeof(k_rand_bytes));
u_rand_bytes = (elf_addr_t __user *)
STACK_ALLOC(p, sizeof(k_rand_bytes));
if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index d749731dc0ee..dd333a67ebc2 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -39,13 +39,11 @@ int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args)
args);
/* start by checking things over */
- ASSERT(cache->fstop_percent >= 0 &&
- cache->fstop_percent < cache->fcull_percent &&
+ ASSERT(cache->fstop_percent < cache->fcull_percent &&
cache->fcull_percent < cache->frun_percent &&
cache->frun_percent < 100);
- ASSERT(cache->bstop_percent >= 0 &&
- cache->bstop_percent < cache->bcull_percent &&
+ ASSERT(cache->bstop_percent < cache->bcull_percent &&
cache->bcull_percent < cache->brun_percent &&
cache->brun_percent < 100);
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index b078d3081d6c..ff43e5a8711f 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -222,7 +222,7 @@ static ssize_t cachefiles_daemon_write(struct file *file,
if (test_bit(CACHEFILES_DEAD, &cache->flags))
return -EIO;
- if (datalen < 0 || datalen > PAGE_SIZE - 1)
+ if (datalen > PAGE_SIZE - 1)
return -EOPNOTSUPP;
/* drag the command string into the kernel so we can parse it */
@@ -385,7 +385,7 @@ static int cachefiles_daemon_fstop(struct cachefiles_cache *cache, char *args)
if (args[0] != '%' || args[1] != '\0')
return -EINVAL;
- if (fstop < 0 || fstop >= cache->fcull_percent)
+ if (fstop >= cache->fcull_percent)
return cachefiles_daemon_range_error(cache, args);
cache->fstop_percent = fstop;
@@ -457,7 +457,7 @@ static int cachefiles_daemon_bstop(struct cachefiles_cache *cache, char *args)
if (args[0] != '%' || args[1] != '\0')
return -EINVAL;
- if (bstop < 0 || bstop >= cache->bcull_percent)
+ if (bstop >= cache->bcull_percent)
return cachefiles_daemon_range_error(cache, args);
cache->bstop_percent = bstop;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 7ff866dbb89e..54ac0e8ad96c 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -38,7 +38,7 @@ static const struct cifs_sid sid_everyone = {
1, 1, {0, 0, 0, 0, 0, 1}, {0} };
/* security id for Authenticated Users system group */
static const struct cifs_sid sid_authusers = {
- 1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11)} };
+ 1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11)} };
/* group users */
static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 7d4361f40c23..692d79f2e104 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2477,14 +2477,14 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon,
}
parm_data = (struct cifs_posix_lock *)
((char *)&pSMBr->hdr.Protocol + data_offset);
- if (parm_data->lock_type == __constant_cpu_to_le16(CIFS_UNLCK))
+ if (parm_data->lock_type == cpu_to_le16(CIFS_UNLCK))
pLockData->fl_type = F_UNLCK;
else {
if (parm_data->lock_type ==
- __constant_cpu_to_le16(CIFS_RDLCK))
+ cpu_to_le16(CIFS_RDLCK))
pLockData->fl_type = F_RDLCK;
else if (parm_data->lock_type ==
- __constant_cpu_to_le16(CIFS_WRLCK))
+ cpu_to_le16(CIFS_WRLCK))
pLockData->fl_type = F_WRLCK;
pLockData->fl_start = le64_to_cpu(parm_data->start);
@@ -3276,25 +3276,25 @@ CIFSSMB_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
pSMB->compression_state = cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
pSMB->TotalParameterCount = 0;
- pSMB->TotalDataCount = __constant_cpu_to_le32(2);
+ pSMB->TotalDataCount = cpu_to_le32(2);
pSMB->MaxParameterCount = 0;
pSMB->MaxDataCount = 0;
pSMB->MaxSetupCount = 4;
pSMB->Reserved = 0;
pSMB->ParameterOffset = 0;
- pSMB->DataCount = __constant_cpu_to_le32(2);
+ pSMB->DataCount = cpu_to_le32(2);
pSMB->DataOffset =
cpu_to_le32(offsetof(struct smb_com_transaction_compr_ioctl_req,
compression_state) - 4); /* 84 */
pSMB->SetupCount = 4;
- pSMB->SubCommand = __constant_cpu_to_le16(NT_TRANSACT_IOCTL);
+ pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL);
pSMB->ParameterCount = 0;
- pSMB->FunctionCode = __constant_cpu_to_le32(FSCTL_SET_COMPRESSION);
+ pSMB->FunctionCode = cpu_to_le32(FSCTL_SET_COMPRESSION);
pSMB->IsFsctl = 1; /* FSCTL */
pSMB->IsRootFlag = 0;
pSMB->Fid = fid; /* file handle always le */
/* 3 byte pad, followed by 2 byte compress state */
- pSMB->ByteCount = __constant_cpu_to_le16(5);
+ pSMB->ByteCount = cpu_to_le16(5);
inc_rfc1001_len(pSMB, 5);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3430,10 +3430,10 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
cifs_acl->version = cpu_to_le16(1);
if (acl_type == ACL_TYPE_ACCESS) {
cifs_acl->access_entry_count = cpu_to_le16(count);
- cifs_acl->default_entry_count = __constant_cpu_to_le16(0xFFFF);
+ cifs_acl->default_entry_count = cpu_to_le16(0xFFFF);
} else if (acl_type == ACL_TYPE_DEFAULT) {
cifs_acl->default_entry_count = cpu_to_le16(count);
- cifs_acl->access_entry_count = __constant_cpu_to_le16(0xFFFF);
+ cifs_acl->access_entry_count = cpu_to_le16(0xFFFF);
} else {
cifs_dbg(FYI, "unknown ACL type %d\n", acl_type);
return 0;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 01a6339a554c..03558d432f7a 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1058,7 +1058,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
- buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
+ buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
if (!buf) {
free_xid(xid);
return -ENOMEM;
@@ -1393,7 +1393,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
max_num = (max_buf - sizeof(struct smb_hdr)) /
sizeof(LOCKING_ANDX_RANGE);
- buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
+ buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
if (!buf)
return -ENOMEM;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 39ee32688eac..39b850786e12 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -46,7 +46,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4,
USHRT_MAX));
pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
- pSMB->req.VcNumber = __constant_cpu_to_le16(1);
+ pSMB->req.VcNumber = cpu_to_le16(1);
/* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 3f17b4550831..e5100b8fb3a0 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -111,7 +111,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
return -EINVAL;
max_num = max_buf / sizeof(struct smb2_lock_element);
- buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL);
+ buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -247,7 +247,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
}
max_num = max_buf / sizeof(struct smb2_lock_element);
- buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL);
+ buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
if (!buf) {
free_xid(xid);
return -ENOMEM;
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index b8021fde987d..36867bd21d04 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -67,27 +67,27 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
* indexed by command in host byte order
*/
static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
- /* SMB2_NEGOTIATE */ __constant_cpu_to_le16(65),
- /* SMB2_SESSION_SETUP */ __constant_cpu_to_le16(9),
- /* SMB2_LOGOFF */ __constant_cpu_to_le16(4),
- /* SMB2_TREE_CONNECT */ __constant_cpu_to_le16(16),
- /* SMB2_TREE_DISCONNECT */ __constant_cpu_to_le16(4),
- /* SMB2_CREATE */ __constant_cpu_to_le16(89),
- /* SMB2_CLOSE */ __constant_cpu_to_le16(60),
- /* SMB2_FLUSH */ __constant_cpu_to_le16(4),
- /* SMB2_READ */ __constant_cpu_to_le16(17),
- /* SMB2_WRITE */ __constant_cpu_to_le16(17),
- /* SMB2_LOCK */ __constant_cpu_to_le16(4),
- /* SMB2_IOCTL */ __constant_cpu_to_le16(49),
+ /* SMB2_NEGOTIATE */ cpu_to_le16(65),
+ /* SMB2_SESSION_SETUP */ cpu_to_le16(9),
+ /* SMB2_LOGOFF */ cpu_to_le16(4),
+ /* SMB2_TREE_CONNECT */ cpu_to_le16(16),
+ /* SMB2_TREE_DISCONNECT */ cpu_to_le16(4),
+ /* SMB2_CREATE */ cpu_to_le16(89),
+ /* SMB2_CLOSE */ cpu_to_le16(60),
+ /* SMB2_FLUSH */ cpu_to_le16(4),
+ /* SMB2_READ */ cpu_to_le16(17),
+ /* SMB2_WRITE */ cpu_to_le16(17),
+ /* SMB2_LOCK */ cpu_to_le16(4),
+ /* SMB2_IOCTL */ cpu_to_le16(49),
/* BB CHECK this ... not listed in documentation */
- /* SMB2_CANCEL */ __constant_cpu_to_le16(0),
- /* SMB2_ECHO */ __constant_cpu_to_le16(4),
- /* SMB2_QUERY_DIRECTORY */ __constant_cpu_to_le16(9),
- /* SMB2_CHANGE_NOTIFY */ __constant_cpu_to_le16(9),
- /* SMB2_QUERY_INFO */ __constant_cpu_to_le16(9),
- /* SMB2_SET_INFO */ __constant_cpu_to_le16(2),
+ /* SMB2_CANCEL */ cpu_to_le16(0),
+ /* SMB2_ECHO */ cpu_to_le16(4),
+ /* SMB2_QUERY_DIRECTORY */ cpu_to_le16(9),
+ /* SMB2_CHANGE_NOTIFY */ cpu_to_le16(9),
+ /* SMB2_QUERY_INFO */ cpu_to_le16(9),
+ /* SMB2_SET_INFO */ cpu_to_le16(2),
/* BB FIXME can also be 44 for lease break */
- /* SMB2_OPLOCK_BREAK */ __constant_cpu_to_le16(24)
+ /* SMB2_OPLOCK_BREAK */ cpu_to_le16(24)
};
int
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 081529f316e3..59437c50472f 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -590,7 +590,7 @@ smb2_clone_range(const unsigned int xid,
goto cchunk_out;
/* For now array only one chunk long, will make more flexible later */
- pcchunk->ChunkCount = __constant_cpu_to_le32(1);
+ pcchunk->ChunkCount = cpu_to_le32(1);
pcchunk->Reserved = 0;
pcchunk->Reserved2 = 0;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 768cddb72bdb..2057250c76d3 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1355,7 +1355,7 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
char *ret_data = NULL;
fsctl_input.CompressionState =
- __constant_cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
+ cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
FSCTL_SET_COMPRESSION, true /* is_fsctl */,
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 69f3595d3952..d03adad014db 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -85,7 +85,7 @@
/* BB FIXME - analyze following length BB */
#define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */
-#define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe)
+#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
/*
* SMB2 Header Definition
@@ -96,7 +96,7 @@
*
*/
-#define SMB2_HEADER_STRUCTURE_SIZE __constant_cpu_to_le16(64)
+#define SMB2_HEADER_STRUCTURE_SIZE cpu_to_le16(64)
struct smb2_hdr {
__be32 smb2_buf_length; /* big endian on wire */
@@ -137,16 +137,16 @@ struct smb2_transform_hdr {
} __packed;
/* Encryption Algorithms */
-#define SMB2_ENCRYPTION_AES128_CCM __constant_cpu_to_le16(0x0001)
+#define SMB2_ENCRYPTION_AES128_CCM cpu_to_le16(0x0001)
/*
* SMB2 flag definitions
*/
-#define SMB2_FLAGS_SERVER_TO_REDIR __constant_cpu_to_le32(0x00000001)
-#define SMB2_FLAGS_ASYNC_COMMAND __constant_cpu_to_le32(0x00000002)
-#define SMB2_FLAGS_RELATED_OPERATIONS __constant_cpu_to_le32(0x00000004)
-#define SMB2_FLAGS_SIGNED __constant_cpu_to_le32(0x00000008)
-#define SMB2_FLAGS_DFS_OPERATIONS __constant_cpu_to_le32(0x10000000)
+#define SMB2_FLAGS_SERVER_TO_REDIR cpu_to_le32(0x00000001)
+#define SMB2_FLAGS_ASYNC_COMMAND cpu_to_le32(0x00000002)
+#define SMB2_FLAGS_RELATED_OPERATIONS cpu_to_le32(0x00000004)
+#define SMB2_FLAGS_SIGNED cpu_to_le32(0x00000008)
+#define SMB2_FLAGS_DFS_OPERATIONS cpu_to_le32(0x10000000)
/*
* Definitions for SMB2 Protocol Data Units (network frames)
@@ -157,7 +157,7 @@ struct smb2_transform_hdr {
*
*/
-#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_cpu_to_le16(9)
+#define SMB2_ERROR_STRUCTURE_SIZE2 cpu_to_le16(9)
struct smb2_err_rsp {
struct smb2_hdr hdr;
@@ -500,12 +500,12 @@ struct create_context {
#define SMB2_LEASE_HANDLE_CACHING_HE 0x02
#define SMB2_LEASE_WRITE_CACHING_HE 0x04
-#define SMB2_LEASE_NONE __constant_cpu_to_le32(0x00)
-#define SMB2_LEASE_READ_CACHING __constant_cpu_to_le32(0x01)
-#define SMB2_LEASE_HANDLE_CACHING __constant_cpu_to_le32(0x02)
-#define SMB2_LEASE_WRITE_CACHING __constant_cpu_to_le32(0x04)
+#define SMB2_LEASE_NONE cpu_to_le32(0x00)
+#define SMB2_LEASE_READ_CACHING cpu_to_le32(0x01)
+#define SMB2_LEASE_HANDLE_CACHING cpu_to_le32(0x02)
+#define SMB2_LEASE_WRITE_CACHING cpu_to_le32(0x04)
-#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS __constant_cpu_to_le32(0x02)
+#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS cpu_to_le32(0x02)
#define SMB2_LEASE_KEY_SIZE 16
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 1da168c61d35..278f8fdeb9ef 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -13,7 +13,7 @@
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/string.h>
#include <linux/list.h>
#include <linux/sched.h>
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 2849f41e72a2..1326d38960db 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -13,7 +13,7 @@
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/string.h>
#include <linux/coda.h>
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index cd8a63238b11..9c3dedc000d1 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -19,8 +19,7 @@
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/namei.h>
-
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/coda.h>
#include <linux/coda_psdev.h>
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 9e83b7790212..d244d743a232 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -18,7 +18,7 @@
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/coda.h>
#include <linux/coda_psdev.h>
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index fe3afb2de880..b945410bfcd5 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -21,9 +21,7 @@
#include <linux/vfs.h>
#include <linux/slab.h>
#include <linux/pid_namespace.h>
-
-#include <asm/uaccess.h>
-
+#include <linux/uaccess.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 3f5de96bbb58..4326d172fc27 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -16,7 +16,7 @@
#include <linux/string.h>
#include <linux/namei.h>
#include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/coda.h>
#include <linux/coda_psdev.h>
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 5c1e4242368b..822629126e89 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -40,7 +40,7 @@
#include <linux/pid_namespace.h>
#include <asm/io.h>
#include <asm/poll.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/coda.h>
#include <linux/coda_psdev.h>
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 21fcf8dcb9cd..5bb6e27298a4 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -27,7 +27,7 @@
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/vfs.h>
diff --git a/fs/compat.c b/fs/compat.c
index 66d3d3c6b4b2..6917fdba382c 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -562,7 +562,7 @@ ssize_t compat_rw_copy_check_uvector(int type,
goto out;
ret = -EINVAL;
- if (nr_segs > UIO_MAXIOV || nr_segs < 0)
+ if (nr_segs > UIO_MAXIOV)
goto out;
if (nr_segs > fast_segs) {
ret = -ENOMEM;
diff --git a/fs/coredump.c b/fs/coredump.c
index 0b2528fb640e..a93f7e6ea4cf 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -306,7 +306,7 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
if (unlikely(nr < 0))
return nr;
- tsk->flags = PF_DUMPCORE;
+ tsk->flags |= PF_DUMPCORE;
if (atomic_read(&mm->mm_users) == nr + 1)
goto done;
/*
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index ddcfe590b8a8..355c522f3585 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -11,6 +11,8 @@
* The actual compression is based on zlib, see the other files.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
@@ -21,7 +23,7 @@
#include <linux/vfs.h>
#include <linux/mutex.h>
#include <uapi/linux/cramfs_fs.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "internal.h"
@@ -153,7 +155,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
static unsigned char read_buffers[READ_BUFFERS][BUFFER_SIZE];
static unsigned buffer_blocknr[READ_BUFFERS];
-static struct super_block * buffer_dev[READ_BUFFERS];
+static struct super_block *buffer_dev[READ_BUFFERS];
static int next_buffer;
/*
@@ -205,6 +207,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
for (i = 0; i < BLKS_PER_BUF; i++) {
struct page *page = pages[i];
+
if (page) {
wait_on_page_locked(page);
if (!PageUptodate(page)) {
@@ -223,6 +226,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
data = read_buffers[buffer];
for (i = 0; i < BLKS_PER_BUF; i++) {
struct page *page = pages[i];
+
if (page) {
memcpy(data, kmap(page), PAGE_CACHE_SIZE);
kunmap(page);
@@ -237,6 +241,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
static void cramfs_kill_sb(struct super_block *sb)
{
struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
+
kill_block_super(sb);
kfree(sbi);
}
@@ -277,7 +282,7 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
/* check for wrong endianness */
if (super.magic == CRAMFS_MAGIC_WEND) {
if (!silent)
- printk(KERN_ERR "cramfs: wrong endianness\n");
+ pr_err("wrong endianness\n");
return -EINVAL;
}
@@ -287,22 +292,22 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
mutex_unlock(&read_mutex);
if (super.magic != CRAMFS_MAGIC) {
if (super.magic == CRAMFS_MAGIC_WEND && !silent)
- printk(KERN_ERR "cramfs: wrong endianness\n");
+ pr_err("wrong endianness\n");
else if (!silent)
- printk(KERN_ERR "cramfs: wrong magic\n");
+ pr_err("wrong magic\n");
return -EINVAL;
}
}
/* get feature flags first */
if (super.flags & ~CRAMFS_SUPPORTED_FLAGS) {
- printk(KERN_ERR "cramfs: unsupported filesystem features\n");
+ pr_err("unsupported filesystem features\n");
return -EINVAL;
}
/* Check that the root inode is in a sane state */
if (!S_ISDIR(super.root.mode)) {
- printk(KERN_ERR "cramfs: root is not a directory\n");
+ pr_err("root is not a directory\n");
return -EINVAL;
}
/* correct strange, hard-coded permissions of mkcramfs */
@@ -310,23 +315,23 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
root_offset = super.root.offset << 2;
if (super.flags & CRAMFS_FLAG_FSID_VERSION_2) {
- sbi->size=super.size;
- sbi->blocks=super.fsid.blocks;
- sbi->files=super.fsid.files;
+ sbi->size = super.size;
+ sbi->blocks = super.fsid.blocks;
+ sbi->files = super.fsid.files;
} else {
- sbi->size=1<<28;
- sbi->blocks=0;
- sbi->files=0;
+ sbi->size = 1<<28;
+ sbi->blocks = 0;
+ sbi->files = 0;
}
- sbi->magic=super.magic;
- sbi->flags=super.flags;
+ sbi->magic = super.magic;
+ sbi->flags = super.flags;
if (root_offset == 0)
- printk(KERN_INFO "cramfs: empty filesystem");
+ pr_info("empty filesystem");
else if (!(super.flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) &&
((root_offset != sizeof(struct cramfs_super)) &&
(root_offset != 512 + sizeof(struct cramfs_super))))
{
- printk(KERN_ERR "cramfs: bad root offset %lu\n", root_offset);
+ pr_err("bad root offset %lu\n", root_offset);
return -EINVAL;
}
@@ -425,7 +430,7 @@ static int cramfs_readdir(struct file *file, struct dir_context *ctx)
/*
* Lookup and fill in the inode data..
*/
-static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
+static struct dentry *cramfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
unsigned int offset = 0;
struct inode *inode = NULL;
@@ -483,7 +488,7 @@ out:
return NULL;
}
-static int cramfs_readpage(struct file *file, struct page * page)
+static int cramfs_readpage(struct file *file, struct page *page)
{
struct inode *inode = page->mapping->host;
u32 maxblock;
@@ -511,7 +516,7 @@ static int cramfs_readpage(struct file *file, struct page * page)
if (compr_len == 0)
; /* hole */
else if (unlikely(compr_len > (PAGE_CACHE_SIZE << 1))) {
- pr_err("cramfs: bad compressed blocksize %u\n",
+ pr_err("bad compressed blocksize %u\n",
compr_len);
goto err;
} else {
diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c
index 1760c1b84d97..ec4f1d4fdad0 100644
--- a/fs/cramfs/uncompress.c
+++ b/fs/cramfs/uncompress.c
@@ -15,6 +15,8 @@
* then is used by multiple filesystems.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/vmalloc.h>
@@ -37,7 +39,7 @@ int cramfs_uncompress_block(void *dst, int dstlen, void *src, int srclen)
err = zlib_inflateReset(&stream);
if (err != Z_OK) {
- printk("zlib_inflateReset error %d\n", err);
+ pr_err("zlib_inflateReset error %d\n", err);
zlib_inflateEnd(&stream);
zlib_inflateInit(&stream);
}
@@ -48,8 +50,8 @@ int cramfs_uncompress_block(void *dst, int dstlen, void *src, int srclen)
return stream.total_out;
err:
- printk("Error %d while decompressing!\n", err);
- printk("%p(%d)->%p(%d)\n", src, srclen, dst, dstlen);
+ pr_err("Error %d while decompressing!\n", err);
+ pr_err("%p(%d)->%p(%d)\n", src, srclen, dst, dstlen);
return -EIO;
}
@@ -57,7 +59,7 @@ int cramfs_uncompress_init(void)
{
if (!initialized++) {
stream.workspace = vmalloc(zlib_inflate_workspacesize());
- if ( !stream.workspace ) {
+ if (!stream.workspace) {
initialized = 0;
return -ENOMEM;
}
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 8d77ba7b1756..1323c568e362 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -718,16 +718,11 @@ static const struct file_operations waiters_fops = {
void dlm_delete_debug_file(struct dlm_ls *ls)
{
- if (ls->ls_debug_rsb_dentry)
- debugfs_remove(ls->ls_debug_rsb_dentry);
- if (ls->ls_debug_waiters_dentry)
- debugfs_remove(ls->ls_debug_waiters_dentry);
- if (ls->ls_debug_locks_dentry)
- debugfs_remove(ls->ls_debug_locks_dentry);
- if (ls->ls_debug_all_dentry)
- debugfs_remove(ls->ls_debug_all_dentry);
- if (ls->ls_debug_toss_dentry)
- debugfs_remove(ls->ls_debug_toss_dentry);
+ debugfs_remove(ls->ls_debug_rsb_dentry);
+ debugfs_remove(ls->ls_debug_waiters_dentry);
+ debugfs_remove(ls->ls_debug_locks_dentry);
+ debugfs_remove(ls->ls_debug_all_dentry);
+ debugfs_remove(ls->ls_debug_toss_dentry);
}
int dlm_create_debug_file(struct dlm_ls *ls)
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index 356c044e2cd3..bbee8f063dfa 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -12,7 +12,8 @@
#include "efs.h"
-static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len) {
+static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
+{
struct buffer_head *bh;
int slot, namelen;
@@ -40,10 +41,10 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
if (be16_to_cpu(dirblock->magic) != EFS_DIRBLK_MAGIC) {
pr_err("%s(): invalid directory block\n", __func__);
brelse(bh);
- return(0);
+ return 0;
}
- for(slot = 0; slot < dirblock->slots; slot++) {
+ for (slot = 0; slot < dirblock->slots; slot++) {
dirslot = (struct efs_dentry *) (((char *) bh->b_data) + EFS_SLOTAT(dirblock, slot));
namelen = dirslot->namelen;
@@ -52,12 +53,12 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
if ((namelen == len) && (!memcmp(name, nameptr, len))) {
inodenum = be32_to_cpu(dirslot->inode);
brelse(bh);
- return(inodenum);
+ return inodenum;
}
}
brelse(bh);
}
- return(0);
+ return 0;
}
struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
diff --git a/fs/exec.c b/fs/exec.c
index a3d33fe592d6..2ef2751f5a8d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -368,10 +368,6 @@ static int bprm_mm_init(struct linux_binprm *bprm)
if (!mm)
goto err;
- err = init_new_context(current, mm);
- if (err)
- goto err;
-
err = __bprm_mm_init(bprm);
if (err)
goto err;
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 7f20f25c232c..84529b8a331b 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -116,7 +116,7 @@ static int _sp2d_alloc(unsigned pages_in_unit, unsigned group_width,
num_a1pa = min_t(unsigned, PAGE_SIZE / sizeof__a1pa,
pages_in_unit - i);
- __a1pa = kzalloc(num_a1pa * sizeof__a1pa, GFP_KERNEL);
+ __a1pa = kcalloc(num_a1pa, sizeof__a1pa, GFP_KERNEL);
if (unlikely(!__a1pa)) {
ORE_DBGMSG("!! Failed to _alloc_1p_arrays=%d\n",
num_a1pa);
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index a8bc47f75fa0..5b6e9f246233 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -107,7 +107,10 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
}
if (!journal) {
- ret = generic_file_fsync(file, start, end, datasync);
+ if (test_opt(inode->i_sb, BARRIER))
+ ret = generic_file_fsync(file, start, end, datasync);
+ else
+ ret = __generic_file_fsync(file, start, end, datasync);
if (!ret && !hlist_empty(&inode->i_dentry))
ret = ext4_sync_parent(inode);
goto out;
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index a31b83c5cbd9..b39d487ccfb0 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -67,7 +67,7 @@ static int fscache_max_active_sysctl(struct ctl_table *table, int write,
return ret;
}
-struct ctl_table fscache_sysctls[] = {
+static struct ctl_table fscache_sysctls[] = {
{
.procname = "object_max_active",
.data = &fscache_object_max_active,
@@ -87,7 +87,7 @@ struct ctl_table fscache_sysctls[] = {
{}
};
-struct ctl_table fscache_sysctls_root[] = {
+static struct ctl_table fscache_sysctls_root[] = {
{
.procname = "fscache",
.mode = 0555,
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 32602c667b4a..7892e6fddb66 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -38,21 +38,30 @@ int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
return hfsplus_strcmp(&k1->cat.name, &k2->cat.name);
}
-void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
- u32 parent, struct qstr *str)
+/* Generates key for catalog file/folders record. */
+int hfsplus_cat_build_key(struct super_block *sb,
+ hfsplus_btree_key *key, u32 parent, struct qstr *str)
{
- int len;
+ int len, err;
key->cat.parent = cpu_to_be32(parent);
- if (str) {
- hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
- str->name, str->len);
- len = be16_to_cpu(key->cat.name.length);
- } else {
- key->cat.name.length = 0;
- len = 0;
- }
+ err = hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
+ str->name, str->len);
+ if (unlikely(err < 0))
+ return err;
+
+ len = be16_to_cpu(key->cat.name.length);
key->key_len = cpu_to_be16(6 + 2 * len);
+ return 0;
+}
+
+/* Generates key for catalog thread record. */
+void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
+ hfsplus_btree_key *key, u32 parent)
+{
+ key->cat.parent = cpu_to_be32(parent);
+ key->cat.name.length = 0;
+ key->key_len = cpu_to_be16(6);
}
static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent,
@@ -167,11 +176,16 @@ static int hfsplus_fill_cat_thread(struct super_block *sb,
hfsplus_cat_entry *entry, int type,
u32 parentid, struct qstr *str)
{
+ int err;
+
entry->type = cpu_to_be16(type);
entry->thread.reserved = 0;
entry->thread.parentID = cpu_to_be32(parentid);
- hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
+ err = hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
str->name, str->len);
+ if (unlikely(err < 0))
+ return err;
+
return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2;
}
@@ -183,7 +197,7 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid,
int err;
u16 type;
- hfsplus_cat_build_key(sb, fd->search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd->search_key, cnid);
err = hfs_brec_read(fd, &tmp, sizeof(hfsplus_cat_entry));
if (err)
return err;
@@ -250,11 +264,16 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
if (err)
return err;
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
entry_size = hfsplus_fill_cat_thread(sb, &entry,
S_ISDIR(inode->i_mode) ?
HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD,
dir->i_ino, str);
+ if (unlikely(entry_size < 0)) {
+ err = entry_size;
+ goto err2;
+ }
+
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
@@ -265,7 +284,10 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
if (err)
goto err2;
- hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ if (unlikely(err))
+ goto err1;
+
entry_size = hfsplus_cat_build_record(&entry, cnid, inode);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
@@ -288,7 +310,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
return 0;
err1:
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
if (!hfs_brec_find(&fd, hfs_find_rec_by_key))
hfs_brec_remove(&fd);
err2:
@@ -313,7 +335,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
if (!str) {
int len;
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -329,7 +351,9 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
off + 2, len);
fd.search_key->key_len = cpu_to_be16(6 + len);
} else
- hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ if (unlikely(err))
+ goto out;
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
@@ -360,7 +384,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
if (err)
goto out;
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -405,7 +429,11 @@ int hfsplus_rename_cat(u32 cnid,
dst_fd = src_fd;
/* find the old dir entry and read the data */
- hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
+ err = hfsplus_cat_build_key(sb, src_fd.search_key,
+ src_dir->i_ino, src_name);
+ if (unlikely(err))
+ goto out;
+
err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -419,7 +447,11 @@ int hfsplus_rename_cat(u32 cnid,
type = be16_to_cpu(entry.type);
/* create new dir entry with the data from the old entry */
- hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name);
+ err = hfsplus_cat_build_key(sb, dst_fd.search_key,
+ dst_dir->i_ino, dst_name);
+ if (unlikely(err))
+ goto out;
+
err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
@@ -436,7 +468,11 @@ int hfsplus_rename_cat(u32 cnid,
dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC;
/* finally remove the old entry */
- hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
+ err = hfsplus_cat_build_key(sb, src_fd.search_key,
+ src_dir->i_ino, src_name);
+ if (unlikely(err))
+ goto out;
+
err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -449,7 +485,7 @@ int hfsplus_rename_cat(u32 cnid,
src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC;
/* remove old thread entry */
- hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, src_fd.search_key, cnid);
err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -459,9 +495,14 @@ int hfsplus_rename_cat(u32 cnid,
goto out;
/* create new thread entry */
- hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, dst_fd.search_key, cnid);
entry_size = hfsplus_fill_cat_thread(sb, &entry, type,
dst_dir->i_ino, dst_name);
+ if (unlikely(entry_size < 0)) {
+ err = entry_size;
+ goto out;
+ }
+
err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 610a3260bef1..435bea231cc6 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -44,7 +44,10 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
if (err)
return ERR_PTR(err);
- hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
+ err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino,
+ &dentry->d_name);
+ if (unlikely(err < 0))
+ goto fail;
again:
err = hfs_brec_read(&fd, &entry, sizeof(entry));
if (err) {
@@ -97,9 +100,11 @@ again:
be32_to_cpu(entry.file.permissions.dev);
str.len = sprintf(name, "iNode%d", linkid);
str.name = name;
- hfsplus_cat_build_key(sb, fd.search_key,
+ err = hfsplus_cat_build_key(sb, fd.search_key,
HFSPLUS_SB(sb)->hidden_dir->i_ino,
&str);
+ if (unlikely(err < 0))
+ goto fail;
goto again;
}
} else if (!dentry->d_fsdata)
@@ -145,7 +150,7 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx)
err = -ENOMEM;
goto out;
}
- hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, inode->i_ino);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index eb5e059f481a..b0441d65fa54 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -443,8 +443,10 @@ int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *k1,
const hfsplus_btree_key *k2);
int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
const hfsplus_btree_key *k2);
-void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
+int hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
u32 parent, struct qstr *str);
+void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
+ hfsplus_btree_key *key, u32 parent);
void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms);
int hfsplus_find_cat(struct super_block *sb, u32 cnid,
struct hfs_find_data *fd);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 4cf2024b87da..593af2fdcc2d 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -515,7 +515,9 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
err = hfs_find_init(sbi->cat_tree, &fd);
if (err)
goto out_put_root;
- hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
+ err = hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
+ if (unlikely(err < 0))
+ goto out_put_root;
if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
hfs_find_exit(&fd);
if (entry.type != cpu_to_be16(HFSPLUS_FOLDER))
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index f36fc010fccb..2923a7bd82ac 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -545,12 +545,13 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
struct dnode *d1;
struct quad_buffer_head qbh1;
if (hpfs_sb(i->i_sb)->sb_chk)
- if (up != i->i_ino) {
- hpfs_error(i->i_sb,
- "bad pointer to fnode, dnode %08x, pointing to %08x, should be %08lx",
- dno, up, (unsigned long)i->i_ino);
- return;
- }
+ if (up != i->i_ino) {
+ hpfs_error(i->i_sb,
+ "bad pointer to fnode, dnode %08x, pointing to %08x, should be %08lx",
+ dno, up,
+ (unsigned long)i->i_ino);
+ return;
+ }
if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) {
d1->up = cpu_to_le32(up);
d1->root_dnode = 1;
@@ -1061,8 +1062,8 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
hpfs_brelse4(qbh);
if (hpfs_sb(s)->sb_chk)
if (hpfs_stop_cycles(s, dno, &c1, &c2, "map_fnode_dirent #1")) {
- kfree(name2);
- return NULL;
+ kfree(name2);
+ return NULL;
}
goto go_down;
}
diff --git a/fs/isofs/Makefile b/fs/isofs/Makefile
index bf162f0942d5..47a68e357512 100644
--- a/fs/isofs/Makefile
+++ b/fs/isofs/Makefile
@@ -8,3 +8,5 @@ isofs-objs-y := namei.o inode.o dir.o util.o rock.o export.o
isofs-objs-$(CONFIG_JOLIET) += joliet.o
isofs-objs-$(CONFIG_ZISOFS) += compress.o
isofs-objs := $(isofs-objs-y)
+
+# ccflags-y := -DDEBUG_FLAGS=1
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 592e5115a561..c9ba688dc407 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -15,6 +15,8 @@
*
* Transparent decompression of files on an iso9660 filesystem
*/
+#define DEBUG
+#define pr_fmt(fmt) "zisofs: " fmt
#include <linux/module.h>
#include <linux/init.h>
@@ -110,7 +112,7 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
*errp = -ENOMEM;
else
*errp = -EIO;
- printk(KERN_DEBUG "zisofs: zisofs_inflateInit returned %d\n",
+ pr_debug("zisofs_inflateInit returned %d\n",
zerr);
goto z_eio;
}
@@ -154,12 +156,12 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
if (zerr == Z_MEM_ERROR)
*errp = -ENOMEM;
else {
- printk(KERN_DEBUG
+ pr_debug(
"zisofs: zisofs_inflate returned"
" %d, inode = %lu,"
" page idx = %d, bh idx = %d,"
- " avail_in = %d,"
- " avail_out = %d\n",
+ " avail_in = %ld,"
+ " avail_out = %ld\n",
zerr, inode->i_ino, curpage,
curbh, stream.avail_in,
stream.avail_out);
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index 12088d8de3fa..44d1053dfad5 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -12,7 +12,7 @@
* Documentation/filesystems/nfs/Exporting
* fs/exportfs/expfs.c.
*/
-
+#define pr_fmt(fmt) "ISOFS: " fmt
#include "isofs.h"
static struct dentry *
@@ -52,8 +52,7 @@ static struct dentry *isofs_export_get_parent(struct dentry *child)
/* "child" must always be a directory. */
if (!S_ISDIR(child_inode->i_mode)) {
- printk(KERN_ERR "isofs: isofs_export_get_parent(): "
- "child is not a directory!\n");
+ pr_err("%s(): child is not a directory!\n", __func__);
rv = ERR_PTR(-EACCES);
goto out;
}
@@ -62,8 +61,7 @@ static struct dentry *isofs_export_get_parent(struct dentry *child)
* it is not zero, it means the directory failed to be
* normalized for some reason. */
if (e_child_inode->i_iget5_offset != 0) {
- printk(KERN_ERR "isofs: isofs_export_get_parent(): "
- "child directory not normalized!\n");
+ pr_err("isofs_export_get_parent(): child directory not normalized!\n");
rv = ERR_PTR(-EACCES);
goto out;
}
@@ -89,8 +87,7 @@ static struct dentry *isofs_export_get_parent(struct dentry *child)
/* Verify it is in fact the ".." entry. */
if ((isonum_711(de->name_len) != 1) || (de->name[0] != 1)) {
- printk(KERN_ERR "isofs: Unable to find the \"..\" "
- "directory for NFS.\n");
+ pr_err("Unable to find the \"..\" directory for NFS.\n");
rv = ERR_PTR(-EACCES);
goto out;
}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 4556ce1af5b0..cc23d86e174a 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -10,6 +10,8 @@
* 2004 Paul Serice - Inode Support pushed out from 4GB to 128GB
* 2004 Paul Serice - NFS Export Operations
*/
+#define DEBUG
+#define pr_fmt(fmt) "ISOFS: " fmt
#include <linux/init.h>
#include <linux/module.h>
@@ -528,23 +530,25 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session)
Te.cdte_format=CDROM_LBA;
i = ioctl_by_bdev(bdev, CDROMREADTOCENTRY, (unsigned long) &Te);
if (!i) {
- printk(KERN_DEBUG "ISOFS: Session %d start %d type %d\n",
+ pr_debug("Session %d start %d type %d\n",
session, Te.cdte_addr.lba,
Te.cdte_ctrl&CDROM_DATA_TRACK);
if ((Te.cdte_ctrl&CDROM_DATA_TRACK) == 4)
return Te.cdte_addr.lba;
}
- printk(KERN_ERR "ISOFS: Invalid session number or type of track\n");
+ pr_err("Invalid session number or type of track\n");
}
i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info);
if (session > 0)
- printk(KERN_ERR "ISOFS: Invalid session number\n");
+ pr_err("Invalid session number\n");
#if 0
- printk(KERN_DEBUG "isofs.inode: CDROMMULTISESSION: rc=%d\n",i);
+ pr_debug("isofs.inode: CDROMMULTISESSION: rc=%d\n", i);
if (i==0) {
- printk(KERN_DEBUG "isofs.inode: XA disk: %s\n",ms_info.xa_flag?"yes":"no");
- printk(KERN_DEBUG "isofs.inode: vol_desc_start = %d\n", ms_info.addr.lba);
+ pr_debug("isofs.inode: XA disk: %s\n",
+ ms_info.xa_flag?"yes":"no");
+ pr_debug("isofs.inode: vol_desc_start = %d\n",
+ ms_info.addr.lba);
}
#endif
if (i==0)
@@ -672,8 +676,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
else if (sec->escape[2] == 0x45)
joliet_level = 3;
- printk(KERN_DEBUG "ISO 9660 Extensions: "
- "Microsoft Joliet Level %d\n",
+ pr_debug("ISO 9660 Extensions: Microsoft Joliet Level %d\n",
joliet_level);
}
goto root_found;
@@ -771,11 +774,11 @@ root_found:
isonum_711(rootp->ext_attr_length);
sbi->s_firstdatazone = first_data_zone;
#ifndef BEQUIET
- printk(KERN_DEBUG "ISOFS: Max size:%ld Log zone size:%ld\n",
+ pr_debug("Max size:%ld Log zone size:%ld\n",
sbi->s_max_size, 1UL << sbi->s_log_zone_size);
- printk(KERN_DEBUG "ISOFS: First datazone:%ld\n", sbi->s_firstdatazone);
+ pr_debug("First datazone:%ld\n", sbi->s_firstdatazone);
if(sbi->s_high_sierra)
- printk(KERN_DEBUG "ISOFS: Disc in High Sierra format.\n");
+ pr_debug("Disc in High Sierra format.\n");
#endif
/*
@@ -878,9 +881,7 @@ root_found:
*/
if (sbi->s_rock == 1 && joliet_level &&
rootdir_empty(s, sbi->s_firstdatazone)) {
- printk(KERN_NOTICE
- "ISOFS: primary root directory is empty. "
- "Disabling Rock Ridge and switching to Joliet.");
+ pr_notice("primary root directory is empty. Disabling Rock Ridge and switching to Joliet.");
sbi->s_rock = 0;
}
@@ -898,8 +899,7 @@ root_found:
sbi->s_rock = 0;
if (sbi->s_firstdatazone != first_data_zone) {
sbi->s_firstdatazone = first_data_zone;
- printk(KERN_DEBUG
- "ISOFS: changing to secondary root\n");
+ pr_debug("changing to secondary root\n");
iput(inode);
inode = isofs_iget(s, sbi->s_firstdatazone, 0);
if (IS_ERR(inode))
@@ -918,9 +918,8 @@ root_found:
/* Make sure the root inode is a directory */
if (!S_ISDIR(inode->i_mode)) {
- printk(KERN_WARNING
- "isofs_fill_super: root inode is not a directory. "
- "Corrupted media?\n");
+ pr_warn("%s: root inode is not a directory. Corrupted media?\n",
+ __func__);
goto out_iput;
}
@@ -952,27 +951,26 @@ out_iput:
out_no_root:
error = PTR_ERR(inode);
if (error != -ENOMEM)
- printk(KERN_WARNING "%s: get root inode failed\n", __func__);
+ pr_warn("%s: get root inode failed\n", __func__);
out_no_inode:
#ifdef CONFIG_JOLIET
unload_nls(sbi->s_nls_iocharset);
#endif
goto out_freesbi;
out_no_read:
- printk(KERN_WARNING "%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n",
+ pr_warn("%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n",
__func__, s->s_id, iso_blknum, block);
goto out_freebh;
out_bad_zone_size:
- printk(KERN_WARNING "ISOFS: Bad logical zone size %ld\n",
- sbi->s_log_zone_size);
+ pr_warn("Bad logical zone size %ld\n", sbi->s_log_zone_size);
goto out_freebh;
out_bad_size:
- printk(KERN_WARNING "ISOFS: Logical zone size(%d) < hardware blocksize(%u)\n",
+ pr_warn("Logical zone size(%d) < hardware blocksize(%u)\n",
orig_zonesize, opt.blocksize);
goto out_freebh;
out_unknown_format:
if (!silent)
- printk(KERN_WARNING "ISOFS: Unable to identify CD-ROM format.\n");
+ pr_warn("Unable to identify CD-ROM format.\n");
out_freebh:
brelse(bh);
@@ -1021,7 +1019,7 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock,
error = -EIO;
rv = 0;
if (iblock != b_off) {
- printk(KERN_DEBUG "%s: block number too large\n", __func__);
+ pr_debug("%s: block number too large\n", __func__);
goto abort;
}
@@ -1042,7 +1040,7 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock,
* I/O errors.
*/
if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) {
- printk(KERN_DEBUG "%s: block >= EOF (%lu, %llu)\n",
+ pr_debug("%s: block >= EOF (%lu, %llu)\n",
__func__, b_off,
(unsigned long long)inode->i_size);
goto abort;
@@ -1068,12 +1066,11 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock,
iput(ninode);
if (++section > 100) {
- printk(KERN_DEBUG "%s: More than 100 file sections ?!?"
- " aborting...\n", __func__);
- printk(KERN_DEBUG "%s: block=%lu firstext=%u sect_size=%u "
- "nextblk=%lu nextoff=%lu\n", __func__,
- b_off, firstext, (unsigned) sect_size,
- nextblk, nextoff);
+ pr_debug("%s: More than 100 file sections ?!? aborting...\n",
+ __func__);
+ pr_debug("%s: block=%lu firstext=%u sect_size=%u nextblk=%lu nextoff=%lu\n",
+ __func__, b_off, firstext,
+ (unsigned) sect_size, nextblk, nextoff);
goto abort;
}
}
@@ -1105,7 +1102,7 @@ static int isofs_get_block(struct inode *inode, sector_t iblock,
int ret;
if (create) {
- printk(KERN_DEBUG "%s: Kernel tries to allocate a block\n", __func__);
+ pr_debug("%s: Kernel tries to allocate a block\n", __func__);
return -EROFS;
}
@@ -1248,13 +1245,12 @@ out_nomem:
return -ENOMEM;
out_noread:
- printk(KERN_INFO "ISOFS: unable to read i-node block %lu\n", block);
+ pr_info("unable to read i-node block %lu\n", block);
kfree(tmpde);
return -EIO;
out_toomany:
- printk(KERN_INFO "%s: More than 100 file sections ?!?, aborting...\n"
- "isofs_read_level3_size: inode=%lu\n",
+ pr_info("%s: More than 100 file sections ?!?, aborting...\n isofs_read_level3_size: inode=%lu\n",
__func__, inode->i_ino);
goto out;
}
@@ -1289,7 +1285,7 @@ static int isofs_read_inode(struct inode *inode)
tmpde = kmalloc(de_len, GFP_KERNEL);
if (tmpde == NULL) {
- printk(KERN_INFO "%s: out of memory\n", __func__);
+ pr_info("%s: out of memory\n", __func__);
ret = -ENOMEM;
goto fail;
}
@@ -1364,24 +1360,23 @@ static int isofs_read_inode(struct inode *inode)
inode->i_size &= 0x00ffffff;
if (de->interleave[0]) {
- printk(KERN_DEBUG "ISOFS: Interleaved files not (yet) supported.\n");
+ pr_debug("Interleaved files not (yet) supported.\n");
inode->i_size = 0;
}
/* I have no idea what file_unit_size is used for, so
we will flag it for now */
if (de->file_unit_size[0] != 0) {
- printk(KERN_DEBUG "ISOFS: File unit size != 0 for ISO file (%ld).\n",
- inode->i_ino);
+ pr_debug("File unit size != 0 for ISO file (%ld).\n",
+ inode->i_ino);
}
/* I have no idea what other flag bits are used for, so
we will flag it for now */
-#ifdef DEBUG
+#ifdef DEBUG_FLAGS
if((de->flags[-high_sierra] & ~2)!= 0){
- printk(KERN_DEBUG "ISOFS: Unusual flag settings for ISO file "
- "(%ld %x).\n",
- inode->i_ino, de->flags[-high_sierra]);
+ pr_debug("Unusual flag settings for ISO file (%ld %x).\n",
+ inode->i_ino, de->flags[-high_sierra]);
}
#endif
@@ -1450,7 +1445,7 @@ out:
return ret;
out_badread:
- printk(KERN_WARNING "ISOFS: unable to read i-node block\n");
+ pr_warn("unable to read i-node block\n");
fail:
goto out;
}
@@ -1541,6 +1536,7 @@ MODULE_ALIAS("iso9660");
static int __init init_iso9660_fs(void)
{
int err = init_inodecache();
+
if (err)
goto out;
#ifdef CONFIG_ZISOFS
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 95295640d9c8..c5ed09733112 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -113,9 +113,8 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
dpnt = de->name;
/* Basic sanity check, whether name doesn't exceed dir entry */
if (de_len < dlen + sizeof(struct iso_directory_record)) {
- printk(KERN_NOTICE "iso9660: Corrupted directory entry"
- " in block %lu of inode %lu\n", block,
- dir->i_ino);
+ pr_notice("Corrupted directory entry in block %lu of inode %lu\n",
+ block, dir->i_ino);
return 0;
}
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index c0bf42472e40..b13119556e5d 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -5,6 +5,8 @@
*
* Rock Ridge Extensions to iso9660
*/
+#define DEBUG
+#define pr_fmt(fmt) "ISOFS: rock: " fmt
#include <linux/slab.h>
#include <linux/pagemap.h>
@@ -89,9 +91,8 @@ static int rock_continue(struct rock_state *rs)
if ((unsigned)rs->cont_offset > blocksize - min_de_size ||
(unsigned)rs->cont_size > blocksize ||
(unsigned)(rs->cont_offset + rs->cont_size) > blocksize) {
- printk(KERN_NOTICE "rock: corrupted directory entry. "
- "extent=%d, offset=%d, size=%d\n",
- rs->cont_extent, rs->cont_offset, rs->cont_size);
+ pr_notice("corrupted directory entry. extent=%d, offset=%d, size=%d\n",
+ rs->cont_extent, rs->cont_offset, rs->cont_size);
ret = -EIO;
goto out;
}
@@ -117,7 +118,7 @@ static int rock_continue(struct rock_state *rs)
rs->cont_offset = 0;
return 0;
}
- printk("Unable to read rock-ridge attributes\n");
+ pr_warn("Unable to read rock-ridge attributes\n");
}
out:
kfree(rs->buffer);
@@ -176,10 +177,9 @@ static int rock_check_overflow(struct rock_state *rs, int sig)
}
len += offsetof(struct rock_ridge, u);
if (len > rs->len) {
- printk(KERN_NOTICE "rock: directory entry would overflow "
- "storage\n");
- printk(KERN_NOTICE "rock: sig=0x%02x, size=%d, remaining=%d\n",
- sig, len, rs->len);
+ pr_notice("directory entry would overflow storage\n");
+ pr_notice("sig=0x%02x, size=%d, remaining=%d\n",
+ sig, len, rs->len);
return -EIO;
}
return 0;
@@ -257,7 +257,7 @@ repeat:
break;
if (rr->u.NM.flags & ~1) {
- printk("Unsupported NM flag settings (%d)\n",
+ pr_warn("Unsupported NM flag settings (%d)\n",
rr->u.NM.flags);
break;
}
@@ -353,13 +353,13 @@ repeat:
break;
case SIG('E', 'R'):
ISOFS_SB(inode->i_sb)->s_rock = 1;
- printk(KERN_DEBUG "ISO 9660 Extensions: ");
+ pr_debug("ISO 9660 Extensions: ");
{
int p;
for (p = 0; p < rr->u.ER.len_id; p++)
- printk("%c", rr->u.ER.data[p]);
+ pr_warn("%c", rr->u.ER.data[p]);
}
- printk("\n");
+ pr_warn("\n");
break;
case SIG('P', 'X'):
inode->i_mode = isonum_733(rr->u.PX.mode);
@@ -450,8 +450,7 @@ repeat:
inode->i_size += 1;
break;
default:
- printk("Symlink component flag "
- "not implemented\n");
+ pr_warn("Symlink component flag not implemented\n");
}
slen -= slp->len + 2;
oldslp = slp;
@@ -481,8 +480,7 @@ repeat:
symlink_len = inode->i_size;
break;
case SIG('R', 'E'):
- printk(KERN_WARNING "Attempt to read inode for "
- "relocated directory\n");
+ pr_warn("Attempt to read inode for relocated directory\n");
goto out;
case SIG('C', 'L'):
ISOFS_I(inode)->i_first_extent =
@@ -518,9 +516,7 @@ repeat:
int block_shift =
isonum_711(&rr->u.ZF.parms[1]);
if (block_shift > 17) {
- printk(KERN_WARNING "isofs: "
- "Can't handle ZF block "
- "size of 2^%d\n",
+ pr_warn("Can't handle ZF block size of 2^%d\n",
block_shift);
} else {
/*
@@ -543,9 +539,7 @@ repeat:
real_size);
}
} else {
- printk(KERN_WARNING
- "isofs: Unknown ZF compression "
- "algorithm: %c%c\n",
+ pr_warn("Unknown ZF compression algorithm: %c%c\n",
rr->u.ZF.algorithm[0],
rr->u.ZF.algorithm[1]);
}
@@ -604,7 +598,7 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
*rpnt++ = '/';
break;
default:
- printk("Symlink component flag not implemented (%d)\n",
+ pr_warn("Symlink component flag not implemented (%d)\n",
slp->flags);
}
slen -= slp->len + 2;
@@ -757,10 +751,10 @@ out:
kfree(rs.buffer);
goto fail;
out_noread:
- printk("unable to read i-node block");
+ pr_warn("unable to read i-node block");
goto fail;
out_bad_span:
- printk("symlink spans iso9660 blocks\n");
+ pr_warn("symlink spans iso9660 blocks\n");
fail:
brelse(bh);
error:
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 0b9a1e44e833..5698dae5d92d 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -94,11 +94,12 @@ static int jffs2_zlib_compress(unsigned char *data_in,
while (def_strm.total_out < *dstlen - STREAM_END_SPACE && def_strm.total_in < *sourcelen) {
def_strm.avail_out = *dstlen - (def_strm.total_out + STREAM_END_SPACE);
- def_strm.avail_in = min((unsigned)(*sourcelen-def_strm.total_in), def_strm.avail_out);
- jffs2_dbg(1, "calling deflate with avail_in %d, avail_out %d\n",
+ def_strm.avail_in = min_t(unsigned long,
+ (*sourcelen-def_strm.total_in), def_strm.avail_out);
+ jffs2_dbg(1, "calling deflate with avail_in %ld, avail_out %ld\n",
def_strm.avail_in, def_strm.avail_out);
ret = zlib_deflate(&def_strm, Z_PARTIAL_FLUSH);
- jffs2_dbg(1, "deflate returned with avail_in %d, avail_out %d, total_in %ld, total_out %ld\n",
+ jffs2_dbg(1, "deflate returned with avail_in %ld, avail_out %ld, total_in %ld, total_out %ld\n",
def_strm.avail_in, def_strm.avail_out,
def_strm.total_in, def_strm.total_out);
if (ret != Z_OK) {
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 48140315f627..380d86e1ab45 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1019,11 +1019,11 @@ static int __logfs_is_valid_block(struct inode *inode, u64 bix, u64 ofs)
/**
* logfs_is_valid_block - check whether this block is still valid
*
- * @sb - superblock
- * @ofs - block physical offset
- * @ino - block inode number
- * @bix - block index
- * @level - block level
+ * @sb: superblock
+ * @ofs: block physical offset
+ * @ino: block inode number
+ * @bix: block index
+ * @gc_level: block level
*
* Returns 0 if the block is invalid, 1 if it is valid and 2 if it will
* become invalid once the journal is written.
@@ -2226,10 +2226,9 @@ void btree_write_block(struct logfs_block *block)
*
* @inode: parent inode (ifile or directory)
* @buf: object to write (inode or dentry)
- * @n: object size
- * @_pos: object number (file position in blocks/objects)
+ * @count: object size
+ * @bix: block index
* @flags: write flags
- * @lock: 0 if write lock is already taken, 1 otherwise
* @shadow_tree: shadow below this inode
*
* FIXME: All caller of this put a 200-300 byte variable on the stack,
diff --git a/fs/mpage.c b/fs/mpage.c
index 5f9ed622274f..003f6fe3cdb6 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -480,6 +480,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
struct buffer_head map_bh;
loff_t i_size = i_size_read(inode);
int ret = 0;
+ int wr = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
if (page_has_buffers(page)) {
struct buffer_head *head = page_buffers(page);
@@ -588,7 +589,7 @@ page_is_mapped:
* This page will go to BIO. Do we need to send this BIO off first?
*/
if (bio && mpd->last_block_in_bio != blocks[0] - 1)
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
alloc_new:
if (bio == NULL) {
@@ -612,7 +613,7 @@ alloc_new:
*/
length = first_unmapped << blkbits;
if (bio_add_page(bio, page, length, 0) < length) {
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
goto alloc_new;
}
@@ -622,7 +623,7 @@ alloc_new:
set_page_writeback(page);
unlock_page(page);
if (boundary || (first_unmapped != blocks_per_page)) {
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
if (boundary_block) {
write_boundary_block(boundary_bdev,
boundary_block, 1 << blkbits);
@@ -634,7 +635,7 @@ alloc_new:
confused:
if (bio)
- bio = mpage_bio_submit(WRITE, bio);
+ bio = mpage_bio_submit(wr, bio);
if (mpd->use_writepage) {
ret = mapping->a_ops->writepage(page, wbc);
@@ -690,8 +691,11 @@ mpage_writepages(struct address_space *mapping,
};
ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
- if (mpd.bio)
- mpage_bio_submit(WRITE, mpd.bio);
+ if (mpd.bio) {
+ int wr = (wbc->sync_mode == WB_SYNC_ALL ?
+ WRITE_SYNC : WRITE);
+ mpage_bio_submit(wr, mpd.bio);
+ }
}
blk_finish_plug(&plug);
return ret;
@@ -708,8 +712,11 @@ int mpage_writepage(struct page *page, get_block_t get_block,
.use_writepage = 0,
};
int ret = __mpage_writepage(page, wbc, &mpd);
- if (mpd.bio)
- mpage_bio_submit(WRITE, mpd.bio);
+ if (mpd.bio) {
+ int wr = (wbc->sync_mode == WB_SYNC_ALL ?
+ WRITE_SYNC : WRITE);
+ mpage_bio_submit(wr, mpd.bio);
+ }
return ret;
}
EXPORT_SYMBOL(mpage_writepage);
diff --git a/fs/namespace.c b/fs/namespace.c
index b10db3d69943..019ff81536a2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -845,7 +845,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
list_splice(&head, n->list.prev);
if (shadows)
- hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash);
+ hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
else
hlist_add_head_rcu(&mnt->mnt_hash,
m_hash(&parent->mnt, mnt->mnt_mountpoint));
diff --git a/fs/nilfs2/Makefile b/fs/nilfs2/Makefile
index 85c98737a146..fc603e0431bb 100644
--- a/fs/nilfs2/Makefile
+++ b/fs/nilfs2/Makefile
@@ -2,4 +2,4 @@ obj-$(CONFIG_NILFS2_FS) += nilfs2.o
nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \
btnode.o bmap.o btree.o direct.o dat.o recovery.o \
the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \
- ifile.o alloc.o gcinode.o ioctl.o
+ ifile.o alloc.o gcinode.o ioctl.o sysfs.o
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 9bc72dec3fa6..0696161bf59d 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -320,6 +320,14 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *);
int nilfs_init_gcinode(struct inode *inode);
void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs);
+/* sysfs.c */
+int __init nilfs_sysfs_init(void);
+void nilfs_sysfs_exit(void);
+int nilfs_sysfs_create_device_group(struct super_block *);
+void nilfs_sysfs_delete_device_group(struct the_nilfs *);
+int nilfs_sysfs_create_snapshot_group(struct nilfs_root *);
+void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *);
+
/*
* Inodes and files operations
*/
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 8c532b2ca3ab..48c9ce8c983d 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1014,7 +1014,7 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
struct dentry *dentry;
int ret;
- if (cno < 0 || cno > nilfs->ns_cno)
+ if (cno > nilfs->ns_cno)
return false;
if (cno >= nilfs_last_cno(nilfs))
@@ -1452,13 +1452,19 @@ static int __init init_nilfs_fs(void)
if (err)
goto fail;
- err = register_filesystem(&nilfs_fs_type);
+ err = nilfs_sysfs_init();
if (err)
goto free_cachep;
+ err = register_filesystem(&nilfs_fs_type);
+ if (err)
+ goto deinit_sysfs_entry;
+
printk(KERN_INFO "NILFS version 2 loaded\n");
return 0;
+deinit_sysfs_entry:
+ nilfs_sysfs_exit();
free_cachep:
nilfs_destroy_cachep();
fail:
@@ -1468,6 +1474,7 @@ fail:
static void __exit exit_nilfs_fs(void)
{
nilfs_destroy_cachep();
+ nilfs_sysfs_exit();
unregister_filesystem(&nilfs_fs_type);
}
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
new file mode 100644
index 000000000000..bbb0dcc35905
--- /dev/null
+++ b/fs/nilfs2/sysfs.c
@@ -0,0 +1,1137 @@
+/*
+ * sysfs.c - sysfs support implementation.
+ *
+ * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation.
+ * Copyright (C) 2014 HGST, Inc., a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com>
+ */
+
+#include <linux/kobject.h>
+
+#include "nilfs.h"
+#include "mdt.h"
+#include "sufile.h"
+#include "cpfile.h"
+#include "sysfs.h"
+
+/* /sys/fs/<nilfs>/ */
+static struct kset *nilfs_kset;
+
+#define NILFS_SHOW_TIME(time_t_val, buf) ({ \
+ struct tm res; \
+ int count = 0; \
+ time_to_tm(time_t_val, 0, &res); \
+ res.tm_year += 1900; \
+ res.tm_mon += 1; \
+ count = scnprintf(buf, PAGE_SIZE, \
+ "%ld-%.2d-%.2d %.2d:%.2d:%.2d\n", \
+ res.tm_year, res.tm_mon, res.tm_mday, \
+ res.tm_hour, res.tm_min, res.tm_sec);\
+ count; \
+})
+
+#define NILFS_DEV_INT_GROUP_OPS(name, parent_name) \
+static ssize_t nilfs_##name##_attr_show(struct kobject *kobj, \
+ struct attribute *attr, char *buf) \
+{ \
+ struct the_nilfs *nilfs = container_of(kobj->parent, \
+ struct the_nilfs, \
+ ns_##parent_name##_kobj); \
+ struct nilfs_##name##_attr *a = container_of(attr, \
+ struct nilfs_##name##_attr, \
+ attr); \
+ return a->show ? a->show(a, nilfs, buf) : 0; \
+} \
+static ssize_t nilfs_##name##_attr_store(struct kobject *kobj, \
+ struct attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ struct the_nilfs *nilfs = container_of(kobj->parent, \
+ struct the_nilfs, \
+ ns_##parent_name##_kobj); \
+ struct nilfs_##name##_attr *a = container_of(attr, \
+ struct nilfs_##name##_attr, \
+ attr); \
+ return a->store ? a->store(a, nilfs, buf, len) : 0; \
+} \
+static const struct sysfs_ops nilfs_##name##_attr_ops = { \
+ .show = nilfs_##name##_attr_show, \
+ .store = nilfs_##name##_attr_store, \
+};
+
+#define NILFS_DEV_INT_GROUP_TYPE(name, parent_name) \
+static void nilfs_##name##_attr_release(struct kobject *kobj) \
+{ \
+ struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \
+ struct the_nilfs *nilfs = container_of(kobj->parent, \
+ struct the_nilfs, \
+ ns_##parent_name##_kobj); \
+ subgroups = nilfs->ns_##parent_name##_subgroups; \
+ complete(&subgroups->sg_##name##_kobj_unregister); \
+} \
+static struct kobj_type nilfs_##name##_ktype = { \
+ .default_attrs = nilfs_##name##_attrs, \
+ .sysfs_ops = &nilfs_##name##_attr_ops, \
+ .release = nilfs_##name##_attr_release, \
+};
+
+#define NILFS_DEV_INT_GROUP_FNS(name, parent_name) \
+static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \
+{ \
+ struct kobject *parent; \
+ struct kobject *kobj; \
+ struct completion *kobj_unregister; \
+ struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \
+ int err; \
+ subgroups = nilfs->ns_##parent_name##_subgroups; \
+ kobj = &subgroups->sg_##name##_kobj; \
+ kobj_unregister = &subgroups->sg_##name##_kobj_unregister; \
+ parent = &nilfs->ns_##parent_name##_kobj; \
+ kobj->kset = nilfs_kset; \
+ init_completion(kobj_unregister); \
+ err = kobject_init_and_add(kobj, &nilfs_##name##_ktype, parent, \
+ #name); \
+ if (err) \
+ return err; \
+ return 0; \
+} \
+static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \
+{ \
+ kobject_del(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \
+}
+
+/************************************************************************
+ * NILFS snapshot attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_snapshot_inodes_count_show(struct nilfs_snapshot_attr *attr,
+ struct nilfs_root *root, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)atomic64_read(&root->inodes_count));
+}
+
+static ssize_t
+nilfs_snapshot_blocks_count_show(struct nilfs_snapshot_attr *attr,
+ struct nilfs_root *root, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)atomic64_read(&root->blocks_count));
+}
+
+static const char snapshot_readme_str[] =
+ "The group contains details about mounted snapshot.\n\n"
+ "(1) inodes_count\n\tshow number of inodes for snapshot.\n\n"
+ "(2) blocks_count\n\tshow number of blocks for snapshot.\n\n";
+
+static ssize_t
+nilfs_snapshot_README_show(struct nilfs_snapshot_attr *attr,
+ struct nilfs_root *root, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, snapshot_readme_str);
+}
+
+NILFS_SNAPSHOT_RO_ATTR(inodes_count);
+NILFS_SNAPSHOT_RO_ATTR(blocks_count);
+NILFS_SNAPSHOT_RO_ATTR(README);
+
+static struct attribute *nilfs_snapshot_attrs[] = {
+ NILFS_SNAPSHOT_ATTR_LIST(inodes_count),
+ NILFS_SNAPSHOT_ATTR_LIST(blocks_count),
+ NILFS_SNAPSHOT_ATTR_LIST(README),
+ NULL,
+};
+
+static ssize_t nilfs_snapshot_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct nilfs_root *root =
+ container_of(kobj, struct nilfs_root, snapshot_kobj);
+ struct nilfs_snapshot_attr *a =
+ container_of(attr, struct nilfs_snapshot_attr, attr);
+
+ return a->show ? a->show(a, root, buf) : 0;
+}
+
+static ssize_t nilfs_snapshot_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct nilfs_root *root =
+ container_of(kobj, struct nilfs_root, snapshot_kobj);
+ struct nilfs_snapshot_attr *a =
+ container_of(attr, struct nilfs_snapshot_attr, attr);
+
+ return a->store ? a->store(a, root, buf, len) : 0;
+}
+
+static void nilfs_snapshot_attr_release(struct kobject *kobj)
+{
+ struct nilfs_root *root = container_of(kobj, struct nilfs_root,
+ snapshot_kobj);
+ complete(&root->snapshot_kobj_unregister);
+}
+
+static const struct sysfs_ops nilfs_snapshot_attr_ops = {
+ .show = nilfs_snapshot_attr_show,
+ .store = nilfs_snapshot_attr_store,
+};
+
+static struct kobj_type nilfs_snapshot_ktype = {
+ .default_attrs = nilfs_snapshot_attrs,
+ .sysfs_ops = &nilfs_snapshot_attr_ops,
+ .release = nilfs_snapshot_attr_release,
+};
+
+int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root)
+{
+ struct the_nilfs *nilfs;
+ struct kobject *parent;
+ int err;
+
+ nilfs = root->nilfs;
+ parent = &nilfs->ns_dev_subgroups->sg_mounted_snapshots_kobj;
+ root->snapshot_kobj.kset = nilfs_kset;
+ init_completion(&root->snapshot_kobj_unregister);
+
+ if (root->cno == NILFS_CPTREE_CURRENT_CNO) {
+ err = kobject_init_and_add(&root->snapshot_kobj,
+ &nilfs_snapshot_ktype,
+ &nilfs->ns_dev_kobj,
+ "current_checkpoint");
+ } else {
+ err = kobject_init_and_add(&root->snapshot_kobj,
+ &nilfs_snapshot_ktype,
+ parent,
+ "%llu", root->cno);
+ }
+
+ if (err)
+ return err;
+
+ return 0;
+}
+
+void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root)
+{
+ kobject_del(&root->snapshot_kobj);
+}
+
+/************************************************************************
+ * NILFS mounted snapshots attrs *
+ ************************************************************************/
+
+static const char mounted_snapshots_readme_str[] =
+ "The mounted_snapshots group contains group for\n"
+ "every mounted snapshot.\n";
+
+static ssize_t
+nilfs_mounted_snapshots_README_show(struct nilfs_mounted_snapshots_attr *attr,
+ struct the_nilfs *nilfs, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, mounted_snapshots_readme_str);
+}
+
+NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(README);
+
+static struct attribute *nilfs_mounted_snapshots_attrs[] = {
+ NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(mounted_snapshots, dev);
+NILFS_DEV_INT_GROUP_TYPE(mounted_snapshots, dev);
+NILFS_DEV_INT_GROUP_FNS(mounted_snapshots, dev);
+
+/************************************************************************
+ * NILFS checkpoints attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_checkpoints_checkpoints_number_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 ncheckpoints;
+ struct nilfs_cpstat cpstat;
+ int err;
+
+ down_read(&nilfs->ns_segctor_sem);
+ err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat);
+ up_read(&nilfs->ns_segctor_sem);
+ if (err < 0) {
+ printk(KERN_ERR "NILFS: unable to get checkpoint stat: err=%d\n",
+ err);
+ return err;
+ }
+
+ ncheckpoints = cpstat.cs_ncps;
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", ncheckpoints);
+}
+
+static ssize_t
+nilfs_checkpoints_snapshots_number_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 nsnapshots;
+ struct nilfs_cpstat cpstat;
+ int err;
+
+ down_read(&nilfs->ns_segctor_sem);
+ err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat);
+ up_read(&nilfs->ns_segctor_sem);
+ if (err < 0) {
+ printk(KERN_ERR "NILFS: unable to get checkpoint stat: err=%d\n",
+ err);
+ return err;
+ }
+
+ nsnapshots = cpstat.cs_nsss;
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", nsnapshots);
+}
+
+static ssize_t
+nilfs_checkpoints_last_seg_checkpoint_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 last_cno;
+
+ spin_lock(&nilfs->ns_last_segment_lock);
+ last_cno = nilfs->ns_last_cno;
+ spin_unlock(&nilfs->ns_last_segment_lock);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", last_cno);
+}
+
+static ssize_t
+nilfs_checkpoints_next_checkpoint_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 cno;
+
+ down_read(&nilfs->ns_sem);
+ cno = nilfs->ns_cno;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", cno);
+}
+
+static const char checkpoints_readme_str[] =
+ "The checkpoints group contains attributes that describe\n"
+ "details about volume's checkpoints.\n\n"
+ "(1) checkpoints_number\n\tshow number of checkpoints on volume.\n\n"
+ "(2) snapshots_number\n\tshow number of snapshots on volume.\n\n"
+ "(3) last_seg_checkpoint\n"
+ "\tshow checkpoint number of the latest segment.\n\n"
+ "(4) next_checkpoint\n\tshow next checkpoint number.\n\n";
+
+static ssize_t
+nilfs_checkpoints_README_show(struct nilfs_checkpoints_attr *attr,
+ struct the_nilfs *nilfs, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, checkpoints_readme_str);
+}
+
+NILFS_CHECKPOINTS_RO_ATTR(checkpoints_number);
+NILFS_CHECKPOINTS_RO_ATTR(snapshots_number);
+NILFS_CHECKPOINTS_RO_ATTR(last_seg_checkpoint);
+NILFS_CHECKPOINTS_RO_ATTR(next_checkpoint);
+NILFS_CHECKPOINTS_RO_ATTR(README);
+
+static struct attribute *nilfs_checkpoints_attrs[] = {
+ NILFS_CHECKPOINTS_ATTR_LIST(checkpoints_number),
+ NILFS_CHECKPOINTS_ATTR_LIST(snapshots_number),
+ NILFS_CHECKPOINTS_ATTR_LIST(last_seg_checkpoint),
+ NILFS_CHECKPOINTS_ATTR_LIST(next_checkpoint),
+ NILFS_CHECKPOINTS_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(checkpoints, dev);
+NILFS_DEV_INT_GROUP_TYPE(checkpoints, dev);
+NILFS_DEV_INT_GROUP_FNS(checkpoints, dev);
+
+/************************************************************************
+ * NILFS segments attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_segments_segments_number_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%lu\n", nilfs->ns_nsegments);
+}
+
+static ssize_t
+nilfs_segments_blocks_per_segment_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%lu\n", nilfs->ns_blocks_per_segment);
+}
+
+static ssize_t
+nilfs_segments_clean_segments_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ unsigned long ncleansegs;
+
+ down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+ ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
+ up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%lu\n", ncleansegs);
+}
+
+static ssize_t
+nilfs_segments_dirty_segments_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_sustat sustat;
+ int err;
+
+ down_read(&nilfs->ns_segctor_sem);
+ err = nilfs_sufile_get_stat(nilfs->ns_sufile, &sustat);
+ up_read(&nilfs->ns_segctor_sem);
+ if (err < 0) {
+ printk(KERN_ERR "NILFS: unable to get segment stat: err=%d\n",
+ err);
+ return err;
+ }
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", sustat.ss_ndirtysegs);
+}
+
+static const char segments_readme_str[] =
+ "The segments group contains attributes that describe\n"
+ "details about volume's segments.\n\n"
+ "(1) segments_number\n\tshow number of segments on volume.\n\n"
+ "(2) blocks_per_segment\n\tshow number of blocks in segment.\n\n"
+ "(3) clean_segments\n\tshow count of clean segments.\n\n"
+ "(4) dirty_segments\n\tshow count of dirty segments.\n\n";
+
+static ssize_t
+nilfs_segments_README_show(struct nilfs_segments_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, segments_readme_str);
+}
+
+NILFS_SEGMENTS_RO_ATTR(segments_number);
+NILFS_SEGMENTS_RO_ATTR(blocks_per_segment);
+NILFS_SEGMENTS_RO_ATTR(clean_segments);
+NILFS_SEGMENTS_RO_ATTR(dirty_segments);
+NILFS_SEGMENTS_RO_ATTR(README);
+
+static struct attribute *nilfs_segments_attrs[] = {
+ NILFS_SEGMENTS_ATTR_LIST(segments_number),
+ NILFS_SEGMENTS_ATTR_LIST(blocks_per_segment),
+ NILFS_SEGMENTS_ATTR_LIST(clean_segments),
+ NILFS_SEGMENTS_ATTR_LIST(dirty_segments),
+ NILFS_SEGMENTS_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(segments, dev);
+NILFS_DEV_INT_GROUP_TYPE(segments, dev);
+NILFS_DEV_INT_GROUP_FNS(segments, dev);
+
+/************************************************************************
+ * NILFS segctor attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_segctor_last_pseg_block_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ sector_t last_pseg;
+
+ spin_lock(&nilfs->ns_last_segment_lock);
+ last_pseg = nilfs->ns_last_pseg;
+ spin_unlock(&nilfs->ns_last_segment_lock);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)last_pseg);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_sequence_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ u64 last_seq;
+
+ spin_lock(&nilfs->ns_last_segment_lock);
+ last_seq = nilfs->ns_last_seq;
+ spin_unlock(&nilfs->ns_last_segment_lock);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", last_seq);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_checkpoint_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 last_cno;
+
+ spin_lock(&nilfs->ns_last_segment_lock);
+ last_cno = nilfs->ns_last_cno;
+ spin_unlock(&nilfs->ns_last_segment_lock);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", last_cno);
+}
+
+static ssize_t
+nilfs_segctor_current_seg_sequence_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ u64 seg_seq;
+
+ down_read(&nilfs->ns_sem);
+ seg_seq = nilfs->ns_seg_seq;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", seg_seq);
+}
+
+static ssize_t
+nilfs_segctor_current_last_full_seg_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 segnum;
+
+ down_read(&nilfs->ns_sem);
+ segnum = nilfs->ns_segnum;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", segnum);
+}
+
+static ssize_t
+nilfs_segctor_next_full_seg_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 nextnum;
+
+ down_read(&nilfs->ns_sem);
+ nextnum = nilfs->ns_nextnum;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", nextnum);
+}
+
+static ssize_t
+nilfs_segctor_next_pseg_offset_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ unsigned long pseg_offset;
+
+ down_read(&nilfs->ns_sem);
+ pseg_offset = nilfs->ns_pseg_offset;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%lu\n", pseg_offset);
+}
+
+static ssize_t
+nilfs_segctor_next_checkpoint_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ __u64 cno;
+
+ down_read(&nilfs->ns_sem);
+ cno = nilfs->ns_cno;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", cno);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_write_time_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t ctime;
+
+ down_read(&nilfs->ns_sem);
+ ctime = nilfs->ns_ctime;
+ up_read(&nilfs->ns_sem);
+
+ return NILFS_SHOW_TIME(ctime, buf);
+}
+
+static ssize_t
+nilfs_segctor_last_seg_write_time_secs_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t ctime;
+
+ down_read(&nilfs->ns_sem);
+ ctime = nilfs->ns_ctime;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)ctime);
+}
+
+static ssize_t
+nilfs_segctor_last_nongc_write_time_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t nongc_ctime;
+
+ down_read(&nilfs->ns_sem);
+ nongc_ctime = nilfs->ns_nongc_ctime;
+ up_read(&nilfs->ns_sem);
+
+ return NILFS_SHOW_TIME(nongc_ctime, buf);
+}
+
+static ssize_t
+nilfs_segctor_last_nongc_write_time_secs_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t nongc_ctime;
+
+ down_read(&nilfs->ns_sem);
+ nongc_ctime = nilfs->ns_nongc_ctime;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)nongc_ctime);
+}
+
+static ssize_t
+nilfs_segctor_dirty_data_blocks_count_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ u32 ndirtyblks;
+
+ down_read(&nilfs->ns_sem);
+ ndirtyblks = atomic_read(&nilfs->ns_ndirtyblks);
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", ndirtyblks);
+}
+
+static const char segctor_readme_str[] =
+ "The segctor group contains attributes that describe\n"
+ "segctor thread activity details.\n\n"
+ "(1) last_pseg_block\n"
+ "\tshow start block number of the latest segment.\n\n"
+ "(2) last_seg_sequence\n"
+ "\tshow sequence value of the latest segment.\n\n"
+ "(3) last_seg_checkpoint\n"
+ "\tshow checkpoint number of the latest segment.\n\n"
+ "(4) current_seg_sequence\n\tshow segment sequence counter.\n\n"
+ "(5) current_last_full_seg\n"
+ "\tshow index number of the latest full segment.\n\n"
+ "(6) next_full_seg\n"
+ "\tshow index number of the full segment index to be used next.\n\n"
+ "(7) next_pseg_offset\n"
+ "\tshow offset of next partial segment in the current full segment.\n\n"
+ "(8) next_checkpoint\n\tshow next checkpoint number.\n\n"
+ "(9) last_seg_write_time\n"
+ "\tshow write time of the last segment in human-readable format.\n\n"
+ "(10) last_seg_write_time_secs\n"
+ "\tshow write time of the last segment in seconds.\n\n"
+ "(11) last_nongc_write_time\n"
+ "\tshow write time of the last segment not for cleaner operation "
+ "in human-readable format.\n\n"
+ "(12) last_nongc_write_time_secs\n"
+ "\tshow write time of the last segment not for cleaner operation "
+ "in seconds.\n\n"
+ "(13) dirty_data_blocks_count\n"
+ "\tshow number of dirty data blocks.\n\n";
+
+static ssize_t
+nilfs_segctor_README_show(struct nilfs_segctor_attr *attr,
+ struct the_nilfs *nilfs, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, segctor_readme_str);
+}
+
+NILFS_SEGCTOR_RO_ATTR(last_pseg_block);
+NILFS_SEGCTOR_RO_ATTR(last_seg_sequence);
+NILFS_SEGCTOR_RO_ATTR(last_seg_checkpoint);
+NILFS_SEGCTOR_RO_ATTR(current_seg_sequence);
+NILFS_SEGCTOR_RO_ATTR(current_last_full_seg);
+NILFS_SEGCTOR_RO_ATTR(next_full_seg);
+NILFS_SEGCTOR_RO_ATTR(next_pseg_offset);
+NILFS_SEGCTOR_RO_ATTR(next_checkpoint);
+NILFS_SEGCTOR_RO_ATTR(last_seg_write_time);
+NILFS_SEGCTOR_RO_ATTR(last_seg_write_time_secs);
+NILFS_SEGCTOR_RO_ATTR(last_nongc_write_time);
+NILFS_SEGCTOR_RO_ATTR(last_nongc_write_time_secs);
+NILFS_SEGCTOR_RO_ATTR(dirty_data_blocks_count);
+NILFS_SEGCTOR_RO_ATTR(README);
+
+static struct attribute *nilfs_segctor_attrs[] = {
+ NILFS_SEGCTOR_ATTR_LIST(last_pseg_block),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_sequence),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_checkpoint),
+ NILFS_SEGCTOR_ATTR_LIST(current_seg_sequence),
+ NILFS_SEGCTOR_ATTR_LIST(current_last_full_seg),
+ NILFS_SEGCTOR_ATTR_LIST(next_full_seg),
+ NILFS_SEGCTOR_ATTR_LIST(next_pseg_offset),
+ NILFS_SEGCTOR_ATTR_LIST(next_checkpoint),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_write_time),
+ NILFS_SEGCTOR_ATTR_LIST(last_seg_write_time_secs),
+ NILFS_SEGCTOR_ATTR_LIST(last_nongc_write_time),
+ NILFS_SEGCTOR_ATTR_LIST(last_nongc_write_time_secs),
+ NILFS_SEGCTOR_ATTR_LIST(dirty_data_blocks_count),
+ NILFS_SEGCTOR_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(segctor, dev);
+NILFS_DEV_INT_GROUP_TYPE(segctor, dev);
+NILFS_DEV_INT_GROUP_FNS(segctor, dev);
+
+/************************************************************************
+ * NILFS superblock attrs *
+ ************************************************************************/
+
+static ssize_t
+nilfs_superblock_sb_write_time_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t sbwtime;
+
+ down_read(&nilfs->ns_sem);
+ sbwtime = nilfs->ns_sbwtime;
+ up_read(&nilfs->ns_sem);
+
+ return NILFS_SHOW_TIME(sbwtime, buf);
+}
+
+static ssize_t
+nilfs_superblock_sb_write_time_secs_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ time_t sbwtime;
+
+ down_read(&nilfs->ns_sem);
+ sbwtime = nilfs->ns_sbwtime;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)sbwtime);
+}
+
+static ssize_t
+nilfs_superblock_sb_write_count_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ unsigned sbwcount;
+
+ down_read(&nilfs->ns_sem);
+ sbwcount = nilfs->ns_sbwcount;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", sbwcount);
+}
+
+static ssize_t
+nilfs_superblock_sb_update_frequency_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ unsigned sb_update_freq;
+
+ down_read(&nilfs->ns_sem);
+ sb_update_freq = nilfs->ns_sb_update_freq;
+ up_read(&nilfs->ns_sem);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", sb_update_freq);
+}
+
+static ssize_t
+nilfs_superblock_sb_update_frequency_store(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs,
+ const char *buf, size_t count)
+{
+ unsigned val;
+ int err;
+
+ err = kstrtouint(skip_spaces(buf), 0, &val);
+ if (err) {
+ printk(KERN_ERR "NILFS: unable to convert string: err=%d\n",
+ err);
+ return err;
+ }
+
+ if (val < NILFS_SB_FREQ) {
+ val = NILFS_SB_FREQ;
+ printk(KERN_WARNING "NILFS: superblock update frequency cannot be lesser than 10 seconds\n");
+ }
+
+ down_write(&nilfs->ns_sem);
+ nilfs->ns_sb_update_freq = val;
+ up_write(&nilfs->ns_sem);
+
+ return count;
+}
+
+static const char sb_readme_str[] =
+ "The superblock group contains attributes that describe\n"
+ "superblock's details.\n\n"
+ "(1) sb_write_time\n\tshow previous write time of super block "
+ "in human-readable format.\n\n"
+ "(2) sb_write_time_secs\n\tshow previous write time of super block "
+ "in seconds.\n\n"
+ "(3) sb_write_count\n\tshow write count of super block.\n\n"
+ "(4) sb_update_frequency\n"
+ "\tshow/set interval of periodical update of superblock (in seconds).\n\n"
+ "\tYou can set preferable frequency of superblock update by command:\n\n"
+ "\t'echo <val> > /sys/fs/<nilfs>/<dev>/superblock/sb_update_frequency'\n";
+
+static ssize_t
+nilfs_superblock_README_show(struct nilfs_superblock_attr *attr,
+ struct the_nilfs *nilfs, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, sb_readme_str);
+}
+
+NILFS_SUPERBLOCK_RO_ATTR(sb_write_time);
+NILFS_SUPERBLOCK_RO_ATTR(sb_write_time_secs);
+NILFS_SUPERBLOCK_RO_ATTR(sb_write_count);
+NILFS_SUPERBLOCK_RW_ATTR(sb_update_frequency);
+NILFS_SUPERBLOCK_RO_ATTR(README);
+
+static struct attribute *nilfs_superblock_attrs[] = {
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_write_time),
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_write_time_secs),
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_write_count),
+ NILFS_SUPERBLOCK_ATTR_LIST(sb_update_frequency),
+ NILFS_SUPERBLOCK_ATTR_LIST(README),
+ NULL,
+};
+
+NILFS_DEV_INT_GROUP_OPS(superblock, dev);
+NILFS_DEV_INT_GROUP_TYPE(superblock, dev);
+NILFS_DEV_INT_GROUP_FNS(superblock, dev);
+
+/************************************************************************
+ * NILFS device attrs *
+ ************************************************************************/
+
+static
+ssize_t nilfs_dev_revision_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_super_block **sbp = nilfs->ns_sbp;
+ u32 major = le32_to_cpu(sbp[0]->s_rev_level);
+ u16 minor = le16_to_cpu(sbp[0]->s_minor_rev_level);
+
+ return snprintf(buf, PAGE_SIZE, "%d.%d\n", major, minor);
+}
+
+static
+ssize_t nilfs_dev_blocksize_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", nilfs->ns_blocksize);
+}
+
+static
+ssize_t nilfs_dev_device_size_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_super_block **sbp = nilfs->ns_sbp;
+ u64 dev_size = le64_to_cpu(sbp[0]->s_dev_size);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", dev_size);
+}
+
+static
+ssize_t nilfs_dev_free_blocks_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ sector_t free_blocks = 0;
+
+ nilfs_count_free_blocks(nilfs, &free_blocks);
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)free_blocks);
+}
+
+static
+ssize_t nilfs_dev_uuid_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_super_block **sbp = nilfs->ns_sbp;
+
+ return snprintf(buf, PAGE_SIZE, "%pUb\n", sbp[0]->s_uuid);
+}
+
+static
+ssize_t nilfs_dev_volume_name_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ struct nilfs_super_block **sbp = nilfs->ns_sbp;
+
+ return scnprintf(buf, sizeof(sbp[0]->s_volume_name), "%s\n",
+ sbp[0]->s_volume_name);
+}
+
+static const char dev_readme_str[] =
+ "The <device> group contains attributes that describe file system\n"
+ "partition's details.\n\n"
+ "(1) revision\n\tshow NILFS file system revision.\n\n"
+ "(2) blocksize\n\tshow volume block size in bytes.\n\n"
+ "(3) device_size\n\tshow volume size in bytes.\n\n"
+ "(4) free_blocks\n\tshow count of free blocks on volume.\n\n"
+ "(5) uuid\n\tshow volume's UUID.\n\n"
+ "(6) volume_name\n\tshow volume's name.\n\n";
+
+static ssize_t nilfs_dev_README_show(struct nilfs_dev_attr *attr,
+ struct the_nilfs *nilfs,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, dev_readme_str);
+}
+
+NILFS_DEV_RO_ATTR(revision);
+NILFS_DEV_RO_ATTR(blocksize);
+NILFS_DEV_RO_ATTR(device_size);
+NILFS_DEV_RO_ATTR(free_blocks);
+NILFS_DEV_RO_ATTR(uuid);
+NILFS_DEV_RO_ATTR(volume_name);
+NILFS_DEV_RO_ATTR(README);
+
+static struct attribute *nilfs_dev_attrs[] = {
+ NILFS_DEV_ATTR_LIST(revision),
+ NILFS_DEV_ATTR_LIST(blocksize),
+ NILFS_DEV_ATTR_LIST(device_size),
+ NILFS_DEV_ATTR_LIST(free_blocks),
+ NILFS_DEV_ATTR_LIST(uuid),
+ NILFS_DEV_ATTR_LIST(volume_name),
+ NILFS_DEV_ATTR_LIST(README),
+ NULL,
+};
+
+static ssize_t nilfs_dev_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
+ ns_dev_kobj);
+ struct nilfs_dev_attr *a = container_of(attr, struct nilfs_dev_attr,
+ attr);
+
+ return a->show ? a->show(a, nilfs, buf) : 0;
+}
+
+static ssize_t nilfs_dev_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
+ ns_dev_kobj);
+ struct nilfs_dev_attr *a = container_of(attr, struct nilfs_dev_attr,
+ attr);
+
+ return a->store ? a->store(a, nilfs, buf, len) : 0;
+}
+
+static void nilfs_dev_attr_release(struct kobject *kobj)
+{
+ struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
+ ns_dev_kobj);
+ complete(&nilfs->ns_dev_kobj_unregister);
+}
+
+static const struct sysfs_ops nilfs_dev_attr_ops = {
+ .show = nilfs_dev_attr_show,
+ .store = nilfs_dev_attr_store,
+};
+
+static struct kobj_type nilfs_dev_ktype = {
+ .default_attrs = nilfs_dev_attrs,
+ .sysfs_ops = &nilfs_dev_attr_ops,
+ .release = nilfs_dev_attr_release,
+};
+
+int nilfs_sysfs_create_device_group(struct super_block *sb)
+{
+ struct the_nilfs *nilfs = sb->s_fs_info;
+ size_t devgrp_size = sizeof(struct nilfs_sysfs_dev_subgroups);
+ int err;
+
+ nilfs->ns_dev_subgroups = kzalloc(devgrp_size, GFP_KERNEL);
+ if (unlikely(!nilfs->ns_dev_subgroups)) {
+ err = -ENOMEM;
+ printk(KERN_ERR "NILFS: unable to allocate memory for device group\n");
+ goto failed_create_device_group;
+ }
+
+ nilfs->ns_dev_kobj.kset = nilfs_kset;
+ init_completion(&nilfs->ns_dev_kobj_unregister);
+ err = kobject_init_and_add(&nilfs->ns_dev_kobj, &nilfs_dev_ktype, NULL,
+ "%s", sb->s_id);
+ if (err)
+ goto free_dev_subgroups;
+
+ err = nilfs_sysfs_create_mounted_snapshots_group(nilfs);
+ if (err)
+ goto cleanup_dev_kobject;
+
+ err = nilfs_sysfs_create_checkpoints_group(nilfs);
+ if (err)
+ goto delete_mounted_snapshots_group;
+
+ err = nilfs_sysfs_create_segments_group(nilfs);
+ if (err)
+ goto delete_checkpoints_group;
+
+ err = nilfs_sysfs_create_superblock_group(nilfs);
+ if (err)
+ goto delete_segments_group;
+
+ err = nilfs_sysfs_create_segctor_group(nilfs);
+ if (err)
+ goto delete_superblock_group;
+
+ return 0;
+
+delete_superblock_group:
+ nilfs_sysfs_delete_superblock_group(nilfs);
+
+delete_segments_group:
+ nilfs_sysfs_delete_segments_group(nilfs);
+
+delete_checkpoints_group:
+ nilfs_sysfs_delete_checkpoints_group(nilfs);
+
+delete_mounted_snapshots_group:
+ nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
+
+cleanup_dev_kobject:
+ kobject_del(&nilfs->ns_dev_kobj);
+
+free_dev_subgroups:
+ kfree(nilfs->ns_dev_subgroups);
+
+failed_create_device_group:
+ return err;
+}
+
+void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs)
+{
+ nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
+ nilfs_sysfs_delete_checkpoints_group(nilfs);
+ nilfs_sysfs_delete_segments_group(nilfs);
+ nilfs_sysfs_delete_superblock_group(nilfs);
+ nilfs_sysfs_delete_segctor_group(nilfs);
+ kobject_del(&nilfs->ns_dev_kobj);
+ kfree(nilfs->ns_dev_subgroups);
+}
+
+/************************************************************************
+ * NILFS feature attrs *
+ ************************************************************************/
+
+static ssize_t nilfs_feature_revision_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d.%d\n",
+ NILFS_CURRENT_REV, NILFS_MINOR_REV);
+}
+
+static const char features_readme_str[] =
+ "The features group contains attributes that describe NILFS file\n"
+ "system driver features.\n\n"
+ "(1) revision\n\tshow current revision of NILFS file system driver.\n";
+
+static ssize_t nilfs_feature_README_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, features_readme_str);
+}
+
+NILFS_FEATURE_RO_ATTR(revision);
+NILFS_FEATURE_RO_ATTR(README);
+
+static struct attribute *nilfs_feature_attrs[] = {
+ NILFS_FEATURE_ATTR_LIST(revision),
+ NILFS_FEATURE_ATTR_LIST(README),
+ NULL,
+};
+
+static const struct attribute_group nilfs_feature_attr_group = {
+ .name = "features",
+ .attrs = nilfs_feature_attrs,
+};
+
+int __init nilfs_sysfs_init(void)
+{
+ int err;
+
+ nilfs_kset = kset_create_and_add(NILFS_ROOT_GROUP_NAME, NULL, fs_kobj);
+ if (!nilfs_kset) {
+ err = -ENOMEM;
+ printk(KERN_ERR "NILFS: unable to create sysfs entry: err %d\n",
+ err);
+ goto failed_sysfs_init;
+ }
+
+ err = sysfs_create_group(&nilfs_kset->kobj, &nilfs_feature_attr_group);
+ if (unlikely(err)) {
+ printk(KERN_ERR "NILFS: unable to create feature group: err %d\n",
+ err);
+ goto cleanup_sysfs_init;
+ }
+
+ return 0;
+
+cleanup_sysfs_init:
+ kset_unregister(nilfs_kset);
+
+failed_sysfs_init:
+ return err;
+}
+
+void nilfs_sysfs_exit(void)
+{
+ sysfs_remove_group(&nilfs_kset->kobj, &nilfs_feature_attr_group);
+ kset_unregister(nilfs_kset);
+}
diff --git a/fs/nilfs2/sysfs.h b/fs/nilfs2/sysfs.h
new file mode 100644
index 000000000000..677e3a1a8370
--- /dev/null
+++ b/fs/nilfs2/sysfs.h
@@ -0,0 +1,176 @@
+/*
+ * sysfs.h - sysfs support declarations.
+ *
+ * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation.
+ * Copyright (C) 2014 HGST, Inc., a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com>
+ */
+
+#ifndef _NILFS_SYSFS_H
+#define _NILFS_SYSFS_H
+
+#include <linux/sysfs.h>
+
+#define NILFS_ROOT_GROUP_NAME "nilfs2"
+
+/*
+ * struct nilfs_sysfs_dev_subgroups - device subgroup kernel objects
+ * @sg_superblock_kobj: /sys/fs/<nilfs>/<device>/superblock
+ * @sg_superblock_kobj_unregister: completion state
+ * @sg_segctor_kobj: /sys/fs/<nilfs>/<device>/segctor
+ * @sg_segctor_kobj_unregister: completion state
+ * @sg_mounted_snapshots_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots
+ * @sg_mounted_snapshots_kobj_unregister: completion state
+ * @sg_checkpoints_kobj: /sys/fs/<nilfs>/<device>/checkpoints
+ * @sg_checkpoints_kobj_unregister: completion state
+ * @sg_segments_kobj: /sys/fs/<nilfs>/<device>/segments
+ * @sg_segments_kobj_unregister: completion state
+ */
+struct nilfs_sysfs_dev_subgroups {
+ /* /sys/fs/<nilfs>/<device>/superblock */
+ struct kobject sg_superblock_kobj;
+ struct completion sg_superblock_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/segctor */
+ struct kobject sg_segctor_kobj;
+ struct completion sg_segctor_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/mounted_snapshots */
+ struct kobject sg_mounted_snapshots_kobj;
+ struct completion sg_mounted_snapshots_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/checkpoints */
+ struct kobject sg_checkpoints_kobj;
+ struct completion sg_checkpoints_kobj_unregister;
+
+ /* /sys/fs/<nilfs>/<device>/segments */
+ struct kobject sg_segments_kobj;
+ struct completion sg_segments_kobj_unregister;
+};
+
+#define NILFS_COMMON_ATTR_STRUCT(name) \
+struct nilfs_##name##_attr { \
+ struct attribute attr; \
+ ssize_t (*show)(struct kobject *, struct attribute *, \
+ char *); \
+ ssize_t (*store)(struct kobject *, struct attribute *, \
+ const char *, size_t); \
+};
+
+NILFS_COMMON_ATTR_STRUCT(feature);
+
+#define NILFS_DEV_ATTR_STRUCT(name) \
+struct nilfs_##name##_attr { \
+ struct attribute attr; \
+ ssize_t (*show)(struct nilfs_##name##_attr *, struct the_nilfs *, \
+ char *); \
+ ssize_t (*store)(struct nilfs_##name##_attr *, struct the_nilfs *, \
+ const char *, size_t); \
+};
+
+NILFS_DEV_ATTR_STRUCT(dev);
+NILFS_DEV_ATTR_STRUCT(segments);
+NILFS_DEV_ATTR_STRUCT(mounted_snapshots);
+NILFS_DEV_ATTR_STRUCT(checkpoints);
+NILFS_DEV_ATTR_STRUCT(superblock);
+NILFS_DEV_ATTR_STRUCT(segctor);
+
+#define NILFS_CP_ATTR_STRUCT(name) \
+struct nilfs_##name##_attr { \
+ struct attribute attr; \
+ ssize_t (*show)(struct nilfs_##name##_attr *, struct nilfs_root *, \
+ char *); \
+ ssize_t (*store)(struct nilfs_##name##_attr *, struct nilfs_root *, \
+ const char *, size_t); \
+};
+
+NILFS_CP_ATTR_STRUCT(snapshot);
+
+#define NILFS_ATTR(type, name, mode, show, store) \
+ static struct nilfs_##type##_attr nilfs_##type##_attr_##name = \
+ __ATTR(name, mode, show, store)
+
+#define NILFS_INFO_ATTR(type, name) \
+ NILFS_ATTR(type, name, 0444, NULL, NULL)
+#define NILFS_RO_ATTR(type, name) \
+ NILFS_ATTR(type, name, 0444, nilfs_##type##_##name##_show, NULL)
+#define NILFS_RW_ATTR(type, name) \
+ NILFS_ATTR(type, name, 0644, \
+ nilfs_##type##_##name##_show, \
+ nilfs_##type##_##name##_store)
+
+#define NILFS_FEATURE_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(feature, name)
+#define NILFS_FEATURE_RO_ATTR(name) \
+ NILFS_RO_ATTR(feature, name)
+#define NILFS_FEATURE_RW_ATTR(name) \
+ NILFS_RW_ATTR(feature, name)
+
+#define NILFS_DEV_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(dev, name)
+#define NILFS_DEV_RO_ATTR(name) \
+ NILFS_RO_ATTR(dev, name)
+#define NILFS_DEV_RW_ATTR(name) \
+ NILFS_RW_ATTR(dev, name)
+
+#define NILFS_SEGMENTS_RO_ATTR(name) \
+ NILFS_RO_ATTR(segments, name)
+#define NILFS_SEGMENTS_RW_ATTR(name) \
+ NILFS_RW_ATTR(segs_info, name)
+
+#define NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(name) \
+ NILFS_RO_ATTR(mounted_snapshots, name)
+
+#define NILFS_CHECKPOINTS_RO_ATTR(name) \
+ NILFS_RO_ATTR(checkpoints, name)
+#define NILFS_CHECKPOINTS_RW_ATTR(name) \
+ NILFS_RW_ATTR(checkpoints, name)
+
+#define NILFS_SNAPSHOT_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(snapshot, name)
+#define NILFS_SNAPSHOT_RO_ATTR(name) \
+ NILFS_RO_ATTR(snapshot, name)
+#define NILFS_SNAPSHOT_RW_ATTR(name) \
+ NILFS_RW_ATTR(snapshot, name)
+
+#define NILFS_SUPERBLOCK_RO_ATTR(name) \
+ NILFS_RO_ATTR(superblock, name)
+#define NILFS_SUPERBLOCK_RW_ATTR(name) \
+ NILFS_RW_ATTR(superblock, name)
+
+#define NILFS_SEGCTOR_INFO_ATTR(name) \
+ NILFS_INFO_ATTR(segctor, name)
+#define NILFS_SEGCTOR_RO_ATTR(name) \
+ NILFS_RO_ATTR(segctor, name)
+#define NILFS_SEGCTOR_RW_ATTR(name) \
+ NILFS_RW_ATTR(segctor, name)
+
+#define NILFS_FEATURE_ATTR_LIST(name) \
+ (&nilfs_feature_attr_##name.attr)
+#define NILFS_DEV_ATTR_LIST(name) \
+ (&nilfs_dev_attr_##name.attr)
+#define NILFS_SEGMENTS_ATTR_LIST(name) \
+ (&nilfs_segments_attr_##name.attr)
+#define NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(name) \
+ (&nilfs_mounted_snapshots_attr_##name.attr)
+#define NILFS_CHECKPOINTS_ATTR_LIST(name) \
+ (&nilfs_checkpoints_attr_##name.attr)
+#define NILFS_SNAPSHOT_ATTR_LIST(name) \
+ (&nilfs_snapshot_attr_##name.attr)
+#define NILFS_SUPERBLOCK_ATTR_LIST(name) \
+ (&nilfs_superblock_attr_##name.attr)
+#define NILFS_SEGCTOR_ATTR_LIST(name) \
+ (&nilfs_segctor_attr_##name.attr)
+
+#endif /* _NILFS_SYSFS_H */
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 8ba8229ba076..9da25fe9ea61 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -85,6 +85,7 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
nilfs->ns_cptree = RB_ROOT;
spin_lock_init(&nilfs->ns_cptree_lock);
init_rwsem(&nilfs->ns_segctor_sem);
+ nilfs->ns_sb_update_freq = NILFS_SB_FREQ;
return nilfs;
}
@@ -97,6 +98,7 @@ void destroy_nilfs(struct the_nilfs *nilfs)
{
might_sleep();
if (nilfs_init(nilfs)) {
+ nilfs_sysfs_delete_device_group(nilfs);
brelse(nilfs->ns_sbh[0]);
brelse(nilfs->ns_sbh[1]);
}
@@ -640,6 +642,10 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
if (err)
goto failed_sbh;
+ err = nilfs_sysfs_create_device_group(sb);
+ if (err)
+ goto failed_sbh;
+
set_nilfs_init(nilfs);
err = 0;
out:
@@ -740,12 +746,13 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
{
struct rb_node **p, *parent;
struct nilfs_root *root, *new;
+ int err;
root = nilfs_lookup_root(nilfs, cno);
if (root)
return root;
- new = kmalloc(sizeof(*root), GFP_KERNEL);
+ new = kzalloc(sizeof(*root), GFP_KERNEL);
if (!new)
return NULL;
@@ -782,6 +789,12 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
spin_unlock(&nilfs->ns_cptree_lock);
+ err = nilfs_sysfs_create_snapshot_group(new);
+ if (err) {
+ kfree(new);
+ new = NULL;
+ }
+
return new;
}
@@ -790,6 +803,8 @@ void nilfs_put_root(struct nilfs_root *root)
if (atomic_dec_and_test(&root->count)) {
struct the_nilfs *nilfs = root->nilfs;
+ nilfs_sysfs_delete_snapshot_group(root);
+
spin_lock(&nilfs->ns_cptree_lock);
rb_erase(&root->rb_node, &nilfs->ns_cptree);
spin_unlock(&nilfs->ns_cptree_lock);
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index de8cc53b4a5c..d01ead1bea9a 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -33,6 +33,7 @@
#include <linux/slab.h>
struct nilfs_sc_info;
+struct nilfs_sysfs_dev_subgroups;
/* the_nilfs struct */
enum {
@@ -54,6 +55,7 @@ enum {
* @ns_sbwcount: write count of super block
* @ns_sbsize: size of valid data in super block
* @ns_mount_state: file system state
+ * @ns_sb_update_freq: interval of periodical update of superblocks (in seconds)
* @ns_seg_seq: segment sequence counter
* @ns_segnum: index number of the latest full segment.
* @ns_nextnum: index number of the full segment index to be used next
@@ -95,6 +97,9 @@ enum {
* @ns_inode_size: size of on-disk inode
* @ns_first_ino: first not-special inode number
* @ns_crc_seed: seed value of CRC32 calculation
+ * @ns_dev_kobj: /sys/fs/<nilfs>/<device>
+ * @ns_dev_kobj_unregister: completion state
+ * @ns_dev_subgroups: <device> subgroups pointer
*/
struct the_nilfs {
unsigned long ns_flags;
@@ -114,6 +119,7 @@ struct the_nilfs {
unsigned ns_sbwcount;
unsigned ns_sbsize;
unsigned ns_mount_state;
+ unsigned ns_sb_update_freq;
/*
* Following fields are dedicated to a writable FS-instance.
@@ -188,6 +194,11 @@ struct the_nilfs {
int ns_inode_size;
int ns_first_ino;
u32 ns_crc_seed;
+
+ /* /sys/fs/<nilfs>/<device> */
+ struct kobject ns_dev_kobj;
+ struct completion ns_dev_kobj_unregister;
+ struct nilfs_sysfs_dev_subgroups *ns_dev_subgroups;
};
#define THE_NILFS_FNS(bit, name) \
@@ -232,6 +243,8 @@ THE_NILFS_FNS(SB_DIRTY, sb_dirty)
* @ifile: inode file
* @inodes_count: number of inodes
* @blocks_count: number of blocks
+ * @snapshot_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot>
+ * @snapshot_kobj_unregister: completion state for kernel object
*/
struct nilfs_root {
__u64 cno;
@@ -243,6 +256,10 @@ struct nilfs_root {
atomic64_t inodes_count;
atomic64_t blocks_count;
+
+ /* /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot> */
+ struct kobject snapshot_kobj;
+ struct completion snapshot_kobj_unregister;
};
/* Special checkpoint number */
@@ -254,7 +271,8 @@ struct nilfs_root {
static inline int nilfs_sb_need_update(struct the_nilfs *nilfs)
{
u64 t = get_seconds();
- return t < nilfs->ns_sbwtime || t > nilfs->ns_sbwtime + NILFS_SB_FREQ;
+ return t < nilfs->ns_sbwtime ||
+ t > nilfs->ns_sbwtime + nilfs->ns_sb_update_freq;
}
static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs)
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 74825be65b7b..9ce062218de9 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -232,7 +232,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
BUG_ON(last == NULL);
/* mark should be the last entry. last is the current last entry */
- hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list);
+ hlist_add_behind_rcu(&mark->i.i_list, &last->i.i_list);
out:
fsnotify_recalc_inode_mask_locked(inode);
spin_unlock(&inode->i_lock);
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 68ca5a8704b5..ac851e8376b1 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -191,7 +191,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
BUG_ON(last == NULL);
/* mark should be the last entry. last is the current last entry */
- hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list);
+ hlist_add_behind_rcu(&mark->m.m_list, &last->m.m_list);
out:
fsnotify_recalc_vfsmount_mask_locked(mnt);
spin_unlock(&mnt->mnt_root->d_lock);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 5c9e2c81cb11..f5ec1ce7a532 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -74,8 +74,6 @@ static int ntfs_file_open(struct inode *vi, struct file *filp)
* ntfs_attr_extend_initialized - extend the initialized size of an attribute
* @ni: ntfs inode of the attribute to extend
* @new_init_size: requested new initialized size in bytes
- * @cached_page: store any allocated but unused page here
- * @lru_pvec: lru-buffering pagevec of the caller
*
* Extend the initialized size of an attribute described by the ntfs inode @ni
* to @new_init_size bytes. This involves zeroing any non-sparse space between
@@ -395,7 +393,6 @@ static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
* @nr_pages: number of page cache pages to obtain
* @pages: array of pages in which to return the obtained page cache pages
* @cached_page: allocated but as yet unused page
- * @lru_pvec: lru-buffering pagevec of caller
*
* Obtain @nr_pages locked page cache pages from the mapping @mapping and
* starting at index @index.
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9d8fcf2f3b94..a93bf9892256 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4961,6 +4961,15 @@ leftright:
el = path_leaf_el(path);
split_index = ocfs2_search_extent_list(el, cpos);
+ if (split_index == -1) {
+ ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+ "Owner %llu has an extent at cpos %u "
+ "which can no longer be found.\n",
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+ cpos);
+ ret = -EROFS;
+ goto out;
+ }
goto leftright;
}
out:
@@ -5135,7 +5144,7 @@ int ocfs2_change_extent_flag(handle_t *handle,
el = path_leaf_el(left_path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(sb,
"Owner %llu has an extent at cpos %u which can no "
"longer be found.\n",
@@ -5491,7 +5500,7 @@ int ocfs2_remove_extent(handle_t *handle,
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
"Owner %llu has an extent at cpos %u which can no "
"longer be found.\n",
@@ -5557,7 +5566,7 @@ int ocfs2_remove_extent(handle_t *handle,
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
"Owner %llu: split at cpos %u lost record.",
(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 4a231a166cf8..a06b23773384 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1817,16 +1817,6 @@ try_again:
if (ret)
goto out_commit;
}
- /*
- * We don't want this to fail in ocfs2_write_end(), so do it
- * here.
- */
- ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret) {
- mlog_errno(ret);
- goto out_quota;
- }
/*
* Fill our page array first. That way we've grabbed enough so
@@ -1977,7 +1967,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
- int i;
+ int i, ret;
unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1);
struct inode *inode = mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -2027,6 +2017,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
}
}
+ ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ copied = ret;
+ mlog_errno(ret);
+ goto out;
+ }
+
out_write_size:
pos += copied;
if (pos > i_size_read(inode)) {
@@ -2041,6 +2039,7 @@ out_write_size:
ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_journal_dirty(handle, wc->w_di_bh);
+out:
ocfs2_commit_trans(osb, handle);
ocfs2_run_deallocs(osb, &wc->w_dealloc);
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index 1ec141e758d7..62e8ec619b4c 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -160,9 +160,18 @@ static void o2quo_make_decision(struct work_struct *work)
}
out:
- spin_unlock(&qs->qs_lock);
- if (fence)
+ if (fence) {
+ spin_unlock(&qs->qs_lock);
o2quo_fence_self();
+ } else {
+ mlog(ML_NOTICE, "not fencing this node, heartbeating: %d, "
+ "connected: %d, lowest: %d (%sreachable)\n",
+ qs->qs_heartbeating, qs->qs_connected, lowest_hb,
+ lowest_reachable ? "" : "un");
+ spin_unlock(&qs->qs_lock);
+
+ }
+
}
static void o2quo_set_hold(struct o2quo_state *qs, u8 node)
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 681691bc233a..ea34952f9496 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1480,6 +1480,14 @@ static int o2net_set_nodelay(struct socket *sock)
return ret;
}
+static int o2net_set_usertimeout(struct socket *sock)
+{
+ int user_timeout = O2NET_TCP_USER_TIMEOUT;
+
+ return kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
+ (char *)&user_timeout, sizeof(user_timeout));
+}
+
static void o2net_initialize_handshake(void)
{
o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32(
@@ -1536,16 +1544,20 @@ static void o2net_idle_timer(unsigned long data)
#endif
printk(KERN_NOTICE "o2net: Connection to " SC_NODEF_FMT " has been "
- "idle for %lu.%lu secs, shutting it down.\n", SC_NODEF_ARGS(sc),
- msecs / 1000, msecs % 1000);
+ "idle for %lu.%lu secs.\n",
+ SC_NODEF_ARGS(sc), msecs / 1000, msecs % 1000);
- /*
- * Initialize the nn_timeout so that the next connection attempt
- * will continue in o2net_start_connect.
+ /* idle timerout happen, don't shutdown the connection, but
+ * make fence decision. Maybe the connection can recover before
+ * the decision is made.
*/
atomic_set(&nn->nn_timeout, 1);
+ o2quo_conn_err(o2net_num_from_nn(nn));
+ queue_delayed_work(o2net_wq, &nn->nn_still_up,
+ msecs_to_jiffies(O2NET_QUORUM_DELAY_MS));
+
+ o2net_sc_reset_idle_timer(sc);
- o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
}
static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
@@ -1560,6 +1572,15 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
{
+ struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
+
+ /* clear fence decision since the connection recover from timeout*/
+ if (atomic_read(&nn->nn_timeout)) {
+ o2quo_conn_up(o2net_num_from_nn(nn));
+ cancel_delayed_work(&nn->nn_still_up);
+ atomic_set(&nn->nn_timeout, 0);
+ }
+
/* Only push out an existing timer */
if (timer_pending(&sc->sc_idle_timeout))
o2net_sc_reset_idle_timer(sc);
@@ -1650,6 +1671,12 @@ static void o2net_start_connect(struct work_struct *work)
goto out;
}
+ ret = o2net_set_usertimeout(sock);
+ if (ret) {
+ mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
+ goto out;
+ }
+
o2net_register_callbacks(sc->sc_sock->sk, sc);
spin_lock(&nn->nn_lock);
@@ -1831,6 +1858,12 @@ static int o2net_accept_one(struct socket *sock, int *more)
goto out;
}
+ ret = o2net_set_usertimeout(new_sock);
+ if (ret) {
+ mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
+ goto out;
+ }
+
slen = sizeof(sin);
ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin,
&slen, 1);
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h
index 5bada2a69b50..c571e849fda4 100644
--- a/fs/ocfs2/cluster/tcp.h
+++ b/fs/ocfs2/cluster/tcp.h
@@ -63,6 +63,7 @@ typedef void (o2net_post_msg_handler_func)(int status, void *data,
#define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 2000
#define O2NET_IDLE_TIMEOUT_MS_DEFAULT 30000
+#define O2NET_TCP_USER_TIMEOUT 0x7fffffff
/* TODO: figure this out.... */
static inline int o2net_link_down(int err, struct socket *sock)
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 39efc5057a36..3fcf205ee900 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1923,12 +1923,11 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
goto bail;
}
- if (total_backoff >
- msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) {
+ if (total_backoff > DLM_JOIN_TIMEOUT_MSECS) {
status = -ERESTARTSYS;
mlog(ML_NOTICE, "Timed out joining dlm domain "
"%s after %u msecs\n", dlm->name,
- jiffies_to_msecs(total_backoff));
+ total_backoff);
goto bail;
}
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 82abf0cc9a12..eb42b2988765 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -694,14 +694,6 @@ void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
res->inflight_assert_workers);
}
-static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
- struct dlm_lock_resource *res)
-{
- spin_lock(&res->spinlock);
- __dlm_lockres_grab_inflight_worker(dlm, res);
- spin_unlock(&res->spinlock);
-}
-
static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
@@ -1635,6 +1627,7 @@ send_response:
}
mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
dlm->node_num, res->lockname.len, res->lockname.name);
+ spin_lock(&res->spinlock);
ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx,
DLM_ASSERT_MASTER_MLE_CLEANUP);
if (ret < 0) {
@@ -1642,7 +1635,8 @@ send_response:
response = DLM_MASTER_RESP_ERROR;
dlm_lockres_put(res);
} else
- dlm_lockres_grab_inflight_worker(dlm, res);
+ __dlm_lockres_grab_inflight_worker(dlm, res);
+ spin_unlock(&res->spinlock);
} else {
if (res)
dlm_lockres_put(res);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 437de7f768c6..21708cda4b8a 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1192,17 +1192,9 @@ void ocfs2_evict_inode(struct inode *inode)
int ocfs2_drop_inode(struct inode *inode)
{
struct ocfs2_inode_info *oi = OCFS2_I(inode);
- int res;
-
trace_ocfs2_drop_inode((unsigned long long)oi->ip_blkno,
inode->i_nlink, oi->ip_flags);
-
- if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
- res = 1;
- else
- res = generic_drop_inode(inode);
-
- return res;
+ return 1;
}
/*
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 6f66b3751ace..53e6c40ed4c6 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -35,9 +35,8 @@
copy_to_user((typeof(a) __user *)b, &(a), sizeof(a))
/*
- * This call is void because we are already reporting an error that may
- * be -EFAULT. The error will be returned from the ioctl(2) call. It's
- * just a best-effort to tell userspace that this request caused the error.
+ * This is just a best-effort to tell userspace that this request
+ * caused the error.
*/
static inline void o2info_set_request_error(struct ocfs2_info_request *kreq,
struct ocfs2_info_request __user *req)
@@ -146,136 +145,105 @@ bail:
static int ocfs2_info_handle_blocksize(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_blocksize oib;
if (o2info_from_user(oib, req))
- goto bail;
+ return -EFAULT;
oib.ib_blocksize = inode->i_sb->s_blocksize;
o2info_set_request_filled(&oib.ib_req);
if (o2info_to_user(oib, req))
- goto bail;
-
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oib.ib_req, req);
+ return -EFAULT;
- return status;
+ return 0;
}
static int ocfs2_info_handle_clustersize(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_clustersize oic;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oic, req))
- goto bail;
+ return -EFAULT;
oic.ic_clustersize = osb->s_clustersize;
o2info_set_request_filled(&oic.ic_req);
if (o2info_to_user(oic, req))
- goto bail;
-
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oic.ic_req, req);
+ return -EFAULT;
- return status;
+ return 0;
}
static int ocfs2_info_handle_maxslots(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_maxslots oim;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oim, req))
- goto bail;
+ return -EFAULT;
oim.im_max_slots = osb->max_slots;
o2info_set_request_filled(&oim.im_req);
if (o2info_to_user(oim, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oim.im_req, req);
-
- return status;
+ return 0;
}
static int ocfs2_info_handle_label(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_label oil;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oil, req))
- goto bail;
+ return -EFAULT;
memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN);
o2info_set_request_filled(&oil.il_req);
if (o2info_to_user(oil, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oil.il_req, req);
-
- return status;
+ return 0;
}
static int ocfs2_info_handle_uuid(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_uuid oiu;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oiu, req))
- goto bail;
+ return -EFAULT;
memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1);
o2info_set_request_filled(&oiu.iu_req);
if (o2info_to_user(oiu, req))
- goto bail;
-
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oiu.iu_req, req);
+ return -EFAULT;
- return status;
+ return 0;
}
static int ocfs2_info_handle_fs_features(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_fs_features oif;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oif, req))
- goto bail;
+ return -EFAULT;
oif.if_compat_features = osb->s_feature_compat;
oif.if_incompat_features = osb->s_feature_incompat;
@@ -284,39 +252,28 @@ static int ocfs2_info_handle_fs_features(struct inode *inode,
o2info_set_request_filled(&oif.if_req);
if (o2info_to_user(oif, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oif.if_req, req);
-
- return status;
+ return 0;
}
static int ocfs2_info_handle_journal_size(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_journal_size oij;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
if (o2info_from_user(oij, req))
- goto bail;
+ return -EFAULT;
oij.ij_journal_size = i_size_read(osb->journal->j_inode);
o2info_set_request_filled(&oij.ij_req);
if (o2info_to_user(oij, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oij.ij_req, req);
-
- return status;
+ return 0;
}
static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
@@ -373,7 +330,7 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
u32 i;
u64 blkno = -1;
char namebuf[40];
- int status = -EFAULT, type = INODE_ALLOC_SYSTEM_INODE;
+ int status, type = INODE_ALLOC_SYSTEM_INODE;
struct ocfs2_info_freeinode *oifi = NULL;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct inode *inode_alloc = NULL;
@@ -385,8 +342,10 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
goto out_err;
}
- if (o2info_from_user(*oifi, req))
- goto bail;
+ if (o2info_from_user(*oifi, req)) {
+ status = -EFAULT;
+ goto out_free;
+ }
oifi->ifi_slotnum = osb->max_slots;
@@ -424,14 +383,16 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
o2info_set_request_filled(&oifi->ifi_req);
- if (o2info_to_user(*oifi, req))
- goto bail;
+ if (o2info_to_user(*oifi, req)) {
+ status = -EFAULT;
+ goto out_free;
+ }
status = 0;
bail:
if (status)
o2info_set_request_error(&oifi->ifi_req, req);
-
+out_free:
kfree(oifi);
out_err:
return status;
@@ -658,7 +619,7 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
{
u64 blkno = -1;
char namebuf[40];
- int status = -EFAULT, type = GLOBAL_BITMAP_SYSTEM_INODE;
+ int status, type = GLOBAL_BITMAP_SYSTEM_INODE;
struct ocfs2_info_freefrag *oiff;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -671,8 +632,10 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
goto out_err;
}
- if (o2info_from_user(*oiff, req))
- goto bail;
+ if (o2info_from_user(*oiff, req)) {
+ status = -EFAULT;
+ goto out_free;
+ }
/*
* chunksize from userspace should be power of 2.
*/
@@ -711,14 +674,14 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
if (o2info_to_user(*oiff, req)) {
status = -EFAULT;
- goto bail;
+ goto out_free;
}
status = 0;
bail:
if (status)
o2info_set_request_error(&oiff->iff_req, req);
-
+out_free:
kfree(oiff);
out_err:
return status;
@@ -727,23 +690,17 @@ out_err:
static int ocfs2_info_handle_unknown(struct inode *inode,
struct ocfs2_info_request __user *req)
{
- int status = -EFAULT;
struct ocfs2_info_request oir;
if (o2info_from_user(oir, req))
- goto bail;
+ return -EFAULT;
o2info_clear_request_filled(&oir);
if (o2info_to_user(oir, req))
- goto bail;
+ return -EFAULT;
- status = 0;
-bail:
- if (status)
- o2info_set_request_error(&oir, req);
-
- return status;
+ return 0;
}
/*
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 599eb4c4c8be..6219aaadeb08 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -98,7 +98,7 @@ static int __ocfs2_move_extent(handle_t *handle,
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(inode->i_sb,
"Inode %llu has an extent at cpos %u which can no "
"longer be found.\n",
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 636aab69ead5..d81f6e2a97f5 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3109,7 +3109,7 @@ static int ocfs2_clear_ext_refcount(handle_t *handle,
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
+ if (index == -1) {
ocfs2_error(sb,
"Inode %llu has an extent at cpos %u which can no "
"longer be found.\n",
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 1424c151cccc..a88b2a4fcc85 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -382,7 +382,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);
- si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks,
+ si->si_bh = kcalloc(si->si_blocks, sizeof(struct buffer_head *),
GFP_KERNEL);
if (!si->si_bh) {
status = -ENOMEM;
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index ec58c7659183..ba8819702c56 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -321,7 +321,7 @@ static int omfs_get_imap(struct super_block *sb)
goto out;
sbi->s_imap_size = array_size;
- sbi->s_imap = kzalloc(array_size * sizeof(unsigned long *), GFP_KERNEL);
+ sbi->s_imap = kcalloc(array_size, sizeof(unsigned long *), GFP_KERNEL);
if (!sbi->s_imap)
goto nomem;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index e442784ef74a..f50d4be6b27f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2445,7 +2445,7 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
#ifdef CONFIG_USER_NS
static int proc_id_map_open(struct inode *inode, struct file *file,
- struct seq_operations *seq_ops)
+ const struct seq_operations *seq_ops)
{
struct user_namespace *ns = NULL;
struct task_struct *task;
@@ -2774,12 +2774,12 @@ out:
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
{
- int result = 0;
+ int result = -ENOENT;
struct task_struct *task;
unsigned tgid;
struct pid_namespace *ns;
- tgid = name_to_int(dentry);
+ tgid = name_to_int(&dentry->d_name);
if (tgid == ~0U)
goto out;
@@ -3027,7 +3027,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
if (!leader)
goto out_no_task;
- tid = name_to_int(dentry);
+ tid = name_to_int(&dentry->d_name);
if (tid == ~0U)
goto out;
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index eb82e9f00f58..e11d7c590bb0 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -204,7 +204,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir,
{
struct task_struct *task = get_proc_task(dir);
int result = -ENOENT;
- unsigned fd = name_to_int(dentry);
+ unsigned fd = name_to_int(&dentry->d_name);
if (!task)
goto out_no_task;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index b7f268eb5f45..317b72641ebf 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -27,7 +27,7 @@
#include "internal.h"
-DEFINE_SPINLOCK(proc_subdir_lock);
+static DEFINE_SPINLOCK(proc_subdir_lock);
static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de)
{
@@ -330,28 +330,28 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
nlink_t nlink)
{
struct proc_dir_entry *ent = NULL;
- const char *fn = name;
- unsigned int len;
-
- /* make sure name is valid */
- if (!name || !strlen(name))
- goto out;
+ const char *fn;
+ struct qstr qstr;
if (xlate_proc_name(name, parent, &fn) != 0)
goto out;
+ qstr.name = fn;
+ qstr.len = strlen(fn);
+ if (qstr.len == 0 || qstr.len >= 256) {
+ WARN(1, "name len %u\n", qstr.len);
+ return NULL;
+ }
+ if (*parent == &proc_root && name_to_int(&qstr) != ~0U) {
+ WARN(1, "create '/proc/%s' by hand\n", qstr.name);
+ return NULL;
+ }
- /* At this point there must not be any '/' characters beyond *fn */
- if (strchr(fn, '/'))
- goto out;
-
- len = strlen(fn);
-
- ent = kzalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
+ ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL);
if (!ent)
goto out;
- memcpy(ent->name, fn, len + 1);
- ent->namelen = len;
+ memcpy(ent->name, fn, qstr.len + 1);
+ ent->namelen = qstr.len;
ent->mode = mode;
ent->nlink = nlink;
atomic_set(&ent->count, 1);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 3ab6d14e71c5..a17accbd05ee 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -112,10 +112,10 @@ static inline int task_dumpable(struct task_struct *task)
return 0;
}
-static inline unsigned name_to_int(struct dentry *dentry)
+static inline unsigned name_to_int(const struct qstr *qstr)
{
- const char *name = dentry->d_name.name;
- int len = dentry->d_name.len;
+ const char *name = qstr->name;
+ int len = qstr->len;
unsigned n = 0;
if (len > 1 && *name == '0')
@@ -178,8 +178,6 @@ extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, i
/*
* generic.c
*/
-extern spinlock_t proc_subdir_lock;
-
extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *,
struct dentry *);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 39e6ef32f0bd..6df8d0722c97 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -172,7 +172,7 @@ get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
- end = ALIGN(end, PAGE_SIZE);
+ end = PAGE_ALIGN(end);
/* overlap check (because we have to align page */
list_for_each_entry(tmp, head, list) {
if (tmp->type != KCORE_VMEMMAP)
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 7445af0b1aa3..aa1eee06420f 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -168,7 +168,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
K(global_page_state(NR_WRITEBACK)),
K(global_page_state(NR_ANON_PAGES)),
K(global_page_state(NR_FILE_MAPPED)),
- K(global_page_state(NR_SHMEM)),
+ K(i.sharedram),
K(global_page_state(NR_SLAB_RECLAIMABLE) +
global_page_state(NR_SLAB_UNRECLAIMABLE)),
K(global_page_state(NR_SLAB_RECLAIMABLE)),
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 71290463a1d3..f92d5dd578a4 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -632,7 +632,7 @@ out:
return ret;
}
-static int scan(struct ctl_table_header *head, ctl_table *table,
+static int scan(struct ctl_table_header *head, struct ctl_table *table,
unsigned long *pos, struct file *file,
struct dir_context *ctx)
{
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index cb761f010300..15f327bed8c6 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -18,7 +18,7 @@
/*
* The /proc/tty directory inodes...
*/
-static struct proc_dir_entry *proc_tty_ldisc, *proc_tty_driver;
+static struct proc_dir_entry *proc_tty_driver;
/*
* This is the handler for /proc/tty/drivers
@@ -176,7 +176,7 @@ void __init proc_tty_init(void)
{
if (!proc_mkdir("tty", NULL))
return;
- proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL);
+ proc_mkdir("tty/ldisc", NULL); /* Preserved: it's userspace visible */
/*
* /proc/tty/driver/serial reveals the exact character counts for
* serial links which is just too easy to abuse for inferring
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 5dbadecb234d..574bafc41f0b 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -199,10 +199,10 @@ static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags)
{
- if (!proc_lookup(dir, dentry, flags))
+ if (!proc_pid_lookup(dir, dentry, flags))
return NULL;
- return proc_pid_lookup(dir, dentry, flags);
+ return proc_lookup(dir, dentry, flags);
}
static int proc_root_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 382aa890e228..a18e651a4d1c 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -328,6 +328,82 @@ static inline char *alloc_elfnotes_buf(size_t notes_sz)
* virtually contiguous user-space in ELF layout.
*/
#ifdef CONFIG_MMU
+/*
+ * remap_oldmem_pfn_checked - do remap_oldmem_pfn_range replacing all pages
+ * reported as not being ram with the zero page.
+ *
+ * @vma: vm_area_struct describing requested mapping
+ * @from: start remapping from
+ * @pfn: page frame number to start remapping to
+ * @size: remapping size
+ * @prot: protection bits
+ *
+ * Returns zero on success, -EAGAIN on failure.
+ */
+int remap_oldmem_pfn_checked(struct vm_area_struct *vma, unsigned long from,
+ unsigned long pfn, unsigned long size,
+ pgprot_t prot)
+{
+ unsigned long map_size;
+ unsigned long pos_start, pos_end, pos;
+ unsigned long zeropage_pfn = my_zero_pfn(0);
+ size_t len = 0;
+
+ pos_start = pfn;
+ pos_end = pfn + (size >> PAGE_SHIFT);
+
+ for (pos = pos_start; pos < pos_end; ++pos) {
+ if (!pfn_is_ram(pos)) {
+ /*
+ * We hit a page which is not ram. Remap the continuous
+ * region between pos_start and pos-1 and replace
+ * the non-ram page at pos with the zero page.
+ */
+ if (pos > pos_start) {
+ /* Remap continuous region */
+ map_size = (pos - pos_start) << PAGE_SHIFT;
+ if (remap_oldmem_pfn_range(vma, from + len,
+ pos_start, map_size,
+ prot))
+ goto fail;
+ len += map_size;
+ }
+ /* Remap the zero page */
+ if (remap_oldmem_pfn_range(vma, from + len,
+ zeropage_pfn,
+ PAGE_SIZE, prot))
+ goto fail;
+ len += PAGE_SIZE;
+ pos_start = pos + 1;
+ }
+ }
+ if (pos > pos_start) {
+ /* Remap the rest */
+ map_size = (pos - pos_start) << PAGE_SHIFT;
+ if (remap_oldmem_pfn_range(vma, from + len, pos_start,
+ map_size, prot))
+ goto fail;
+ }
+ return 0;
+fail:
+ do_munmap(vma->vm_mm, from, len);
+ return -EAGAIN;
+}
+
+int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
+ unsigned long from, unsigned long pfn,
+ unsigned long size, pgprot_t prot)
+{
+ /*
+ * Check if oldmem_pfn_is_ram was registered to avoid
+ * looping over all pages without a reason.
+ */
+ if (oldmem_pfn_is_ram)
+ return remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
+ else
+ return remap_oldmem_pfn_range(vma, from, pfn, size, prot);
+}
+
static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
{
size_t size = vma->vm_end - vma->vm_start;
@@ -387,9 +463,9 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
tsz = min_t(size_t, m->offset + m->size - start, size);
paddr = m->paddr + start - m->offset;
- if (remap_oldmem_pfn_range(vma, vma->vm_start + len,
- paddr >> PAGE_SHIFT, tsz,
- vma->vm_page_prot))
+ if (vmcore_remap_oldmem_pfn(vma, vma->vm_start + len,
+ paddr >> PAGE_SHIFT, tsz,
+ vma->vm_page_prot))
goto fail;
size -= tsz;
start += tsz;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 34a1e5aa848c..9d7b9a83699e 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -394,7 +394,7 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size)
prot = pgprot_noncached(PAGE_KERNEL);
- pages = kmalloc(sizeof(struct page *) * page_count, GFP_KERNEL);
+ pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
if (!pages) {
pr_err("%s: Failed to allocate array for %u pages\n",
__func__, page_count);
diff --git a/fs/qnx6/Makefile b/fs/qnx6/Makefile
index 9dd06199afc9..5e6bae6fae50 100644
--- a/fs/qnx6/Makefile
+++ b/fs/qnx6/Makefile
@@ -5,3 +5,4 @@
obj-$(CONFIG_QNX6FS_FS) += qnx6.o
qnx6-objs := inode.o dir.o namei.o super_mmi.o
+ccflags-$(CONFIG_QNX6FS_DEBUG) += -DDEBUG
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
index 15b7d92ed60d..8d64bb5366bf 100644
--- a/fs/qnx6/dir.c
+++ b/fs/qnx6/dir.c
@@ -77,21 +77,20 @@ static int qnx6_dir_longfilename(struct inode *inode,
if (de->de_size != 0xff) {
/* error - long filename entries always have size 0xff
in direntry */
- printk(KERN_ERR "qnx6: invalid direntry size (%i).\n",
- de->de_size);
+ pr_err("invalid direntry size (%i).\n", de->de_size);
return 0;
}
lf = qnx6_longname(s, de, &page);
if (IS_ERR(lf)) {
- printk(KERN_ERR "qnx6:Error reading longname\n");
+ pr_err("Error reading longname\n");
return 0;
}
lf_size = fs16_to_cpu(sbi, lf->lf_size);
if (lf_size > QNX6_LONG_NAME_MAX) {
- QNX6DEBUG((KERN_INFO "file %s\n", lf->lf_fname));
- printk(KERN_ERR "qnx6:Filename too long (%i)\n", lf_size);
+ pr_debug("file %s\n", lf->lf_fname);
+ pr_err("Filename too long (%i)\n", lf_size);
qnx6_put_page(page);
return 0;
}
@@ -100,10 +99,10 @@ static int qnx6_dir_longfilename(struct inode *inode,
mmi 3g filesystem does not have that checksum */
if (!test_opt(s, MMI_FS) && fs32_to_cpu(sbi, de->de_checksum) !=
qnx6_lfile_checksum(lf->lf_fname, lf_size))
- printk(KERN_INFO "qnx6: long filename checksum error.\n");
+ pr_info("long filename checksum error.\n");
- QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s inode:%u\n",
- lf_size, lf->lf_fname, de_inode));
+ pr_debug("qnx6_readdir:%.*s inode:%u\n",
+ lf_size, lf->lf_fname, de_inode);
if (!dir_emit(ctx, lf->lf_fname, lf_size, de_inode, DT_UNKNOWN)) {
qnx6_put_page(page);
return 0;
@@ -136,7 +135,7 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx)
int i = start;
if (IS_ERR(page)) {
- printk(KERN_ERR "qnx6_readdir: read failed\n");
+ pr_err("%s(): read failed\n", __func__);
ctx->pos = (n + 1) << PAGE_CACHE_SHIFT;
return PTR_ERR(page);
}
@@ -159,9 +158,9 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx)
break;
}
} else {
- QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s"
- " inode:%u\n", size, de->de_fname,
- no_inode));
+ pr_debug("%s():%.*s inode:%u\n",
+ __func__, size, de->de_fname,
+ no_inode);
if (!dir_emit(ctx, de->de_fname, size,
no_inode, DT_UNKNOWN)) {
done = true;
@@ -259,8 +258,7 @@ unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
if (ino)
goto found;
} else
- printk(KERN_ERR "qnx6: undefined "
- "filename size in inode.\n");
+ pr_err("undefined filename size in inode.\n");
}
qnx6_put_page(page);
}
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 65cdaab3ed49..44e73923670d 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -73,8 +73,8 @@ static int qnx6_get_block(struct inode *inode, sector_t iblock,
{
unsigned phys;
- QNX6DEBUG((KERN_INFO "qnx6: qnx6_get_block inode=[%ld] iblock=[%ld]\n",
- inode->i_ino, (unsigned long)iblock));
+ pr_debug("qnx6_get_block inode=[%ld] iblock=[%ld]\n",
+ inode->i_ino, (unsigned long)iblock);
phys = qnx6_block_map(inode, iblock);
if (phys) {
@@ -87,7 +87,7 @@ static int qnx6_get_block(struct inode *inode, sector_t iblock,
static int qnx6_check_blockptr(__fs32 ptr)
{
if (ptr == ~(__fs32)0) {
- printk(KERN_ERR "qnx6: hit unused blockpointer.\n");
+ pr_err("hit unused blockpointer.\n");
return 0;
}
return 1;
@@ -127,8 +127,7 @@ static unsigned qnx6_block_map(struct inode *inode, unsigned no)
levelptr = no >> bitdelta;
if (levelptr > QNX6_NO_DIRECT_POINTERS - 1) {
- printk(KERN_ERR "qnx6:Requested file block number (%u) too big.",
- no);
+ pr_err("Requested file block number (%u) too big.", no);
return 0;
}
@@ -137,8 +136,7 @@ static unsigned qnx6_block_map(struct inode *inode, unsigned no)
for (i = 0; i < depth; i++) {
bh = sb_bread(s, block);
if (!bh) {
- printk(KERN_ERR "qnx6:Error reading block (%u)\n",
- block);
+ pr_err("Error reading block (%u)\n", block);
return 0;
}
bitdelta -= ptrbits;
@@ -207,26 +205,16 @@ void qnx6_superblock_debug(struct qnx6_super_block *sb, struct super_block *s)
{
struct qnx6_sb_info *sbi = QNX6_SB(s);
- QNX6DEBUG((KERN_INFO "magic: %08x\n",
- fs32_to_cpu(sbi, sb->sb_magic)));
- QNX6DEBUG((KERN_INFO "checksum: %08x\n",
- fs32_to_cpu(sbi, sb->sb_checksum)));
- QNX6DEBUG((KERN_INFO "serial: %llx\n",
- fs64_to_cpu(sbi, sb->sb_serial)));
- QNX6DEBUG((KERN_INFO "flags: %08x\n",
- fs32_to_cpu(sbi, sb->sb_flags)));
- QNX6DEBUG((KERN_INFO "blocksize: %08x\n",
- fs32_to_cpu(sbi, sb->sb_blocksize)));
- QNX6DEBUG((KERN_INFO "num_inodes: %08x\n",
- fs32_to_cpu(sbi, sb->sb_num_inodes)));
- QNX6DEBUG((KERN_INFO "free_inodes: %08x\n",
- fs32_to_cpu(sbi, sb->sb_free_inodes)));
- QNX6DEBUG((KERN_INFO "num_blocks: %08x\n",
- fs32_to_cpu(sbi, sb->sb_num_blocks)));
- QNX6DEBUG((KERN_INFO "free_blocks: %08x\n",
- fs32_to_cpu(sbi, sb->sb_free_blocks)));
- QNX6DEBUG((KERN_INFO "inode_levels: %02x\n",
- sb->Inode.levels));
+ pr_debug("magic: %08x\n", fs32_to_cpu(sbi, sb->sb_magic));
+ pr_debug("checksum: %08x\n", fs32_to_cpu(sbi, sb->sb_checksum));
+ pr_debug("serial: %llx\n", fs64_to_cpu(sbi, sb->sb_serial));
+ pr_debug("flags: %08x\n", fs32_to_cpu(sbi, sb->sb_flags));
+ pr_debug("blocksize: %08x\n", fs32_to_cpu(sbi, sb->sb_blocksize));
+ pr_debug("num_inodes: %08x\n", fs32_to_cpu(sbi, sb->sb_num_inodes));
+ pr_debug("free_inodes: %08x\n", fs32_to_cpu(sbi, sb->sb_free_inodes));
+ pr_debug("num_blocks: %08x\n", fs32_to_cpu(sbi, sb->sb_num_blocks));
+ pr_debug("free_blocks: %08x\n", fs32_to_cpu(sbi, sb->sb_free_blocks));
+ pr_debug("inode_levels: %02x\n", sb->Inode.levels);
}
#endif
@@ -277,7 +265,7 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
start with the first superblock */
bh = sb_bread(s, offset);
if (!bh) {
- printk(KERN_ERR "qnx6: unable to read the first superblock\n");
+ pr_err("unable to read the first superblock\n");
return NULL;
}
sb = (struct qnx6_super_block *)bh->b_data;
@@ -285,20 +273,16 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
sbi->s_bytesex = BYTESEX_BE;
if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) {
/* we got a big endian fs */
- QNX6DEBUG((KERN_INFO "qnx6: fs got different"
- " endianness.\n"));
+ pr_debug("fs got different endianness.\n");
return bh;
} else
sbi->s_bytesex = BYTESEX_LE;
if (!silent) {
if (offset == 0) {
- printk(KERN_ERR "qnx6: wrong signature (magic)"
- " in superblock #1.\n");
+ pr_err("wrong signature (magic) in superblock #1.\n");
} else {
- printk(KERN_INFO "qnx6: wrong signature (magic)"
- " at position (0x%lx) - will try"
- " alternative position (0x0000).\n",
- offset * s->s_blocksize);
+ pr_info("wrong signature (magic) at position (0x%lx) - will try alternative position (0x0000).\n",
+ offset * s->s_blocksize);
}
}
brelse(bh);
@@ -329,13 +313,13 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
/* Superblock always is 512 Byte long */
if (!sb_set_blocksize(s, QNX6_SUPERBLOCK_SIZE)) {
- printk(KERN_ERR "qnx6: unable to set blocksize\n");
+ pr_err("unable to set blocksize\n");
goto outnobh;
}
/* parse the mount-options */
if (!qnx6_parse_options((char *) data, s)) {
- printk(KERN_ERR "qnx6: invalid mount options.\n");
+ pr_err("invalid mount options.\n");
goto outnobh;
}
if (test_opt(s, MMI_FS)) {
@@ -355,7 +339,7 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
/* try again without bootblock offset */
bh1 = qnx6_check_first_superblock(s, 0, silent);
if (!bh1) {
- printk(KERN_ERR "qnx6: unable to read the first superblock\n");
+ pr_err("unable to read the first superblock\n");
goto outnobh;
}
/* seems that no bootblock at partition start */
@@ -370,13 +354,13 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
/* checksum check - start at byte 8 and end at byte 512 */
if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
- printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
+ pr_err("superblock #1 checksum error\n");
goto out;
}
/* set new blocksize */
if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
- printk(KERN_ERR "qnx6: unable to set blocksize\n");
+ pr_err("unable to set blocksize\n");
goto out;
}
/* blocksize invalidates bh - pull it back in */
@@ -398,21 +382,20 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
/* next the second superblock */
bh2 = sb_bread(s, offset);
if (!bh2) {
- printk(KERN_ERR "qnx6: unable to read the second superblock\n");
+ pr_err("unable to read the second superblock\n");
goto out;
}
sb2 = (struct qnx6_super_block *)bh2->b_data;
if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
if (!silent)
- printk(KERN_ERR "qnx6: wrong signature (magic)"
- " in superblock #2.\n");
+ pr_err("wrong signature (magic) in superblock #2.\n");
goto out;
}
/* checksum check - start at byte 8 and end at byte 512 */
if (fs32_to_cpu(sbi, sb2->sb_checksum) !=
crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
- printk(KERN_ERR "qnx6: superblock #2 checksum error\n");
+ pr_err("superblock #2 checksum error\n");
goto out;
}
@@ -422,25 +405,24 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
sbi->sb_buf = bh1;
sbi->sb = (struct qnx6_super_block *)bh1->b_data;
brelse(bh2);
- printk(KERN_INFO "qnx6: superblock #1 active\n");
+ pr_info("superblock #1 active\n");
} else {
/* superblock #2 active */
sbi->sb_buf = bh2;
sbi->sb = (struct qnx6_super_block *)bh2->b_data;
brelse(bh1);
- printk(KERN_INFO "qnx6: superblock #2 active\n");
+ pr_info("superblock #2 active\n");
}
mmi_success:
/* sanity check - limit maximum indirect pointer levels */
if (sb1->Inode.levels > QNX6_PTR_MAX_LEVELS) {
- printk(KERN_ERR "qnx6: too many inode levels (max %i, sb %i)\n",
- QNX6_PTR_MAX_LEVELS, sb1->Inode.levels);
+ pr_err("too many inode levels (max %i, sb %i)\n",
+ QNX6_PTR_MAX_LEVELS, sb1->Inode.levels);
goto out;
}
if (sb1->Longfile.levels > QNX6_PTR_MAX_LEVELS) {
- printk(KERN_ERR "qnx6: too many longfilename levels"
- " (max %i, sb %i)\n",
- QNX6_PTR_MAX_LEVELS, sb1->Longfile.levels);
+ pr_err("too many longfilename levels (max %i, sb %i)\n",
+ QNX6_PTR_MAX_LEVELS, sb1->Longfile.levels);
goto out;
}
s->s_op = &qnx6_sops;
@@ -460,7 +442,7 @@ mmi_success:
/* prefetch root inode */
root = qnx6_iget(s, QNX6_ROOT_INO);
if (IS_ERR(root)) {
- printk(KERN_ERR "qnx6: get inode failed\n");
+ pr_err("get inode failed\n");
ret = PTR_ERR(root);
goto out2;
}
@@ -474,7 +456,7 @@ mmi_success:
errmsg = qnx6_checkroot(s);
if (errmsg != NULL) {
if (!silent)
- printk(KERN_ERR "qnx6: %s\n", errmsg);
+ pr_err("%s\n", errmsg);
goto out3;
}
return 0;
@@ -555,8 +537,7 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
inode->i_mode = 0;
if (ino == 0) {
- printk(KERN_ERR "qnx6: bad inode number on dev %s: %u is "
- "out of range\n",
+ pr_err("bad inode number on dev %s: %u is out of range\n",
sb->s_id, ino);
iget_failed(inode);
return ERR_PTR(-EIO);
@@ -566,8 +547,8 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
mapping = sbi->inodes->i_mapping;
page = read_mapping_page(mapping, n, NULL);
if (IS_ERR(page)) {
- printk(KERN_ERR "qnx6: major problem: unable to read inode from "
- "dev %s\n", sb->s_id);
+ pr_err("major problem: unable to read inode from dev %s\n",
+ sb->s_id);
iget_failed(inode);
return ERR_CAST(page);
}
@@ -689,7 +670,7 @@ static int __init init_qnx6_fs(void)
return err;
}
- printk(KERN_INFO "QNX6 filesystem 1.0.0 registered.\n");
+ pr_info("QNX6 filesystem 1.0.0 registered.\n");
return 0;
}
diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c
index 0561326a94f5..6c1a323137dd 100644
--- a/fs/qnx6/namei.c
+++ b/fs/qnx6/namei.c
@@ -29,12 +29,12 @@ struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
foundinode = qnx6_iget(dir->i_sb, ino);
qnx6_put_page(page);
if (IS_ERR(foundinode)) {
- QNX6DEBUG((KERN_ERR "qnx6: lookup->iget -> "
- " error %ld\n", PTR_ERR(foundinode)));
+ pr_debug("lookup->iget -> error %ld\n",
+ PTR_ERR(foundinode));
return ERR_CAST(foundinode);
}
} else {
- QNX6DEBUG((KERN_INFO "qnx6_lookup: not found %s\n", name));
+ pr_debug("%s(): not found %s\n", __func__, name);
return NULL;
}
d_add(dentry, foundinode);
diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h
index b00fcc960d37..d3fb2b698800 100644
--- a/fs/qnx6/qnx6.h
+++ b/fs/qnx6/qnx6.h
@@ -10,6 +10,12 @@
*
*/
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/fs.h>
#include <linux/pagemap.h>
@@ -19,12 +25,6 @@ typedef __u64 __bitwise __fs64;
#include <linux/qnx6_fs.h>
-#ifdef CONFIG_QNX6FS_DEBUG
-#define QNX6DEBUG(X) printk X
-#else
-#define QNX6DEBUG(X) (void) 0
-#endif
-
struct qnx6_sb_info {
struct buffer_head *sb_buf; /* superblock buffer */
struct qnx6_super_block *sb; /* our superblock */
diff --git a/fs/qnx6/super_mmi.c b/fs/qnx6/super_mmi.c
index 29c32cba62d6..62aaf3e3126a 100644
--- a/fs/qnx6/super_mmi.c
+++ b/fs/qnx6/super_mmi.c
@@ -44,15 +44,14 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
start with the first superblock */
bh1 = sb_bread(s, 0);
if (!bh1) {
- printk(KERN_ERR "qnx6: Unable to read first mmi superblock\n");
+ pr_err("Unable to read first mmi superblock\n");
return NULL;
}
sb1 = (struct qnx6_mmi_super_block *)bh1->b_data;
sbi = QNX6_SB(s);
if (fs32_to_cpu(sbi, sb1->sb_magic) != QNX6_SUPER_MAGIC) {
if (!silent) {
- printk(KERN_ERR "qnx6: wrong signature (magic) in"
- " superblock #1.\n");
+ pr_err("wrong signature (magic) in superblock #1.\n");
goto out;
}
}
@@ -60,7 +59,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
/* checksum check - start at byte 8 and end at byte 512 */
if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
- printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
+ pr_err("superblock #1 checksum error\n");
goto out;
}
@@ -70,7 +69,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
/* set new blocksize */
if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
- printk(KERN_ERR "qnx6: unable to set blocksize\n");
+ pr_err("unable to set blocksize\n");
goto out;
}
/* blocksize invalidates bh - pull it back in */
@@ -83,27 +82,26 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
/* read second superblock */
bh2 = sb_bread(s, offset);
if (!bh2) {
- printk(KERN_ERR "qnx6: unable to read the second superblock\n");
+ pr_err("unable to read the second superblock\n");
goto out;
}
sb2 = (struct qnx6_mmi_super_block *)bh2->b_data;
if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
if (!silent)
- printk(KERN_ERR "qnx6: wrong signature (magic) in"
- " superblock #2.\n");
+ pr_err("wrong signature (magic) in superblock #2.\n");
goto out;
}
/* checksum check - start at byte 8 and end at byte 512 */
if (fs32_to_cpu(sbi, sb2->sb_checksum)
!= crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
- printk(KERN_ERR "qnx6: superblock #1 checksum error\n");
+ pr_err("superblock #1 checksum error\n");
goto out;
}
qsb = kmalloc(sizeof(*qsb), GFP_KERNEL);
if (!qsb) {
- printk(KERN_ERR "qnx6: unable to allocate memory.\n");
+ pr_err("unable to allocate memory.\n");
goto out;
}
@@ -119,7 +117,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
sbi->sb_buf = bh1;
sbi->sb = (struct qnx6_super_block *)bh1->b_data;
brelse(bh2);
- printk(KERN_INFO "qnx6: superblock #1 active\n");
+ pr_info("superblock #1 active\n");
} else {
/* superblock #2 active */
qnx6_mmi_copy_sb(qsb, sb2);
@@ -131,7 +129,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
sbi->sb_buf = bh2;
sbi->sb = (struct qnx6_super_block *)bh2->b_data;
brelse(bh1);
- printk(KERN_INFO "qnx6: superblock #2 active\n");
+ pr_info("superblock #2 active\n");
}
kfree(qsb);
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index dda012ad4208..bbafbde3471a 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -222,7 +222,7 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
/* gang-find the pages */
ret = -ENOMEM;
- pages = kzalloc(lpages * sizeof(struct page *), GFP_KERNEL);
+ pages = kcalloc(lpages, sizeof(struct page *), GFP_KERNEL);
if (!pages)
goto out_free;
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index d9f5a60dd59b..0a7dc941aaf4 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -9,7 +9,7 @@
#include <linux/stat.h>
#include <linux/buffer_head.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
extern const struct reiserfs_key MIN_KEY;
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 54fdf196bfb2..5739cb99de7b 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -10,7 +10,7 @@
* and using buffers obtained after all above.
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/time.h>
#include "reiserfs.h"
#include <linux/buffer_head.h>
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index db9e80ba53a0..751dd3f4346b 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -6,7 +6,7 @@
#include "reiserfs.h"
#include "acl.h"
#include "xattr.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/writeback.h>
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index 73231b1ebdbe..b751eea32e20 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c
@@ -2,7 +2,7 @@
* Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/string.h>
#include <linux/time.h>
#include "reiserfs.h"
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 63b2b0ec49e6..a7eec9888f10 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -11,7 +11,7 @@
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 501ed6811a2b..6ec8a30a0911 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -7,7 +7,7 @@
#include <linux/mount.h>
#include "reiserfs.h"
#include <linux/time.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/pagemap.h>
#include <linux/compat.h>
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index cfaee912ee09..aca73dd73906 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -54,7 +54,7 @@ static void sd_print_item(struct item_head *ih, char *item)
} else {
struct stat_data *sd = (struct stat_data *)item;
- printk("\t0%-6o | %6Lu | %2u | %d | %s\n", sd_v2_mode(sd),
+ printk("\t0%-6o | %6llu | %2u | %d | %s\n", sd_v2_mode(sd),
(unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd),
sd_v2_rdev(sd), print_time(sd_v2_mtime(sd)));
}
@@ -408,7 +408,7 @@ static void direntry_print_item(struct item_head *ih, char *item)
namebuf[namelen + 2] = 0;
}
- printk("%d: %-15s%-15d%-15d%-15Ld%-15Ld(%s)\n",
+ printk("%d: %-15s%-15d%-15d%-15lld%-15lld(%s)\n",
i, namebuf,
deh_dir_id(deh), deh_objectid(deh),
GET_HASH_VALUE(deh_offset(deh)),
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index d6744c8b24e1..814dda3ec998 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -2,7 +2,7 @@
* Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
*/
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/string.h>
#include <linux/time.h>
#include "reiserfs.h"
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index c9b47e91baf8..ae1dc841db3a 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -17,7 +17,7 @@ static char off_buf[80];
static char *reiserfs_cpu_offset(struct cpu_key *key)
{
if (cpu_key_k_type(key) == TYPE_DIRENTRY)
- sprintf(off_buf, "%Lu(%Lu)",
+ sprintf(off_buf, "%llu(%llu)",
(unsigned long long)
GET_HASH_VALUE(cpu_key_k_offset(key)),
(unsigned long long)
@@ -34,7 +34,7 @@ static char *le_offset(struct reiserfs_key *key)
version = le_key_version(key);
if (le_key_k_type(version, key) == TYPE_DIRENTRY)
- sprintf(off_buf, "%Lu(%Lu)",
+ sprintf(off_buf, "%llu(%llu)",
(unsigned long long)
GET_HASH_VALUE(le_key_k_offset(version, key)),
(unsigned long long)
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 02b0b7d0f7d5..621b9f381fe1 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -11,7 +11,7 @@
#include <linux/module.h>
#include <linux/time.h>
#include <linux/seq_file.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "reiserfs.h"
#include <linux/init.h>
#include <linux/proc_fs.h>
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index dd44468edc2b..24cbe013240f 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -2006,7 +2006,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
&s_search_path) == POSITION_FOUND);
RFALSE(file_size > ROUND_UP(new_file_size),
- "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
+ "PAP-5680: truncate did not finish: new_file_size %lld, current %lld, oid %d",
new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
update_and_out:
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index a392cef6acc6..709ea92d716f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -15,7 +15,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/time.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "reiserfs.h"
#include "acl.h"
#include "xattr.h"
@@ -331,7 +331,7 @@ static int finish_unfinished(struct super_block *s)
* not completed truncate found. New size was
* committed together with "save" link
*/
- reiserfs_info(s, "Truncating %k to %Ld ..",
+ reiserfs_info(s, "Truncating %k to %lld ..",
INODE_PKEY(inode), inode->i_size);
/* don't update modification time */
@@ -1577,7 +1577,7 @@ static int read_super_block(struct super_block *s, int offset)
rs = (struct reiserfs_super_block *)bh->b_data;
if (sb_blocksize(rs) != s->s_blocksize) {
reiserfs_warning(s, "sh-2011", "can't find a reiserfs "
- "filesystem on (dev %s, block %Lu, size %lu)",
+ "filesystem on (dev %s, block %llu, size %lu)",
s->s_id,
(unsigned long long)bh->b_blocknr,
s->s_blocksize);
@@ -2441,8 +2441,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
struct buffer_head tmp_bh, *bh;
if (!current->journal_info) {
- printk(KERN_WARNING "reiserfs: Quota write (off=%Lu, len=%Lu)"
- " cancelled because transaction is not started.\n",
+ printk(KERN_WARNING "reiserfs: Quota write (off=%llu, len=%llu) cancelled because transaction is not started.\n",
(unsigned long long)off, (unsigned long long)len);
return -EIO;
}
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index ca416d099e7d..7c36898af402 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -45,7 +45,7 @@
#include <linux/xattr.h>
#include "xattr.h"
#include "acl.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <net/checksum.h>
#include <linux/stat.h>
#include <linux/quotaops.h>
@@ -84,6 +84,7 @@ static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
static int xattr_unlink(struct inode *dir, struct dentry *dentry)
{
int error;
+
BUG_ON(!mutex_is_locked(&dir->i_mutex));
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
@@ -98,6 +99,7 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
+
BUG_ON(!mutex_is_locked(&dir->i_mutex));
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
@@ -117,6 +119,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
{
struct dentry *privroot = REISERFS_SB(sb)->priv_root;
struct dentry *xaroot;
+
if (!privroot->d_inode)
return ERR_PTR(-ENODATA);
@@ -127,6 +130,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
xaroot = ERR_PTR(-ENODATA);
else if (!xaroot->d_inode) {
int err = -ENODATA;
+
if (xattr_may_create(flags))
err = xattr_mkdir(privroot->d_inode, xaroot, 0700);
if (err) {
@@ -157,6 +161,7 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
if (!IS_ERR(xadir) && !xadir->d_inode) {
int err = -ENODATA;
+
if (xattr_may_create(flags))
err = xattr_mkdir(xaroot->d_inode, xadir, 0700);
if (err) {
@@ -188,6 +193,7 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset,
{
struct reiserfs_dentry_buf *dbuf = buf;
struct dentry *dentry;
+
WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex));
if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
@@ -218,6 +224,7 @@ static void
cleanup_dentry_buf(struct reiserfs_dentry_buf *buf)
{
int i;
+
for (i = 0; i < buf->count; i++)
if (buf->dentries[i])
dput(buf->dentries[i]);
@@ -283,11 +290,13 @@ static int reiserfs_for_each_xattr(struct inode *inode,
int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 +
4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
struct reiserfs_transaction_handle th;
+
reiserfs_write_lock(inode->i_sb);
err = journal_begin(&th, inode->i_sb, blocks);
reiserfs_write_unlock(inode->i_sb);
if (!err) {
int jerror;
+
mutex_lock_nested(&dir->d_parent->d_inode->i_mutex,
I_MUTEX_XATTR);
err = action(dir, data);
@@ -340,6 +349,7 @@ static int chown_one_xattr(struct dentry *dentry, void *data)
int reiserfs_delete_xattrs(struct inode *inode)
{
int err = reiserfs_for_each_xattr(inode, delete_one_xattr, NULL);
+
if (err)
reiserfs_warning(inode->i_sb, "jdm-20004",
"Couldn't delete all xattrs (%d)\n", err);
@@ -350,6 +360,7 @@ int reiserfs_delete_xattrs(struct inode *inode)
int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
{
int err = reiserfs_for_each_xattr(inode, chown_one_xattr, attrs);
+
if (err)
reiserfs_warning(inode->i_sb, "jdm-20007",
"Couldn't chown all xattrs (%d)\n", err);
@@ -439,6 +450,7 @@ int reiserfs_commit_write(struct file *f, struct page *page,
static void update_ctime(struct inode *inode)
{
struct timespec now = current_fs_time(inode->i_sb);
+
if (inode_unhashed(inode) || !inode->i_nlink ||
timespec_equal(&inode->i_ctime, &now))
return;
@@ -514,6 +526,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
size_t chunk;
size_t skip = 0;
size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1));
+
if (buffer_size - buffer_pos > PAGE_CACHE_SIZE)
chunk = PAGE_CACHE_SIZE;
else
@@ -530,6 +543,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
if (file_pos == 0) {
struct reiserfs_xattr_header *rxh;
+
skip = file_pos = sizeof(struct reiserfs_xattr_header);
if (chunk + skip > PAGE_CACHE_SIZE)
chunk = PAGE_CACHE_SIZE - skip;
@@ -659,6 +673,7 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
size_t chunk;
char *data;
size_t skip = 0;
+
if (isize - file_pos > PAGE_CACHE_SIZE)
chunk = PAGE_CACHE_SIZE;
else
@@ -792,6 +807,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
int reiserfs_removexattr(struct dentry *dentry, const char *name)
{
const struct xattr_handler *handler;
+
handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
@@ -813,9 +829,11 @@ static int listxattr_filler(void *buf, const char *name, int namelen,
{
struct listxattr_buf *b = (struct listxattr_buf *)buf;
size_t size;
+
if (name[0] != '.' ||
(namelen != 1 && (name[1] != '.' || namelen != 2))) {
const struct xattr_handler *handler;
+
handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
name);
if (!handler) /* Unsupported xattr name */
@@ -885,6 +903,7 @@ static int create_privroot(struct dentry *dentry)
{
int err;
struct inode *inode = dentry->d_parent->d_inode;
+
WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
err = xattr_mkdir(inode, dentry, 0700);
@@ -1015,6 +1034,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
mutex_lock(&privroot->d_inode->i_mutex);
if (!REISERFS_SB(s)->xattr_root) {
struct dentry *dentry;
+
dentry = lookup_one_len(XAROOT_NAME, privroot,
strlen(XAROOT_NAME));
if (!IS_ERR(dentry))
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 44503e293790..4b34b9dc03dd 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -9,7 +9,7 @@
#include <linux/posix_acl_xattr.h>
#include "xattr.h"
#include "acl.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
static int __reiserfs_set_acl(struct reiserfs_transaction_handle *th,
struct inode *inode, int type,
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 800a3cef6f62..e7f8939a4cb5 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -6,7 +6,7 @@
#include <linux/slab.h>
#include "xattr.h"
#include <linux/security.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
static int
security_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index a0035719f66b..5eeb0c48ba46 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -5,7 +5,7 @@
#include <linux/pagemap.h>
#include <linux/xattr.h>
#include "xattr.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
static int
trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 8667491ae7c3..e50eab046471 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -4,7 +4,7 @@
#include <linux/pagemap.h>
#include <linux/xattr.h>
#include "xattr.h"
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
static int
user_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index ef90e8bca95a..e98dd88197d5 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -56,6 +56,8 @@
* 2 of the Licence, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/string.h>
#include <linux/fs.h>
@@ -380,7 +382,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
eio:
ret = -EIO;
error:
- printk(KERN_ERR "ROMFS: read error for inode 0x%lx\n", pos);
+ pr_err("read error for inode 0x%lx\n", pos);
return ERR_PTR(ret);
}
@@ -390,6 +392,7 @@ error:
static struct inode *romfs_alloc_inode(struct super_block *sb)
{
struct romfs_inode_info *inode;
+
inode = kmem_cache_alloc(romfs_inode_cachep, GFP_KERNEL);
return inode ? &inode->vfs_inode : NULL;
}
@@ -400,6 +403,7 @@ static struct inode *romfs_alloc_inode(struct super_block *sb)
static void romfs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
+
kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
}
@@ -507,15 +511,13 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
if (rsb->word0 != ROMSB_WORD0 || rsb->word1 != ROMSB_WORD1 ||
img_size < ROMFH_SIZE) {
if (!silent)
- printk(KERN_WARNING "VFS:"
- " Can't find a romfs filesystem on dev %s.\n",
+ pr_warn("VFS: Can't find a romfs filesystem on dev %s.\n",
sb->s_id);
goto error_rsb_inval;
}
if (romfs_checksum(rsb, min_t(size_t, img_size, 512))) {
- printk(KERN_ERR "ROMFS: bad initial checksum on dev %s.\n",
- sb->s_id);
+ pr_err("bad initial checksum on dev %s.\n", sb->s_id);
goto error_rsb_inval;
}
@@ -523,8 +525,8 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
len = strnlen(rsb->name, ROMFS_MAXFN);
if (!silent)
- printk(KERN_NOTICE "ROMFS: Mounting image '%*.*s' through %s\n",
- (unsigned) len, (unsigned) len, rsb->name, storage);
+ pr_notice("Mounting image '%*.*s' through %s\n",
+ (unsigned) len, (unsigned) len, rsb->name, storage);
kfree(rsb);
rsb = NULL;
@@ -614,7 +616,7 @@ static int __init init_romfs_fs(void)
{
int ret;
- printk(KERN_INFO "ROMFS MTD (C) 2007 Red Hat, Inc.\n");
+ pr_info("ROMFS MTD (C) 2007 Red Hat, Inc.\n");
romfs_inode_cachep =
kmem_cache_create("romfs_i",
@@ -623,13 +625,12 @@ static int __init init_romfs_fs(void)
romfs_i_init_once);
if (!romfs_inode_cachep) {
- printk(KERN_ERR
- "ROMFS error: Failed to initialise inode cache\n");
+ pr_err("Failed to initialise inode cache\n");
return -ENOMEM;
}
ret = register_filesystem(&romfs_fs_type);
if (ret) {
- printk(KERN_ERR "ROMFS error: Failed to register filesystem\n");
+ pr_err("Failed to register filesystem\n");
goto error_register;
}
return 0;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 3857b720cb1b..1d641bb108d2 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -8,10 +8,8 @@
#include <linux/fs.h>
#include <linux/export.h>
#include <linux/seq_file.h>
-#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/cred.h>
-#include <linux/mm.h>
#include <asm/uaccess.h>
#include <asm/page.h>
@@ -32,16 +30,6 @@ static void seq_set_overflow(struct seq_file *m)
m->count = m->size;
}
-static void *seq_buf_alloc(unsigned long size)
-{
- void *buf;
-
- buf = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
- if (!buf && size > PAGE_SIZE)
- buf = vmalloc(size);
- return buf;
-}
-
/**
* seq_open - initialize sequential file
* @file: file we initialize
@@ -108,7 +96,7 @@ static int traverse(struct seq_file *m, loff_t offset)
return 0;
}
if (!m->buf) {
- m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
+ m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
if (!m->buf)
return -ENOMEM;
}
@@ -147,9 +135,9 @@ static int traverse(struct seq_file *m, loff_t offset)
Eoverflow:
m->op->stop(m, p);
- kvfree(m->buf);
+ kfree(m->buf);
m->count = 0;
- m->buf = seq_buf_alloc(m->size <<= 1);
+ m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
return !m->buf ? -ENOMEM : -EAGAIN;
}
@@ -204,7 +192,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
/* grab buffer if we didn't have one */
if (!m->buf) {
- m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
+ m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
if (!m->buf)
goto Enomem;
}
@@ -244,9 +232,9 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
if (m->count < m->size)
goto Fill;
m->op->stop(m, p);
- kvfree(m->buf);
+ kfree(m->buf);
m->count = 0;
- m->buf = seq_buf_alloc(m->size <<= 1);
+ m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
if (!m->buf)
goto Enomem;
m->version = 0;
@@ -362,7 +350,7 @@ EXPORT_SYMBOL(seq_lseek);
int seq_release(struct inode *inode, struct file *file)
{
struct seq_file *m = file->private_data;
- kvfree(m->buf);
+ kfree(m->buf);
kfree(m);
return 0;
}
@@ -617,13 +605,13 @@ EXPORT_SYMBOL(single_open);
int single_open_size(struct file *file, int (*show)(struct seq_file *, void *),
void *data, size_t size)
{
- char *buf = seq_buf_alloc(size);
+ char *buf = kmalloc(size, GFP_KERNEL);
int ret;
if (!buf)
return -ENOMEM;
ret = single_open(file, show, data);
if (ret) {
- kvfree(buf);
+ kfree(buf);
return ret;
}
((struct seq_file *)file->private_data)->buf = buf;
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index 62a0de6632e1..43e7a7eddac0 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -44,7 +44,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
pages = end_index - start_index + 1;
- page = kmalloc(sizeof(void *) * pages, GFP_KERNEL);
+ page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL);
if (page == NULL)
return res;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 031c8d67fd51..5056babe00df 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -27,6 +27,8 @@
* the filesystem.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/slab.h>
@@ -448,8 +450,7 @@ static int __init init_squashfs_fs(void)
return err;
}
- printk(KERN_INFO "squashfs: version 4.0 (2009/01/31) "
- "Phillip Lougher\n");
+ pr_info("version 4.0 (2009/01/31) Phillip Lougher\n");
return 0;
}
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 2290d5866725..fb08b0c514b6 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -431,7 +431,7 @@ void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last)
/**
* wbuf_timer_callback - write-buffer timer callback function.
- * @data: timer data (write-buffer descriptor)
+ * @timer: timer data (write-buffer descriptor)
*
* This function is called when the write-buffer timer expires.
*/
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index c14adb2f420c..e2562961dd61 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -596,7 +596,6 @@ static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs)
* drop_last_node - drop the last node.
* @sleb: scanned LEB information
* @offs: offset of dropped nodes is returned here
- * @grouped: non-zero if whole group of nodes have to be dropped
*
* This is a helper function for 'ubifs_recover_leb()' which drops the last
* node of the scanned LEB.
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 3904c8574ef9..e5bd068cdb6c 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -424,19 +424,19 @@ static int ubifs_show_options(struct seq_file *s, struct dentry *root)
struct ubifs_info *c = root->d_sb->s_fs_info;
if (c->mount_opts.unmount_mode == 2)
- seq_printf(s, ",fast_unmount");
+ seq_puts(s, ",fast_unmount");
else if (c->mount_opts.unmount_mode == 1)
- seq_printf(s, ",norm_unmount");
+ seq_puts(s, ",norm_unmount");
if (c->mount_opts.bulk_read == 2)
- seq_printf(s, ",bulk_read");
+ seq_puts(s, ",bulk_read");
else if (c->mount_opts.bulk_read == 1)
- seq_printf(s, ",no_bulk_read");
+ seq_puts(s, ",no_bulk_read");
if (c->mount_opts.chk_data_crc == 2)
- seq_printf(s, ",chk_data_crc");
+ seq_puts(s, ",chk_data_crc");
else if (c->mount_opts.chk_data_crc == 1)
- seq_printf(s, ",no_chk_data_crc");
+ seq_puts(s, ",no_chk_data_crc");
if (c->mount_opts.override_compr) {
seq_printf(s, ",compr=%s",
diff --git a/fs/ufs/Makefile b/fs/ufs/Makefile
index dd39980437fc..4d0e02b022b3 100644
--- a/fs/ufs/Makefile
+++ b/fs/ufs/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_UFS_FS) += ufs.o
ufs-objs := balloc.o cylinder.o dir.o file.o ialloc.o inode.o \
namei.o super.o symlink.o truncate.o util.o
+ccflags-$(CONFIG_UFS_DEBUG) += -DDEBUG
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 61e8a9b021dd..7c580c97990e 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -158,16 +158,16 @@ out:
/**
* ufs_inode_getfrag() - allocate new fragment(s)
- * @inode - pointer to inode
- * @fragment - number of `fragment' which hold pointer
+ * @inode: pointer to inode
+ * @fragment: number of `fragment' which hold pointer
* to new allocated fragment(s)
- * @new_fragment - number of new allocated fragment(s)
- * @required - how many fragment(s) we require
- * @err - we set it if something wrong
- * @phys - pointer to where we save physical number of new allocated fragments,
+ * @new_fragment: number of new allocated fragment(s)
+ * @required: how many fragment(s) we require
+ * @err: we set it if something wrong
+ * @phys: pointer to where we save physical number of new allocated fragments,
* NULL if we allocate not data(indirect blocks for example).
- * @new - we set it if we allocate new block
- * @locked_page - for ufs_new_fragments()
+ * @new: we set it if we allocate new block
+ * @locked_page: for ufs_new_fragments()
*/
static struct buffer_head *
ufs_inode_getfrag(struct inode *inode, u64 fragment,
@@ -315,16 +315,16 @@ repeat2:
/**
* ufs_inode_getblock() - allocate new block
- * @inode - pointer to inode
- * @bh - pointer to block which hold "pointer" to new allocated block
- * @fragment - number of `fragment' which hold pointer
+ * @inode: pointer to inode
+ * @bh: pointer to block which hold "pointer" to new allocated block
+ * @fragment: number of `fragment' which hold pointer
* to new allocated block
- * @new_fragment - number of new allocated fragment
+ * @new_fragment: number of new allocated fragment
* (block will hold this fragment and also uspi->s_fpb-1)
- * @err - see ufs_inode_getfrag()
- * @phys - see ufs_inode_getfrag()
- * @new - see ufs_inode_getfrag()
- * @locked_page - see ufs_inode_getfrag()
+ * @err: see ufs_inode_getfrag()
+ * @phys: see ufs_inode_getfrag()
+ * @new: see ufs_inode_getfrag()
+ * @locked_page: see ufs_inode_getfrag()
*/
static struct buffer_head *
ufs_inode_getblock(struct inode *inode, struct buffer_head *bh,
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index b879f1ba3439..da73801301d5 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -65,7 +65,6 @@
* Evgeniy Dushistov <dushistov@mail.ru>, 2007
*/
-
#include <linux/exportfs.h>
#include <linux/module.h>
#include <linux/bitops.h>
@@ -172,73 +171,73 @@ static void ufs_print_super_stuff(struct super_block *sb,
{
u32 magic = fs32_to_cpu(sb, usb3->fs_magic);
- printk("ufs_print_super_stuff\n");
- printk(" magic: 0x%x\n", magic);
+ pr_debug("ufs_print_super_stuff\n");
+ pr_debug(" magic: 0x%x\n", magic);
if (fs32_to_cpu(sb, usb3->fs_magic) == UFS2_MAGIC) {
- printk(" fs_size: %llu\n", (unsigned long long)
- fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size));
- printk(" fs_dsize: %llu\n", (unsigned long long)
- fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize));
- printk(" bsize: %u\n",
- fs32_to_cpu(sb, usb1->fs_bsize));
- printk(" fsize: %u\n",
- fs32_to_cpu(sb, usb1->fs_fsize));
- printk(" fs_volname: %s\n", usb2->fs_un.fs_u2.fs_volname);
- printk(" fs_sblockloc: %llu\n", (unsigned long long)
- fs64_to_cpu(sb, usb2->fs_un.fs_u2.fs_sblockloc));
- printk(" cs_ndir(No of dirs): %llu\n", (unsigned long long)
- fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir));
- printk(" cs_nbfree(No of free blocks): %llu\n",
- (unsigned long long)
- fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree));
- printk(KERN_INFO" cs_nifree(Num of free inodes): %llu\n",
- (unsigned long long)
- fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nifree));
- printk(KERN_INFO" cs_nffree(Num of free frags): %llu\n",
- (unsigned long long)
- fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nffree));
- printk(KERN_INFO" fs_maxsymlinklen: %u\n",
- fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen));
+ pr_debug(" fs_size: %llu\n", (unsigned long long)
+ fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size));
+ pr_debug(" fs_dsize: %llu\n", (unsigned long long)
+ fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize));
+ pr_debug(" bsize: %u\n",
+ fs32_to_cpu(sb, usb1->fs_bsize));
+ pr_debug(" fsize: %u\n",
+ fs32_to_cpu(sb, usb1->fs_fsize));
+ pr_debug(" fs_volname: %s\n", usb2->fs_un.fs_u2.fs_volname);
+ pr_debug(" fs_sblockloc: %llu\n", (unsigned long long)
+ fs64_to_cpu(sb, usb2->fs_un.fs_u2.fs_sblockloc));
+ pr_debug(" cs_ndir(No of dirs): %llu\n", (unsigned long long)
+ fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir));
+ pr_debug(" cs_nbfree(No of free blocks): %llu\n",
+ (unsigned long long)
+ fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree));
+ pr_info(" cs_nifree(Num of free inodes): %llu\n",
+ (unsigned long long)
+ fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nifree));
+ pr_info(" cs_nffree(Num of free frags): %llu\n",
+ (unsigned long long)
+ fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nffree));
+ pr_info(" fs_maxsymlinklen: %u\n",
+ fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen));
} else {
- printk(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno));
- printk(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno));
- printk(" iblkno: %u\n", fs32_to_cpu(sb, usb1->fs_iblkno));
- printk(" dblkno: %u\n", fs32_to_cpu(sb, usb1->fs_dblkno));
- printk(" cgoffset: %u\n",
- fs32_to_cpu(sb, usb1->fs_cgoffset));
- printk(" ~cgmask: 0x%x\n",
- ~fs32_to_cpu(sb, usb1->fs_cgmask));
- printk(" size: %u\n", fs32_to_cpu(sb, usb1->fs_size));
- printk(" dsize: %u\n", fs32_to_cpu(sb, usb1->fs_dsize));
- printk(" ncg: %u\n", fs32_to_cpu(sb, usb1->fs_ncg));
- printk(" bsize: %u\n", fs32_to_cpu(sb, usb1->fs_bsize));
- printk(" fsize: %u\n", fs32_to_cpu(sb, usb1->fs_fsize));
- printk(" frag: %u\n", fs32_to_cpu(sb, usb1->fs_frag));
- printk(" fragshift: %u\n",
- fs32_to_cpu(sb, usb1->fs_fragshift));
- printk(" ~fmask: %u\n", ~fs32_to_cpu(sb, usb1->fs_fmask));
- printk(" fshift: %u\n", fs32_to_cpu(sb, usb1->fs_fshift));
- printk(" sbsize: %u\n", fs32_to_cpu(sb, usb1->fs_sbsize));
- printk(" spc: %u\n", fs32_to_cpu(sb, usb1->fs_spc));
- printk(" cpg: %u\n", fs32_to_cpu(sb, usb1->fs_cpg));
- printk(" ipg: %u\n", fs32_to_cpu(sb, usb1->fs_ipg));
- printk(" fpg: %u\n", fs32_to_cpu(sb, usb1->fs_fpg));
- printk(" csaddr: %u\n", fs32_to_cpu(sb, usb1->fs_csaddr));
- printk(" cssize: %u\n", fs32_to_cpu(sb, usb1->fs_cssize));
- printk(" cgsize: %u\n", fs32_to_cpu(sb, usb1->fs_cgsize));
- printk(" fstodb: %u\n",
- fs32_to_cpu(sb, usb1->fs_fsbtodb));
- printk(" nrpos: %u\n", fs32_to_cpu(sb, usb3->fs_nrpos));
- printk(" ndir %u\n",
- fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir));
- printk(" nifree %u\n",
- fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree));
- printk(" nbfree %u\n",
- fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree));
- printk(" nffree %u\n",
- fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree));
+ pr_debug(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno));
+ pr_debug(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno));
+ pr_debug(" iblkno: %u\n", fs32_to_cpu(sb, usb1->fs_iblkno));
+ pr_debug(" dblkno: %u\n", fs32_to_cpu(sb, usb1->fs_dblkno));
+ pr_debug(" cgoffset: %u\n",
+ fs32_to_cpu(sb, usb1->fs_cgoffset));
+ pr_debug(" ~cgmask: 0x%x\n",
+ ~fs32_to_cpu(sb, usb1->fs_cgmask));
+ pr_debug(" size: %u\n", fs32_to_cpu(sb, usb1->fs_size));
+ pr_debug(" dsize: %u\n", fs32_to_cpu(sb, usb1->fs_dsize));
+ pr_debug(" ncg: %u\n", fs32_to_cpu(sb, usb1->fs_ncg));
+ pr_debug(" bsize: %u\n", fs32_to_cpu(sb, usb1->fs_bsize));
+ pr_debug(" fsize: %u\n", fs32_to_cpu(sb, usb1->fs_fsize));
+ pr_debug(" frag: %u\n", fs32_to_cpu(sb, usb1->fs_frag));
+ pr_debug(" fragshift: %u\n",
+ fs32_to_cpu(sb, usb1->fs_fragshift));
+ pr_debug(" ~fmask: %u\n", ~fs32_to_cpu(sb, usb1->fs_fmask));
+ pr_debug(" fshift: %u\n", fs32_to_cpu(sb, usb1->fs_fshift));
+ pr_debug(" sbsize: %u\n", fs32_to_cpu(sb, usb1->fs_sbsize));
+ pr_debug(" spc: %u\n", fs32_to_cpu(sb, usb1->fs_spc));
+ pr_debug(" cpg: %u\n", fs32_to_cpu(sb, usb1->fs_cpg));
+ pr_debug(" ipg: %u\n", fs32_to_cpu(sb, usb1->fs_ipg));
+ pr_debug(" fpg: %u\n", fs32_to_cpu(sb, usb1->fs_fpg));
+ pr_debug(" csaddr: %u\n", fs32_to_cpu(sb, usb1->fs_csaddr));
+ pr_debug(" cssize: %u\n", fs32_to_cpu(sb, usb1->fs_cssize));
+ pr_debug(" cgsize: %u\n", fs32_to_cpu(sb, usb1->fs_cgsize));
+ pr_debug(" fstodb: %u\n",
+ fs32_to_cpu(sb, usb1->fs_fsbtodb));
+ pr_debug(" nrpos: %u\n", fs32_to_cpu(sb, usb3->fs_nrpos));
+ pr_debug(" ndir %u\n",
+ fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir));
+ pr_debug(" nifree %u\n",
+ fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree));
+ pr_debug(" nbfree %u\n",
+ fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree));
+ pr_debug(" nffree %u\n",
+ fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree));
}
- printk("\n");
+ pr_debug("\n");
}
/*
@@ -247,38 +246,38 @@ static void ufs_print_super_stuff(struct super_block *sb,
static void ufs_print_cylinder_stuff(struct super_block *sb,
struct ufs_cylinder_group *cg)
{
- printk("\nufs_print_cylinder_stuff\n");
- printk("size of ucg: %zu\n", sizeof(struct ufs_cylinder_group));
- printk(" magic: %x\n", fs32_to_cpu(sb, cg->cg_magic));
- printk(" time: %u\n", fs32_to_cpu(sb, cg->cg_time));
- printk(" cgx: %u\n", fs32_to_cpu(sb, cg->cg_cgx));
- printk(" ncyl: %u\n", fs16_to_cpu(sb, cg->cg_ncyl));
- printk(" niblk: %u\n", fs16_to_cpu(sb, cg->cg_niblk));
- printk(" ndblk: %u\n", fs32_to_cpu(sb, cg->cg_ndblk));
- printk(" cs_ndir: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_ndir));
- printk(" cs_nbfree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nbfree));
- printk(" cs_nifree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nifree));
- printk(" cs_nffree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nffree));
- printk(" rotor: %u\n", fs32_to_cpu(sb, cg->cg_rotor));
- printk(" frotor: %u\n", fs32_to_cpu(sb, cg->cg_frotor));
- printk(" irotor: %u\n", fs32_to_cpu(sb, cg->cg_irotor));
- printk(" frsum: %u, %u, %u, %u, %u, %u, %u, %u\n",
+ pr_debug("\nufs_print_cylinder_stuff\n");
+ pr_debug("size of ucg: %zu\n", sizeof(struct ufs_cylinder_group));
+ pr_debug(" magic: %x\n", fs32_to_cpu(sb, cg->cg_magic));
+ pr_debug(" time: %u\n", fs32_to_cpu(sb, cg->cg_time));
+ pr_debug(" cgx: %u\n", fs32_to_cpu(sb, cg->cg_cgx));
+ pr_debug(" ncyl: %u\n", fs16_to_cpu(sb, cg->cg_ncyl));
+ pr_debug(" niblk: %u\n", fs16_to_cpu(sb, cg->cg_niblk));
+ pr_debug(" ndblk: %u\n", fs32_to_cpu(sb, cg->cg_ndblk));
+ pr_debug(" cs_ndir: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_ndir));
+ pr_debug(" cs_nbfree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nbfree));
+ pr_debug(" cs_nifree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nifree));
+ pr_debug(" cs_nffree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nffree));
+ pr_debug(" rotor: %u\n", fs32_to_cpu(sb, cg->cg_rotor));
+ pr_debug(" frotor: %u\n", fs32_to_cpu(sb, cg->cg_frotor));
+ pr_debug(" irotor: %u\n", fs32_to_cpu(sb, cg->cg_irotor));
+ pr_debug(" frsum: %u, %u, %u, %u, %u, %u, %u, %u\n",
fs32_to_cpu(sb, cg->cg_frsum[0]), fs32_to_cpu(sb, cg->cg_frsum[1]),
fs32_to_cpu(sb, cg->cg_frsum[2]), fs32_to_cpu(sb, cg->cg_frsum[3]),
fs32_to_cpu(sb, cg->cg_frsum[4]), fs32_to_cpu(sb, cg->cg_frsum[5]),
fs32_to_cpu(sb, cg->cg_frsum[6]), fs32_to_cpu(sb, cg->cg_frsum[7]));
- printk(" btotoff: %u\n", fs32_to_cpu(sb, cg->cg_btotoff));
- printk(" boff: %u\n", fs32_to_cpu(sb, cg->cg_boff));
- printk(" iuseoff: %u\n", fs32_to_cpu(sb, cg->cg_iusedoff));
- printk(" freeoff: %u\n", fs32_to_cpu(sb, cg->cg_freeoff));
- printk(" nextfreeoff: %u\n", fs32_to_cpu(sb, cg->cg_nextfreeoff));
- printk(" clustersumoff %u\n",
- fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clustersumoff));
- printk(" clusteroff %u\n",
- fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clusteroff));
- printk(" nclusterblks %u\n",
- fs32_to_cpu(sb, cg->cg_u.cg_44.cg_nclusterblks));
- printk("\n");
+ pr_debug(" btotoff: %u\n", fs32_to_cpu(sb, cg->cg_btotoff));
+ pr_debug(" boff: %u\n", fs32_to_cpu(sb, cg->cg_boff));
+ pr_debug(" iuseoff: %u\n", fs32_to_cpu(sb, cg->cg_iusedoff));
+ pr_debug(" freeoff: %u\n", fs32_to_cpu(sb, cg->cg_freeoff));
+ pr_debug(" nextfreeoff: %u\n", fs32_to_cpu(sb, cg->cg_nextfreeoff));
+ pr_debug(" clustersumoff %u\n",
+ fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clustersumoff));
+ pr_debug(" clusteroff %u\n",
+ fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clusteroff));
+ pr_debug(" nclusterblks %u\n",
+ fs32_to_cpu(sb, cg->cg_u.cg_44.cg_nclusterblks));
+ pr_debug("\n");
}
#else
# define ufs_print_super_stuff(sb, usb1, usb2, usb3) /**/
@@ -287,13 +286,12 @@ static void ufs_print_cylinder_stuff(struct super_block *sb,
static const struct super_operations ufs_super_ops;
-static char error_buf[1024];
-
void ufs_error (struct super_block * sb, const char * function,
const char * fmt, ...)
{
struct ufs_sb_private_info * uspi;
struct ufs_super_block_first * usb1;
+ struct va_format vaf;
va_list args;
uspi = UFS_SB(sb)->s_uspi;
@@ -305,20 +303,21 @@ void ufs_error (struct super_block * sb, const char * function,
ufs_mark_sb_dirty(sb);
sb->s_flags |= MS_RDONLY;
}
- va_start (args, fmt);
- vsnprintf (error_buf, sizeof(error_buf), fmt, args);
- va_end (args);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
switch (UFS_SB(sb)->s_mount_opt & UFS_MOUNT_ONERROR) {
case UFS_MOUNT_ONERROR_PANIC:
- panic ("UFS-fs panic (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
+ panic("panic (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
case UFS_MOUNT_ONERROR_LOCK:
case UFS_MOUNT_ONERROR_UMOUNT:
case UFS_MOUNT_ONERROR_REPAIR:
- printk (KERN_CRIT "UFS-fs error (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
- }
+ pr_crit("error (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+ }
+ va_end(args);
}
void ufs_panic (struct super_block * sb, const char * function,
@@ -326,6 +325,7 @@ void ufs_panic (struct super_block * sb, const char * function,
{
struct ufs_sb_private_info * uspi;
struct ufs_super_block_first * usb1;
+ struct va_format vaf;
va_list args;
uspi = UFS_SB(sb)->s_uspi;
@@ -336,24 +336,27 @@ void ufs_panic (struct super_block * sb, const char * function,
ubh_mark_buffer_dirty(USPI_UBH(uspi));
ufs_mark_sb_dirty(sb);
}
- va_start (args, fmt);
- vsnprintf (error_buf, sizeof(error_buf), fmt, args);
- va_end (args);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
sb->s_flags |= MS_RDONLY;
- printk (KERN_CRIT "UFS-fs panic (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
+ pr_crit("panic (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+ va_end(args);
}
void ufs_warning (struct super_block * sb, const char * function,
const char * fmt, ...)
{
+ struct va_format vaf;
va_list args;
- va_start (args, fmt);
- vsnprintf (error_buf, sizeof(error_buf), fmt, args);
- va_end (args);
- printk (KERN_WARNING "UFS-fs warning (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ pr_warn("(device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+ va_end(args);
}
enum {
@@ -464,14 +467,12 @@ static int ufs_parse_options (char * options, unsigned * mount_options)
ufs_set_opt (*mount_options, ONERROR_UMOUNT);
break;
case Opt_onerror_repair:
- printk("UFS-fs: Unable to do repair on error, "
- "will lock lock instead\n");
+ pr_err("Unable to do repair on error, will lock lock instead\n");
ufs_clear_opt (*mount_options, ONERROR);
ufs_set_opt (*mount_options, ONERROR_REPAIR);
break;
default:
- printk("UFS-fs: Invalid option: \"%s\" "
- "or missing value\n", p);
+ pr_err("Invalid option: \"%s\" or missing value\n", p);
return 0;
}
}
@@ -788,8 +789,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
#ifndef CONFIG_UFS_FS_WRITE
if (!(sb->s_flags & MS_RDONLY)) {
- printk("ufs was compiled with read-only support, "
- "can't be mounted as read-write\n");
+ pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n");
return -EROFS;
}
#endif
@@ -812,12 +812,12 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_mount_opt = 0;
ufs_set_opt (sbi->s_mount_opt, ONERROR_LOCK);
if (!ufs_parse_options ((char *) data, &sbi->s_mount_opt)) {
- printk("wrong mount options\n");
+ pr_err("wrong mount options\n");
goto failed;
}
if (!(sbi->s_mount_opt & UFS_MOUNT_UFSTYPE)) {
if (!silent)
- printk("You didn't specify the type of your ufs filesystem\n\n"
+ pr_err("You didn't specify the type of your ufs filesystem\n\n"
"mount -t ufs -o ufstype="
"sun|sunx86|44bsd|ufs2|5xbsd|old|hp|nextstep|nextstep-cd|openstep ...\n\n"
">>>WARNING<<< Wrong ufstype may corrupt your filesystem, "
@@ -868,7 +868,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
break;
case UFS_MOUNT_UFSTYPE_SUNOS:
- UFSD(("ufstype=sunos\n"))
+ UFSD("ufstype=sunos\n");
uspi->s_fsize = block_size = 1024;
uspi->s_fmask = ~(1024 - 1);
uspi->s_fshift = 10;
@@ -900,7 +900,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=old is supported read-only\n");
+ pr_info("ufstype=old is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
@@ -916,7 +916,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=nextstep is supported read-only\n");
+ pr_info("ufstype=nextstep is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
@@ -932,7 +932,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=nextstep-cd is supported read-only\n");
+ pr_info("ufstype=nextstep-cd is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
@@ -948,7 +948,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=openstep is supported read-only\n");
+ pr_info("ufstype=openstep is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
@@ -963,19 +963,19 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
- printk(KERN_INFO "ufstype=hp is supported read-only\n");
+ pr_info("ufstype=hp is supported read-only\n");
sb->s_flags |= MS_RDONLY;
}
break;
default:
if (!silent)
- printk("unknown ufstype\n");
+ pr_err("unknown ufstype\n");
goto failed;
}
again:
if (!sb_set_blocksize(sb, block_size)) {
- printk(KERN_ERR "UFS: failed to set blocksize\n");
+ pr_err("failed to set blocksize\n");
goto failed;
}
@@ -1034,7 +1034,7 @@ again:
goto again;
}
if (!silent)
- printk("ufs_read_super: bad magic number\n");
+ pr_err("%s(): bad magic number\n", __func__);
goto failed;
magic_found:
@@ -1048,33 +1048,33 @@ magic_found:
uspi->s_fshift = fs32_to_cpu(sb, usb1->fs_fshift);
if (!is_power_of_2(uspi->s_fsize)) {
- printk(KERN_ERR "ufs_read_super: fragment size %u is not a power of 2\n",
- uspi->s_fsize);
- goto failed;
+ pr_err("%s(): fragment size %u is not a power of 2\n",
+ __func__, uspi->s_fsize);
+ goto failed;
}
if (uspi->s_fsize < 512) {
- printk(KERN_ERR "ufs_read_super: fragment size %u is too small\n",
- uspi->s_fsize);
+ pr_err("%s(): fragment size %u is too small\n",
+ __func__, uspi->s_fsize);
goto failed;
}
if (uspi->s_fsize > 4096) {
- printk(KERN_ERR "ufs_read_super: fragment size %u is too large\n",
- uspi->s_fsize);
+ pr_err("%s(): fragment size %u is too large\n",
+ __func__, uspi->s_fsize);
goto failed;
}
if (!is_power_of_2(uspi->s_bsize)) {
- printk(KERN_ERR "ufs_read_super: block size %u is not a power of 2\n",
- uspi->s_bsize);
+ pr_err("%s(): block size %u is not a power of 2\n",
+ __func__, uspi->s_bsize);
goto failed;
}
if (uspi->s_bsize < 4096) {
- printk(KERN_ERR "ufs_read_super: block size %u is too small\n",
- uspi->s_bsize);
+ pr_err("%s(): block size %u is too small\n",
+ __func__, uspi->s_bsize);
goto failed;
}
if (uspi->s_bsize / uspi->s_fsize > 8) {
- printk(KERN_ERR "ufs_read_super: too many fragments per block (%u)\n",
- uspi->s_bsize / uspi->s_fsize);
+ pr_err("%s(): too many fragments per block (%u)\n",
+ __func__, uspi->s_bsize / uspi->s_fsize);
goto failed;
}
if (uspi->s_fsize != block_size || uspi->s_sbsize != super_block_size) {
@@ -1113,20 +1113,21 @@ magic_found:
UFSD("fs is DEC OSF/1\n");
break;
case UFS_FSACTIVE:
- printk("ufs_read_super: fs is active\n");
+ pr_err("%s(): fs is active\n", __func__);
sb->s_flags |= MS_RDONLY;
break;
case UFS_FSBAD:
- printk("ufs_read_super: fs is bad\n");
+ pr_err("%s(): fs is bad\n", __func__);
sb->s_flags |= MS_RDONLY;
break;
default:
- printk("ufs_read_super: can't grok fs_clean 0x%x\n", usb1->fs_clean);
+ pr_err("%s(): can't grok fs_clean 0x%x\n",
+ __func__, usb1->fs_clean);
sb->s_flags |= MS_RDONLY;
break;
}
} else {
- printk("ufs_read_super: fs needs fsck\n");
+ pr_err("%s(): fs needs fsck\n", __func__);
sb->s_flags |= MS_RDONLY;
}
@@ -1299,7 +1300,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) {
new_mount_opt |= ufstype;
} else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
- printk("ufstype can't be changed during remount\n");
+ pr_err("ufstype can't be changed during remount\n");
unlock_ufs(sb);
return -EINVAL;
}
@@ -1328,8 +1329,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
* fs was mounted as ro, remounting rw
*/
#ifndef CONFIG_UFS_FS_WRITE
- printk("ufs was compiled with read-only support, "
- "can't be mounted as read-write\n");
+ pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n");
unlock_ufs(sb);
return -EINVAL;
#else
@@ -1338,12 +1338,12 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
ufstype != UFS_MOUNT_UFSTYPE_44BSD &&
ufstype != UFS_MOUNT_UFSTYPE_SUNx86 &&
ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
- printk("this ufstype is read-only supported\n");
+ pr_err("this ufstype is read-only supported\n");
unlock_ufs(sb);
return -EINVAL;
}
if (!ufs_read_cylinder_structures(sb)) {
- printk("failed during remounting\n");
+ pr_err("failed during remounting\n");
unlock_ufs(sb);
return -EPERM;
}
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 343e6fc571e5..2a07396d5f9e 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -1,6 +1,12 @@
#ifndef _UFS_UFS_H
#define _UFS_UFS_H 1
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#define UFS_MAX_GROUP_LOADED 8
#define UFS_CGNO_EMPTY ((unsigned)-1)
@@ -71,9 +77,9 @@ struct ufs_inode_info {
*/
#ifdef CONFIG_UFS_DEBUG
# define UFSD(f, a...) { \
- printk ("UFSD (%s, %d): %s:", \
+ pr_debug("UFSD (%s, %d): %s:", \
__FILE__, __LINE__, __func__); \
- printk (f, ## a); \
+ pr_debug(f, ## a); \
}
#else
# define UFSD(f, a...) /**/
diff --git a/fs/xattr.c b/fs/xattr.c
index 3377dff18404..c69e6d43a0d2 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -843,7 +843,7 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
/* wrap around? */
len = sizeof(*new_xattr) + size;
- if (len <= sizeof(*new_xattr))
+ if (len < sizeof(*new_xattr))
return NULL;
new_xattr = kmalloc(len, GFP_KERNEL);
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 7ad634501e48..e1c8d080c427 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -88,32 +88,32 @@
* lib/bitmap.c provides these functions:
*/
-extern int __bitmap_empty(const unsigned long *bitmap, int bits);
-extern int __bitmap_full(const unsigned long *bitmap, int bits);
+extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits);
+extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits);
extern int __bitmap_equal(const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits);
+ const unsigned long *bitmap2, unsigned int nbits);
extern void __bitmap_complement(unsigned long *dst, const unsigned long *src,
- int bits);
+ unsigned int nbits);
extern void __bitmap_shift_right(unsigned long *dst,
const unsigned long *src, int shift, int bits);
extern void __bitmap_shift_left(unsigned long *dst,
const unsigned long *src, int shift, int bits);
extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits);
+ const unsigned long *bitmap2, unsigned int nbits);
extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits);
+ const unsigned long *bitmap2, unsigned int nbits);
extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits);
+ const unsigned long *bitmap2, unsigned int nbits);
extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits);
+ const unsigned long *bitmap2, unsigned int nbits);
extern int __bitmap_intersects(const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits);
+ const unsigned long *bitmap2, unsigned int nbits);
extern int __bitmap_subset(const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits);
-extern int __bitmap_weight(const unsigned long *bitmap, int bits);
+ const unsigned long *bitmap2, unsigned int nbits);
+extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
-extern void bitmap_set(unsigned long *map, int i, int len);
-extern void bitmap_clear(unsigned long *map, int start, int nr);
+extern void bitmap_set(unsigned long *map, unsigned int start, int len);
+extern void bitmap_clear(unsigned long *map, unsigned int start, int len);
extern unsigned long bitmap_find_next_zero_area(unsigned long *map,
unsigned long size,
unsigned long start,
@@ -140,9 +140,9 @@ extern void bitmap_onto(unsigned long *dst, const unsigned long *orig,
const unsigned long *relmap, int bits);
extern void bitmap_fold(unsigned long *dst, const unsigned long *orig,
int sz, int bits);
-extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order);
-extern void bitmap_release_region(unsigned long *bitmap, int pos, int order);
-extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order);
+extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order);
+extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order);
+extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order);
extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits);
extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits);
@@ -188,15 +188,15 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
}
static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
- const unsigned long *src2, int nbits)
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
- return (*dst = *src1 & *src2) != 0;
+ return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
return __bitmap_and(dst, src1, src2, nbits);
}
static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
- const unsigned long *src2, int nbits)
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = *src1 | *src2;
@@ -205,7 +205,7 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
}
static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
- const unsigned long *src2, int nbits)
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = *src1 ^ *src2;
@@ -214,24 +214,24 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
}
static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1,
- const unsigned long *src2, int nbits)
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
- return (*dst = *src1 & ~(*src2)) != 0;
+ return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
return __bitmap_andnot(dst, src1, src2, nbits);
}
static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
- int nbits)
+ unsigned int nbits)
{
if (small_const_nbits(nbits))
- *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits);
+ *dst = ~(*src);
else
__bitmap_complement(dst, src, nbits);
}
static inline int bitmap_equal(const unsigned long *src1,
- const unsigned long *src2, int nbits)
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
@@ -240,7 +240,7 @@ static inline int bitmap_equal(const unsigned long *src1,
}
static inline int bitmap_intersects(const unsigned long *src1,
- const unsigned long *src2, int nbits)
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
@@ -249,7 +249,7 @@ static inline int bitmap_intersects(const unsigned long *src1,
}
static inline int bitmap_subset(const unsigned long *src1,
- const unsigned long *src2, int nbits)
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits));
@@ -257,7 +257,7 @@ static inline int bitmap_subset(const unsigned long *src1,
return __bitmap_subset(src1, src2, nbits);
}
-static inline int bitmap_empty(const unsigned long *src, int nbits)
+static inline int bitmap_empty(const unsigned long *src, unsigned nbits)
{
if (small_const_nbits(nbits))
return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
@@ -265,7 +265,7 @@ static inline int bitmap_empty(const unsigned long *src, int nbits)
return __bitmap_empty(src, nbits);
}
-static inline int bitmap_full(const unsigned long *src, int nbits)
+static inline int bitmap_full(const unsigned long *src, unsigned int nbits)
{
if (small_const_nbits(nbits))
return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
@@ -273,7 +273,7 @@ static inline int bitmap_full(const unsigned long *src, int nbits)
return __bitmap_full(src, nbits);
}
-static inline int bitmap_weight(const unsigned long *src, int nbits)
+static inline int bitmap_weight(const unsigned long *src, unsigned int nbits)
{
if (small_const_nbits(nbits))
return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
@@ -284,7 +284,7 @@ static inline void bitmap_shift_right(unsigned long *dst,
const unsigned long *src, int n, int nbits)
{
if (small_const_nbits(nbits))
- *dst = *src >> n;
+ *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> n;
else
__bitmap_shift_right(dst, src, n, nbits);
}
diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h
index 0846e6b931ce..89f67c1c3160 100644
--- a/include/linux/byteorder/generic.h
+++ b/include/linux/byteorder/generic.h
@@ -2,7 +2,7 @@
#define _LINUX_BYTEORDER_GENERIC_H
/*
- * linux/byteorder_generic.h
+ * linux/byteorder/generic.h
* Generic Byte-reordering support
*
* The "... p" macros, like le64_to_cpup, can be used with pointers
diff --git a/include/linux/cma.h b/include/linux/cma.h
new file mode 100644
index 000000000000..371b93042520
--- /dev/null
+++ b/include/linux/cma.h
@@ -0,0 +1,27 @@
+#ifndef __CMA_H__
+#define __CMA_H__
+
+/*
+ * There is always at least global CMA area and a few optional
+ * areas configured in kernel .config.
+ */
+#ifdef CONFIG_CMA_AREAS
+#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS)
+
+#else
+#define MAX_CMA_AREAS (0)
+
+#endif
+
+struct cma;
+
+extern phys_addr_t cma_get_base(struct cma *cma);
+extern unsigned long cma_get_size(struct cma *cma);
+
+extern int __init cma_declare_contiguous(phys_addr_t size,
+ phys_addr_t base, phys_addr_t limit,
+ phys_addr_t alignment, unsigned int order_per_bit,
+ bool fixed, struct cma **res_cma);
+extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align);
+extern bool cma_release(struct cma *cma, struct page *pages, int count);
+#endif
diff --git a/include/linux/crc64_ecma.h b/include/linux/crc64_ecma.h
new file mode 100644
index 000000000000..bba7a4d692b3
--- /dev/null
+++ b/include/linux/crc64_ecma.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CRC64_ECMA_H_
+#define __CRC64_ECMA_H_
+
+#include <linux/types.h>
+
+
+#define CRC64_DEFAULT_INITVAL 0xFFFFFFFFFFFFFFFFULL
+
+
+/*
+ * crc64_ecma_seed - Initializes the CRC64 ECMA seed.
+ */
+u64 crc64_ecma_seed(void);
+
+/*
+ * crc64_ecma - Computes the 64 bit ECMA CRC.
+ *
+ * @pdata: pointer to the data to compute checksum for.
+ * @nbytes: number of bytes in data buffer.
+ * @seed: CRC seed.
+ */
+u64 crc64_ecma(u8 const *pdata, u32 nbytes, u64 seed);
+
+#endif /* __CRC64_ECMA_H_ */
diff --git a/include/linux/decompress/bunzip2.h b/include/linux/decompress/bunzip2.h
index 115272137a9c..4d683df898e6 100644
--- a/include/linux/decompress/bunzip2.h
+++ b/include/linux/decompress/bunzip2.h
@@ -1,10 +1,10 @@
#ifndef DECOMPRESS_BUNZIP2_H
#define DECOMPRESS_BUNZIP2_H
-int bunzip2(unsigned char *inbuf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+int bunzip2(unsigned char *inbuf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *pos,
+ long *pos,
void(*error)(char *x));
#endif
diff --git a/include/linux/decompress/generic.h b/include/linux/decompress/generic.h
index 0c7111a55a1a..1fcfd64b5076 100644
--- a/include/linux/decompress/generic.h
+++ b/include/linux/decompress/generic.h
@@ -1,11 +1,11 @@
#ifndef DECOMPRESS_GENERIC_H
#define DECOMPRESS_GENERIC_H
-typedef int (*decompress_fn) (unsigned char *inbuf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+typedef int (*decompress_fn) (unsigned char *inbuf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *outbuf,
- int *posp,
+ long *posp,
void(*error)(char *x));
/* inbuf - input buffer
@@ -33,7 +33,7 @@ typedef int (*decompress_fn) (unsigned char *inbuf, int len,
/* Utility routine to detect the decompression method */
-decompress_fn decompress_method(const unsigned char *inbuf, int len,
+decompress_fn decompress_method(const unsigned char *inbuf, long len,
const char **name);
#endif
diff --git a/include/linux/decompress/inflate.h b/include/linux/decompress/inflate.h
index 1d0aedef9822..e4f411fdbd24 100644
--- a/include/linux/decompress/inflate.h
+++ b/include/linux/decompress/inflate.h
@@ -1,10 +1,10 @@
#ifndef LINUX_DECOMPRESS_INFLATE_H
#define LINUX_DECOMPRESS_INFLATE_H
-int gunzip(unsigned char *inbuf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+int gunzip(unsigned char *inbuf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *pos,
+ long *pos,
void(*error_fn)(char *x));
#endif
diff --git a/include/linux/decompress/unlz4.h b/include/linux/decompress/unlz4.h
index d5b68bf3ec92..3273c2f36496 100644
--- a/include/linux/decompress/unlz4.h
+++ b/include/linux/decompress/unlz4.h
@@ -1,10 +1,10 @@
#ifndef DECOMPRESS_UNLZ4_H
#define DECOMPRESS_UNLZ4_H
-int unlz4(unsigned char *inbuf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+int unlz4(unsigned char *inbuf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *pos,
+ long *pos,
void(*error)(char *x));
#endif
diff --git a/include/linux/decompress/unlzma.h b/include/linux/decompress/unlzma.h
index 7796538f1bf4..8a891a193840 100644
--- a/include/linux/decompress/unlzma.h
+++ b/include/linux/decompress/unlzma.h
@@ -1,11 +1,11 @@
#ifndef DECOMPRESS_UNLZMA_H
#define DECOMPRESS_UNLZMA_H
-int unlzma(unsigned char *, int,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+int unlzma(unsigned char *, long,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *posp,
+ long *posp,
void(*error)(char *x)
);
diff --git a/include/linux/decompress/unlzo.h b/include/linux/decompress/unlzo.h
index 987229752519..af18f95d6570 100644
--- a/include/linux/decompress/unlzo.h
+++ b/include/linux/decompress/unlzo.h
@@ -1,10 +1,10 @@
#ifndef DECOMPRESS_UNLZO_H
#define DECOMPRESS_UNLZO_H
-int unlzo(unsigned char *inbuf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+int unlzo(unsigned char *inbuf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *pos,
+ long *pos,
void(*error)(char *x));
#endif
diff --git a/include/linux/decompress/unxz.h b/include/linux/decompress/unxz.h
index 41728fc6c8a1..f764e2a7201e 100644
--- a/include/linux/decompress/unxz.h
+++ b/include/linux/decompress/unxz.h
@@ -10,10 +10,10 @@
#ifndef DECOMPRESS_UNXZ_H
#define DECOMPRESS_UNXZ_H
-int unxz(unsigned char *in, int in_size,
- int (*fill)(void *dest, unsigned int size),
- int (*flush)(void *src, unsigned int size),
- unsigned char *out, int *in_used,
+int unxz(unsigned char *in, long in_size,
+ long (*fill)(void *dest, unsigned long size),
+ long (*flush)(void *src, unsigned long size),
+ unsigned char *out, long *in_used,
void (*error)(char *x));
#endif
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
index 772eab5d524a..569bbd039896 100644
--- a/include/linux/dma-contiguous.h
+++ b/include/linux/dma-contiguous.h
@@ -53,18 +53,13 @@
#ifdef __KERNEL__
+#include <linux/device.h>
+
struct cma;
struct page;
-struct device;
#ifdef CONFIG_DMA_CMA
-/*
- * There is always at least global CMA area and a few optional device
- * private areas configured in kernel .config.
- */
-#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS)
-
extern struct cma *dma_contiguous_default_area;
static inline struct cma *dev_get_cma_area(struct device *dev)
@@ -123,8 +118,6 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
#else
-#define MAX_CMA_AREAS (0)
-
static inline struct cma *dev_get_cma_area(struct device *dev)
{
return NULL;
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 41bbf8ba4ba8..1e191f8e5244 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1151,6 +1151,9 @@ int efivars_sysfs_init(void);
#ifdef CONFIG_EFI_RUNTIME_MAP
int efi_runtime_map_init(struct kobject *);
void efi_runtime_map_setup(void *, int, u32);
+int efi_get_runtime_map_size(void);
+int efi_get_runtime_map_desc_size(void);
+int efi_runtime_map_copy(void *buf, size_t bufsz);
#else
static inline int efi_runtime_map_init(struct kobject *kobj)
{
@@ -1159,6 +1162,22 @@ static inline int efi_runtime_map_init(struct kobject *kobj)
static inline void
efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) {}
+
+static inline int efi_get_runtime_map_size(void)
+{
+ return 0;
+}
+
+static inline int efi_get_runtime_map_desc_size(void)
+{
+ return 0;
+}
+
+static inline int efi_runtime_map_copy(void *buf, size_t bufsz)
+{
+ return 0;
+}
+
#endif
#endif /* _LINUX_EFI_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2daccaf4b547..1ab6c6913040 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2688,7 +2688,7 @@ static const struct file_operations __fops = { \
.read = simple_attr_read, \
.write = simple_attr_write, \
.llseek = generic_file_llseek, \
-};
+}
static inline __printf(1, 2)
void __simple_attr_check_format(const char *fmt, ...)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 6eb1fb37de9a..5e7219dc0fae 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -360,7 +360,7 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask);
void *alloc_pages_exact(size_t size, gfp_t gfp_mask);
void free_pages_exact(void *virt, size_t size);
/* This is different from alloc_pages_exact_node !!! */
-void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
+void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
#define __get_free_page(gfp_mask) \
__get_free_pages((gfp_mask), 0)
diff --git a/include/linux/glob.h b/include/linux/glob.h
new file mode 100644
index 000000000000..c990b7fbf0a8
--- /dev/null
+++ b/include/linux/glob.h
@@ -0,0 +1,10 @@
+#ifndef _LINUX_GLOB_H
+#define _LINUX_GLOB_H
+
+#include <linux/types.h> /* For bool */
+#include <linux/compiler.h> /* For __pure */
+
+bool __pure glob_match(char const *pat, char const *str);
+
+#endif /* _LINUX_GLOB_H */
+
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index b826239bdce0..63579cb8d3dc 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -93,10 +93,6 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
#endif /* CONFIG_DEBUG_VM */
extern unsigned long transparent_hugepage_flags;
-extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
- pmd_t *dst_pmd, pmd_t *src_pmd,
- struct vm_area_struct *vma,
- unsigned long addr, unsigned long end);
extern int split_huge_page_to_list(struct page *page, struct list_head *list);
static inline int split_huge_page(struct page *page)
{
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 255cd5cc0754..41272bcf73f8 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -86,7 +86,6 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
#endif
extern unsigned long hugepages_treat_as_movable;
-extern const unsigned long hugetlb_zero, hugetlb_infinity;
extern int sysctl_hugetlb_shm_group;
extern struct list_head huge_boot_pages;
diff --git a/include/linux/input.h b/include/linux/input.h
index 82ce323b9986..6453b22372ac 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -79,6 +79,7 @@ struct input_value {
* @led: reflects current state of device's LEDs
* @snd: reflects current state of sound effects
* @sw: reflects current state of device's switches
+ * @leds: leds objects for the device's LEDs
* @open: this method is called when the very first user calls
* input_open_device(). The driver must prepare the device
* to start generating events (start polling thread,
@@ -164,6 +165,8 @@ struct input_dev {
unsigned long snd[BITS_TO_LONGS(SND_CNT)];
unsigned long sw[BITS_TO_LONGS(SW_CNT)];
+ struct led_classdev *leds;
+
int (*open)(struct input_dev *dev);
void (*close)(struct input_dev *dev);
int (*flush)(struct input_dev *dev, struct file *file);
@@ -531,4 +534,22 @@ int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file);
int input_ff_create_memless(struct input_dev *dev, void *data,
int (*play_effect)(struct input_dev *, void *, struct ff_effect *));
+#ifdef CONFIG_INPUT_LEDS
+
+int input_led_connect(struct input_dev *dev);
+void input_led_disconnect(struct input_dev *dev);
+
+#else
+
+static inline int input_led_connect(struct input_dev *dev)
+{
+ return 0;
+}
+
+static inline void input_led_disconnect(struct input_dev *dev)
+{
+}
+
+#endif
+
#endif
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 5e3a906cc089..142ec544167c 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -237,6 +237,12 @@ extern int iomem_is_exclusive(u64 addr);
extern int
walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
void *arg, int (*func)(unsigned long, unsigned long, void *));
+extern int
+walk_system_ram_res(u64 start, u64 end, void *arg,
+ int (*func)(u64, u64, void *));
+extern int
+walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, void *arg,
+ int (*func)(u64, u64, void *));
/* True if any part of r1 overlaps r2 */
static inline bool resource_overlaps(struct resource *r1, struct resource *r2)
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index a9e2268ecccb..e9892041cb41 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -470,6 +470,7 @@ extern enum system_states {
#define TAINT_FIRMWARE_WORKAROUND 11
#define TAINT_OOT_MODULE 12
#define TAINT_UNSIGNED_MODULE 13
+#define TAINT_SOFTLOCKUP 14
extern const char hex_asc[];
#define hex_asc_lo(x) hex_asc[((x) & 0x0f)]
@@ -493,11 +494,6 @@ static inline char *hex_byte_pack_upper(char *buf, u8 byte)
return buf;
}
-static inline char * __deprecated pack_hex_byte(char *buf, u8 byte)
-{
- return hex_byte_pack(buf, byte);
-}
-
extern int hex_to_bin(char ch);
extern int __must_check hex2bin(u8 *dst, const char *src, size_t count);
@@ -719,23 +715,8 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
(void) (&_max1 == &_max2); \
_max1 > _max2 ? _max1 : _max2; })
-#define min3(x, y, z) ({ \
- typeof(x) _min1 = (x); \
- typeof(y) _min2 = (y); \
- typeof(z) _min3 = (z); \
- (void) (&_min1 == &_min2); \
- (void) (&_min1 == &_min3); \
- _min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \
- (_min2 < _min3 ? _min2 : _min3); })
-
-#define max3(x, y, z) ({ \
- typeof(x) _max1 = (x); \
- typeof(y) _max2 = (y); \
- typeof(z) _max3 = (z); \
- (void) (&_max1 == &_max2); \
- (void) (&_max1 == &_max3); \
- _max1 > _max2 ? (_max1 > _max3 ? _max1 : _max3) : \
- (_max2 > _max3 ? _max2 : _max3); })
+#define min3(x, y, z) min((typeof(x))min(x, y), z)
+#define max3(x, y, z) max((typeof(x))max(x, y), z)
/**
* min_not_zero - return the minimum that is _not_ zero, unless both are zero
@@ -750,20 +731,13 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
/**
* clamp - return a value clamped to a given range with strict typechecking
* @val: current value
- * @min: minimum allowable value
- * @max: maximum allowable value
+ * @lo: lowest allowable value
+ * @hi: highest allowable value
*
* This macro does strict typechecking of min/max to make sure they are of the
* same type as val. See the unnecessary pointer comparisons.
*/
-#define clamp(val, min, max) ({ \
- typeof(val) __val = (val); \
- typeof(min) __min = (min); \
- typeof(max) __max = (max); \
- (void) (&__val == &__min); \
- (void) (&__val == &__max); \
- __val = __val < __min ? __min: __val; \
- __val > __max ? __max: __val; })
+#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
/*
* ..and if you can't take the strict
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index a75641930049..9481703b0e7a 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -10,6 +10,7 @@
#include <linux/ioport.h>
#include <linux/elfcore.h>
#include <linux/elf.h>
+#include <linux/module.h>
#include <asm/kexec.h>
/* Verify architecture specific macros are defined */
@@ -69,7 +70,18 @@ typedef unsigned long kimage_entry_t;
#define IND_SOURCE 0x8
struct kexec_segment {
- void __user *buf;
+ /*
+ * This pointer can point to user memory if kexec_load() system
+ * call is used or will point to kernel memory if
+ * kexec_file_load() system call is used.
+ *
+ * Use ->buf when expecting to deal with user memory and use ->kbuf
+ * when expecting to deal with kernel memory.
+ */
+ union {
+ void __user *buf;
+ void *kbuf;
+ };
size_t bufsz;
unsigned long mem;
size_t memsz;
@@ -84,6 +96,27 @@ struct compat_kexec_segment {
};
#endif
+struct kexec_sha_region {
+ unsigned long start;
+ unsigned long len;
+};
+
+struct purgatory_info {
+ /* Pointer to elf header of read only purgatory */
+ Elf_Ehdr *ehdr;
+
+ /* Pointer to purgatory sechdrs which are modifiable */
+ Elf_Shdr *sechdrs;
+ /*
+ * Temporary buffer location where purgatory is loaded and relocated
+ * This memory can be freed post image load
+ */
+ void *purgatory_buf;
+
+ /* Address where purgatory is finally loaded and is executed from */
+ unsigned long purgatory_load_addr;
+};
+
struct kimage {
kimage_entry_t head;
kimage_entry_t *entry;
@@ -100,7 +133,7 @@ struct kimage {
struct list_head control_pages;
struct list_head dest_pages;
- struct list_head unuseable_pages;
+ struct list_head unusable_pages;
/* Address of next control page to allocate for crash kernels. */
unsigned long control_page;
@@ -110,13 +143,60 @@ struct kimage {
#define KEXEC_TYPE_DEFAULT 0
#define KEXEC_TYPE_CRASH 1
unsigned int preserve_context : 1;
+ /* If set, we are using file mode kexec syscall */
+ unsigned int file_mode:1;
#ifdef ARCH_HAS_KIMAGE_ARCH
struct kimage_arch arch;
#endif
+
+ /* Additional fields for file based kexec syscall */
+ void *kernel_buf;
+ unsigned long kernel_buf_len;
+
+ void *initrd_buf;
+ unsigned long initrd_buf_len;
+
+ char *cmdline_buf;
+ unsigned long cmdline_buf_len;
+
+ /* File operations provided by image loader */
+ struct kexec_file_ops *fops;
+
+ /* Image loader handling the kernel can store a pointer here */
+ void *image_loader_data;
+
+ /* Information for loading purgatory */
+ struct purgatory_info purgatory_info;
};
+/*
+ * Keeps track of buffer parameters as provided by caller for requesting
+ * memory placement of buffer.
+ */
+struct kexec_buf {
+ struct kimage *image;
+ char *buffer;
+ unsigned long bufsz;
+ unsigned long memsz;
+ unsigned long buf_align;
+ unsigned long buf_min;
+ unsigned long buf_max;
+ bool top_down; /* allocate from top of memory hole */
+};
+typedef int (kexec_probe_t)(const char *kernel_buf, unsigned long kernel_size);
+typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len);
+typedef int (kexec_cleanup_t)(void *loader_data);
+
+struct kexec_file_ops {
+ kexec_probe_t *probe;
+ kexec_load_t *load;
+ kexec_cleanup_t *cleanup;
+};
/* kexec interface functions */
extern void machine_kexec(struct kimage *image);
@@ -127,8 +207,21 @@ extern asmlinkage long sys_kexec_load(unsigned long entry,
struct kexec_segment __user *segments,
unsigned long flags);
extern int kernel_kexec(void);
+extern int kexec_add_buffer(struct kimage *image, char *buffer,
+ unsigned long bufsz, unsigned long memsz,
+ unsigned long buf_align, unsigned long buf_min,
+ unsigned long buf_max, bool top_down,
+ unsigned long *load_addr);
extern struct page *kimage_alloc_control_pages(struct kimage *image,
unsigned int order);
+extern int kexec_load_purgatory(struct kimage *image, unsigned long min,
+ unsigned long max, int top_down,
+ unsigned long *load_addr);
+extern int kexec_purgatory_get_set_symbol(struct kimage *image,
+ const char *name, void *buf,
+ unsigned int size, bool get_value);
+extern void *kexec_purgatory_get_symbol_addr(struct kimage *image,
+ const char *name);
extern void crash_kexec(struct pt_regs *);
int kexec_should_crash(struct task_struct *);
void crash_save_cpu(struct pt_regs *regs, int cpu);
@@ -177,6 +270,10 @@ extern int kexec_load_disabled;
#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)
#endif
+/* List of defined/legal kexec file flags */
+#define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
+ KEXEC_FILE_NO_INITRAMFS)
+
#define VMCOREINFO_BYTES (4096)
#define VMCOREINFO_NOTE_NAME "VMCOREINFO"
#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
diff --git a/include/linux/klist.h b/include/linux/klist.h
index a370ce57cf1d..61e5b723ae73 100644
--- a/include/linux/klist.h
+++ b/include/linux/klist.h
@@ -44,7 +44,7 @@ struct klist_node {
extern void klist_add_tail(struct klist_node *n, struct klist *k);
extern void klist_add_head(struct klist_node *n, struct klist *k);
-extern void klist_add_after(struct klist_node *n, struct klist_node *pos);
+extern void klist_add_behind(struct klist_node *n, struct klist_node *pos);
extern void klist_add_before(struct klist_node *n, struct klist_node *pos);
extern void klist_del(struct klist_node *n);
diff --git a/include/linux/list.h b/include/linux/list.h
index ef9594171062..cbbb96fcead9 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -654,15 +654,15 @@ static inline void hlist_add_before(struct hlist_node *n,
*(n->pprev) = n;
}
-static inline void hlist_add_after(struct hlist_node *n,
- struct hlist_node *next)
+static inline void hlist_add_behind(struct hlist_node *n,
+ struct hlist_node *prev)
{
- next->next = n->next;
- n->next = next;
- next->pprev = &n->next;
+ n->next = prev->next;
+ prev->next = n;
+ n->pprev = &prev->next;
- if(next->next)
- next->next->pprev = &next->next;
+ if (n->next)
+ n->next->pprev = &n->next;
}
/* after that we'll appear to be on some hlist and hlist_del will work */
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index b660e05b63d4..e8cc45307f8f 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -249,7 +249,7 @@ phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
/*
* Set the allocation direction to bottom-up or top-down.
*/
-static inline void memblock_set_bottom_up(bool enable)
+static inline void __init memblock_set_bottom_up(bool enable)
{
memblock.bottom_up = enable;
}
@@ -264,7 +264,7 @@ static inline bool memblock_bottom_up(void)
return memblock.bottom_up;
}
#else
-static inline void memblock_set_bottom_up(bool enable) {}
+static inline void __init memblock_set_bottom_up(bool enable) {}
static inline bool memblock_bottom_up(void) { return false; }
#endif
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index eb65d29516ca..e0752d204d9e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -54,39 +54,20 @@ struct mem_cgroup_reclaim_cookie {
};
#ifdef CONFIG_MEMCG
-/*
- * All "charge" functions with gfp_mask should use GFP_KERNEL or
- * (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't
- * alloc memory but reclaims memory from all available zones. So, "where I want
- * memory from" bits of gfp_mask has no meaning. So any bits of that field is
- * available but adding a rule is better. charge functions' gfp_mask should
- * be set to GFP_KERNEL or gfp_mask & GFP_RECLAIM_MASK for avoiding ambiguous
- * codes.
- * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.)
- */
+int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+ gfp_t gfp_mask, struct mem_cgroup **memcgp);
+void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
+ bool lrucare);
+void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg);
+void mem_cgroup_uncharge(struct page *page);
+void mem_cgroup_uncharge_list(struct list_head *page_list);
-extern int mem_cgroup_charge_anon(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask);
-/* for swap handling */
-extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
- struct page *page, gfp_t mask, struct mem_cgroup **memcgp);
-extern void mem_cgroup_commit_charge_swapin(struct page *page,
- struct mem_cgroup *memcg);
-extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg);
-
-extern int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask);
+void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
+ bool lrucare);
struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
-/* For coalescing uncharge for reducing memcg' overhead*/
-extern void mem_cgroup_uncharge_start(void);
-extern void mem_cgroup_uncharge_end(void);
-
-extern void mem_cgroup_uncharge_page(struct page *page);
-extern void mem_cgroup_uncharge_cache_page(struct page *page);
-
bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
struct mem_cgroup *memcg);
bool task_in_mem_cgroup(struct task_struct *task,
@@ -113,12 +94,6 @@ bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg)
extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
-extern void
-mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
- struct mem_cgroup **memcgp);
-extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
- struct page *oldpage, struct page *newpage, bool migration_ok);
-
struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
struct mem_cgroup *,
struct mem_cgroup_reclaim_cookie *);
@@ -133,8 +108,6 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list);
void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int);
extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
struct task_struct *p);
-extern void mem_cgroup_replace_page_cache(struct page *oldpage,
- struct page *newpage);
static inline void mem_cgroup_oom_enable(void)
{
@@ -233,46 +206,36 @@ void mem_cgroup_print_bad_page(struct page *page);
#else /* CONFIG_MEMCG */
struct mem_cgroup;
-static inline int mem_cgroup_charge_anon(struct page *page,
- struct mm_struct *mm, gfp_t gfp_mask)
-{
- return 0;
-}
-
-static inline int mem_cgroup_charge_file(struct page *page,
- struct mm_struct *mm, gfp_t gfp_mask)
-{
- return 0;
-}
-
-static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
- struct page *page, gfp_t gfp_mask, struct mem_cgroup **memcgp)
+static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+ gfp_t gfp_mask,
+ struct mem_cgroup **memcgp)
{
+ *memcgp = NULL;
return 0;
}
-static inline void mem_cgroup_commit_charge_swapin(struct page *page,
- struct mem_cgroup *memcg)
-{
-}
-
-static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
+static inline void mem_cgroup_commit_charge(struct page *page,
+ struct mem_cgroup *memcg,
+ bool lrucare)
{
}
-static inline void mem_cgroup_uncharge_start(void)
+static inline void mem_cgroup_cancel_charge(struct page *page,
+ struct mem_cgroup *memcg)
{
}
-static inline void mem_cgroup_uncharge_end(void)
+static inline void mem_cgroup_uncharge(struct page *page)
{
}
-static inline void mem_cgroup_uncharge_page(struct page *page)
+static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
{
}
-static inline void mem_cgroup_uncharge_cache_page(struct page *page)
+static inline void mem_cgroup_migrate(struct page *oldpage,
+ struct page *newpage,
+ bool lrucare)
{
}
@@ -311,17 +274,6 @@ static inline struct cgroup_subsys_state
return NULL;
}
-static inline void
-mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
- struct mem_cgroup **memcgp)
-{
-}
-
-static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
- struct page *oldpage, struct page *newpage, bool migration_ok)
-{
-}
-
static inline struct mem_cgroup *
mem_cgroup_iter(struct mem_cgroup *root,
struct mem_cgroup *prev,
@@ -417,10 +369,6 @@ static inline
void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
{
}
-static inline void mem_cgroup_replace_page_cache(struct page *oldpage,
- struct page *newpage)
-{
-}
#endif /* CONFIG_MEMCG */
#if !defined(CONFIG_MEMCG) || !defined(CONFIG_DEBUG_VM)
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 010d125bffbf..79dd9eca054f 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -26,11 +26,12 @@ enum {
MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
};
-/* Types for control the zone type of onlined memory */
+/* Types for control the zone type of onlined and offlined memory */
enum {
- ONLINE_KEEP,
- ONLINE_KERNEL,
- ONLINE_MOVABLE,
+ MMOP_OFFLINE = -1,
+ MMOP_ONLINE_KEEP,
+ MMOP_ONLINE_KERNEL,
+ MMOP_ONLINE_MOVABLE,
};
/*
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 96c5750e3110..21bff4be4379 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -461,6 +461,7 @@ static inline void mm_init_cpumask(struct mm_struct *mm)
#ifdef CONFIG_CPUMASK_OFFSTACK
mm->cpu_vm_mask_var = &mm->cpumask_allocation;
#endif
+ cpumask_clear(mm->cpu_vm_mask_var);
}
/* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index edd82a105220..2f348d02f640 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -20,11 +20,13 @@ extern void dump_page_badflags(struct page *page, const char *reason,
} while (0)
#define VM_WARN_ON(cond) WARN_ON(cond)
#define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond)
+#define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format)
#else
#define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
#define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
+#define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
#endif
#ifdef CONFIG_DEBUG_VIRTUAL
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6cbd1b6c3d20..318df7051850 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -143,6 +143,7 @@ enum zone_stat_item {
NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
NR_DIRTIED, /* page dirtyings since bootup */
NR_WRITTEN, /* page writings since bootup */
+ NR_PAGES_SCANNED, /* pages scanned since last reclaim */
#ifdef CONFIG_NUMA
NUMA_HIT, /* allocated in intended node */
NUMA_MISS, /* allocated in non intended node */
@@ -324,19 +325,12 @@ enum zone_type {
#ifndef __GENERATING_BOUNDS_H
struct zone {
- /* Fields commonly accessed by the page allocator */
+ /* Read-mostly fields */
/* zone watermarks, access with *_wmark_pages(zone) macros */
unsigned long watermark[NR_WMARK];
/*
- * When free pages are below this point, additional steps are taken
- * when reading the number of free pages to avoid per-cpu counter
- * drift allowing watermarks to be breached
- */
- unsigned long percpu_drift_mark;
-
- /*
* We don't know if the memory that we're going to allocate will be freeable
* or/and it will be released eventually, so to avoid totally wasting several
* GB of ram we must reserve some of the lower zone memory (otherwise we risk
@@ -344,41 +338,26 @@ struct zone {
* on the higher zones). This array is recalculated at runtime if the
* sysctl_lowmem_reserve_ratio sysctl changes.
*/
- unsigned long lowmem_reserve[MAX_NR_ZONES];
-
- /*
- * This is a per-zone reserve of pages that should not be
- * considered dirtyable memory.
- */
- unsigned long dirty_balance_reserve;
+ long lowmem_reserve[MAX_NR_ZONES];
#ifdef CONFIG_NUMA
int node;
+#endif
+
/*
- * zone reclaim becomes active if more unmapped pages exist.
+ * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
+ * this zone's LRU. Maintained by the pageout code.
*/
- unsigned long min_unmapped_pages;
- unsigned long min_slab_pages;
-#endif
+ unsigned int inactive_ratio;
+
+ struct pglist_data *zone_pgdat;
struct per_cpu_pageset __percpu *pageset;
+
/*
- * free areas of different sizes
+ * This is a per-zone reserve of pages that should not be
+ * considered dirtyable memory.
*/
- spinlock_t lock;
-#if defined CONFIG_COMPACTION || defined CONFIG_CMA
- /* Set to true when the PG_migrate_skip bits should be cleared */
- bool compact_blockskip_flush;
-
- /* pfn where compaction free scanner should start */
- unsigned long compact_cached_free_pfn;
- /* pfn where async and sync compaction migration scanner should start */
- unsigned long compact_cached_migrate_pfn[2];
-#endif
-#ifdef CONFIG_MEMORY_HOTPLUG
- /* see spanned/present_pages for more description */
- seqlock_t span_seqlock;
-#endif
- struct free_area free_area[MAX_ORDER];
+ unsigned long dirty_balance_reserve;
#ifndef CONFIG_SPARSEMEM
/*
@@ -388,74 +367,14 @@ struct zone {
unsigned long *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */
-#ifdef CONFIG_COMPACTION
- /*
- * On compaction failure, 1<<compact_defer_shift compactions
- * are skipped before trying again. The number attempted since
- * last failure is tracked with compact_considered.
- */
- unsigned int compact_considered;
- unsigned int compact_defer_shift;
- int compact_order_failed;
-#endif
-
- ZONE_PADDING(_pad1_)
-
- /* Fields commonly accessed by the page reclaim scanner */
- spinlock_t lru_lock;
- struct lruvec lruvec;
-
- /* Evictions & activations on the inactive file list */
- atomic_long_t inactive_age;
-
- unsigned long pages_scanned; /* since last reclaim */
- unsigned long flags; /* zone flags, see below */
-
- /* Zone statistics */
- atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
-
- /*
- * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
- * this zone's LRU. Maintained by the pageout code.
- */
- unsigned int inactive_ratio;
-
-
- ZONE_PADDING(_pad2_)
- /* Rarely used or read-mostly fields */
-
+#ifdef CONFIG_NUMA
/*
- * wait_table -- the array holding the hash table
- * wait_table_hash_nr_entries -- the size of the hash table array
- * wait_table_bits -- wait_table_size == (1 << wait_table_bits)
- *
- * The purpose of all these is to keep track of the people
- * waiting for a page to become available and make them
- * runnable again when possible. The trouble is that this
- * consumes a lot of space, especially when so few things
- * wait on pages at a given time. So instead of using
- * per-page waitqueues, we use a waitqueue hash table.
- *
- * The bucket discipline is to sleep on the same queue when
- * colliding and wake all in that wait queue when removing.
- * When something wakes, it must check to be sure its page is
- * truly available, a la thundering herd. The cost of a
- * collision is great, but given the expected load of the
- * table, they should be so rare as to be outweighed by the
- * benefits from the saved space.
- *
- * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
- * primary users of these fields, and in mm/page_alloc.c
- * free_area_init_core() performs the initialization of them.
+ * zone reclaim becomes active if more unmapped pages exist.
*/
- wait_queue_head_t * wait_table;
- unsigned long wait_table_hash_nr_entries;
- unsigned long wait_table_bits;
+ unsigned long min_unmapped_pages;
+ unsigned long min_slab_pages;
+#endif /* CONFIG_NUMA */
- /*
- * Discontig memory support fields.
- */
- struct pglist_data *zone_pgdat;
/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
unsigned long zone_start_pfn;
@@ -500,9 +419,11 @@ struct zone {
* adjust_managed_page_count() should be used instead of directly
* touching zone->managed_pages and totalram_pages.
*/
+ unsigned long managed_pages;
unsigned long spanned_pages;
unsigned long present_pages;
- unsigned long managed_pages;
+
+ const char *name;
/*
* Number of MIGRATE_RESEVE page block. To maintain for just
@@ -510,10 +431,94 @@ struct zone {
*/
int nr_migrate_reserve_block;
+#ifdef CONFIG_MEMORY_HOTPLUG
+ /* see spanned/present_pages for more description */
+ seqlock_t span_seqlock;
+#endif
+
/*
- * rarely used fields:
+ * wait_table -- the array holding the hash table
+ * wait_table_hash_nr_entries -- the size of the hash table array
+ * wait_table_bits -- wait_table_size == (1 << wait_table_bits)
+ *
+ * The purpose of all these is to keep track of the people
+ * waiting for a page to become available and make them
+ * runnable again when possible. The trouble is that this
+ * consumes a lot of space, especially when so few things
+ * wait on pages at a given time. So instead of using
+ * per-page waitqueues, we use a waitqueue hash table.
+ *
+ * The bucket discipline is to sleep on the same queue when
+ * colliding and wake all in that wait queue when removing.
+ * When something wakes, it must check to be sure its page is
+ * truly available, a la thundering herd. The cost of a
+ * collision is great, but given the expected load of the
+ * table, they should be so rare as to be outweighed by the
+ * benefits from the saved space.
+ *
+ * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
+ * primary users of these fields, and in mm/page_alloc.c
+ * free_area_init_core() performs the initialization of them.
*/
- const char *name;
+ wait_queue_head_t *wait_table;
+ unsigned long wait_table_hash_nr_entries;
+ unsigned long wait_table_bits;
+
+ ZONE_PADDING(_pad1_)
+
+ /* Write-intensive fields used from the page allocator */
+ spinlock_t lock;
+
+ /* free areas of different sizes */
+ struct free_area free_area[MAX_ORDER];
+
+ /* zone flags, see below */
+ unsigned long flags;
+
+ ZONE_PADDING(_pad2_)
+
+ /* Write-intensive fields used by page reclaim */
+
+ /* Fields commonly accessed by the page reclaim scanner */
+ spinlock_t lru_lock;
+ struct lruvec lruvec;
+
+ /* Evictions & activations on the inactive file list */
+ atomic_long_t inactive_age;
+
+ /*
+ * When free pages are below this point, additional steps are taken
+ * when reading the number of free pages to avoid per-cpu counter
+ * drift allowing watermarks to be breached
+ */
+ unsigned long percpu_drift_mark;
+
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+ /* pfn where compaction free scanner should start */
+ unsigned long compact_cached_free_pfn;
+ /* pfn where async and sync compaction migration scanner should start */
+ unsigned long compact_cached_migrate_pfn[2];
+#endif
+
+#ifdef CONFIG_COMPACTION
+ /*
+ * On compaction failure, 1<<compact_defer_shift compactions
+ * are skipped before trying again. The number attempted since
+ * last failure is tracked with compact_considered.
+ */
+ unsigned int compact_considered;
+ unsigned int compact_defer_shift;
+ int compact_order_failed;
+#endif
+
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+ /* Set to true when the PG_migrate_skip bits should be cleared */
+ bool compact_blockskip_flush;
+#endif
+
+ ZONE_PADDING(_pad3_)
+ /* Zone statistics */
+ atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
} ____cacheline_internodealigned_in_smp;
typedef enum {
@@ -529,6 +534,7 @@ typedef enum {
ZONE_WRITEBACK, /* reclaim scanning has recently found
* many pages under writeback
*/
+ ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */
} zone_flags_t;
static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
@@ -566,6 +572,11 @@ static inline int zone_is_reclaim_locked(const struct zone *zone)
return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
}
+static inline int zone_is_fair_depleted(const struct zone *zone)
+{
+ return test_bit(ZONE_FAIR_DEPLETED, &zone->flags);
+}
+
static inline int zone_is_oom_locked(const struct zone *zone)
{
return test_bit(ZONE_OOM_LOCKED, &zone->flags);
@@ -872,6 +883,8 @@ static inline int zone_movable_is_highmem(void)
{
#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
return movable_zone == ZONE_HIGHMEM;
+#elif defined(CONFIG_HIGHMEM)
+ return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
#else
return 0;
#endif
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 8304959ad336..e1f5fcd79792 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -171,13 +171,12 @@ static inline int __TestClearPage##uname(struct page *page) \
#define __PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \
__SETPAGEFLAG(uname, lname) __CLEARPAGEFLAG(uname, lname)
-#define PAGEFLAG_FALSE(uname) \
-static inline int Page##uname(const struct page *page) \
- { return 0; }
-
#define TESTSCFLAG(uname, lname) \
TESTSETFLAG(uname, lname) TESTCLEARFLAG(uname, lname)
+#define TESTPAGEFLAG_FALSE(uname) \
+static inline int Page##uname(const struct page *page) { return 0; }
+
#define SETPAGEFLAG_NOOP(uname) \
static inline void SetPage##uname(struct page *page) { }
@@ -187,12 +186,21 @@ static inline void ClearPage##uname(struct page *page) { }
#define __CLEARPAGEFLAG_NOOP(uname) \
static inline void __ClearPage##uname(struct page *page) { }
+#define TESTSETFLAG_FALSE(uname) \
+static inline int TestSetPage##uname(struct page *page) { return 0; }
+
#define TESTCLEARFLAG_FALSE(uname) \
static inline int TestClearPage##uname(struct page *page) { return 0; }
#define __TESTCLEARFLAG_FALSE(uname) \
static inline int __TestClearPage##uname(struct page *page) { return 0; }
+#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname) \
+ SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname)
+
+#define TESTSCFLAG_FALSE(uname) \
+ TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)
+
struct page; /* forward declaration */
TESTPAGEFLAG(Locked, locked)
@@ -248,7 +256,6 @@ PAGEFLAG_FALSE(HighMem)
PAGEFLAG(SwapCache, swapcache)
#else
PAGEFLAG_FALSE(SwapCache)
- SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache)
#endif
PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
@@ -258,8 +265,8 @@ PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked)
#else
-PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked)
- TESTCLEARFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
+PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked)
+ TESTSCFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked)
#endif
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 777a524716db..5c831f1eca79 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -3,17 +3,15 @@
enum {
/* flags for mem_cgroup */
- PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */
- PCG_USED, /* this object is in use. */
- PCG_MIGRATION, /* under page migration */
- __NR_PCG_FLAGS,
+ PCG_USED = 0x01, /* This page is charged to a memcg */
+ PCG_MEM = 0x02, /* This page holds a memory charge */
+ PCG_MEMSW = 0x04, /* This page holds a memory+swap charge */
};
-#ifndef __GENERATING_BOUNDS_H
-#include <generated/bounds.h>
+struct pglist_data;
#ifdef CONFIG_MEMCG
-#include <linux/bit_spinlock.h>
+struct mem_cgroup;
/*
* Page Cgroup can be considered as an extended mem_map.
@@ -27,65 +25,30 @@ struct page_cgroup {
struct mem_cgroup *mem_cgroup;
};
-void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
+extern void pgdat_page_cgroup_init(struct pglist_data *pgdat);
#ifdef CONFIG_SPARSEMEM
-static inline void __init page_cgroup_init_flatmem(void)
+static inline void page_cgroup_init_flatmem(void)
{
}
-extern void __init page_cgroup_init(void);
+extern void page_cgroup_init(void);
#else
-void __init page_cgroup_init_flatmem(void);
-static inline void __init page_cgroup_init(void)
+extern void page_cgroup_init_flatmem(void);
+static inline void page_cgroup_init(void)
{
}
#endif
struct page_cgroup *lookup_page_cgroup(struct page *page);
-struct page *lookup_cgroup_page(struct page_cgroup *pc);
-
-#define TESTPCGFLAG(uname, lname) \
-static inline int PageCgroup##uname(struct page_cgroup *pc) \
- { return test_bit(PCG_##lname, &pc->flags); }
-
-#define SETPCGFLAG(uname, lname) \
-static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
- { set_bit(PCG_##lname, &pc->flags); }
-
-#define CLEARPCGFLAG(uname, lname) \
-static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \
- { clear_bit(PCG_##lname, &pc->flags); }
-
-#define TESTCLEARPCGFLAG(uname, lname) \
-static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \
- { return test_and_clear_bit(PCG_##lname, &pc->flags); }
-
-TESTPCGFLAG(Used, USED)
-CLEARPCGFLAG(Used, USED)
-SETPCGFLAG(Used, USED)
-
-SETPCGFLAG(Migration, MIGRATION)
-CLEARPCGFLAG(Migration, MIGRATION)
-TESTPCGFLAG(Migration, MIGRATION)
-static inline void lock_page_cgroup(struct page_cgroup *pc)
+static inline int PageCgroupUsed(struct page_cgroup *pc)
{
- /*
- * Don't take this lock in IRQ context.
- * This lock is for pc->mem_cgroup, USED, MIGRATION
- */
- bit_spin_lock(PCG_LOCK, &pc->flags);
+ return !!(pc->flags & PCG_USED);
}
-
-static inline void unlock_page_cgroup(struct page_cgroup *pc)
-{
- bit_spin_unlock(PCG_LOCK, &pc->flags);
-}
-
-#else /* CONFIG_MEMCG */
+#else /* !CONFIG_MEMCG */
struct page_cgroup;
-static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
+static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
}
@@ -98,10 +61,9 @@ static inline void page_cgroup_init(void)
{
}
-static inline void __init page_cgroup_init_flatmem(void)
+static inline void page_cgroup_init_flatmem(void)
{
}
-
#endif /* CONFIG_MEMCG */
#include <linux/swap.h>
@@ -140,6 +102,4 @@ static inline void swap_cgroup_swapoff(int type)
#endif /* CONFIG_MEMCG_SWAP */
-#endif /* !__GENERATING_BOUNDS_H */
-
#endif /* __LINUX_PAGE_CGROUP_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0a97b583ee8d..3df8c7db7a4e 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -399,6 +399,18 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
}
/*
+ * Get the offset in PAGE_SIZE.
+ * (TODO: hugepage should have ->index in PAGE_SIZE)
+ */
+static inline pgoff_t page_to_pgoff(struct page *page)
+{
+ if (unlikely(PageHeadHuge(page)))
+ return page->index << compound_order(page);
+ else
+ return page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+}
+
+/*
* Return byte-offset into filesystem object for page.
*/
static inline loff_t page_offset(struct page *page)
@@ -472,6 +484,9 @@ static inline int lock_page_killable(struct page *page)
/*
* lock_page_or_retry - Lock the page, unless this would block and the
* caller indicated that it can handle a retry.
+ *
+ * Return value and mmap_sem implications depend on flags; see
+ * __lock_page_or_retry().
*/
static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags)
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 319ff7e53efb..0990997a5304 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -31,7 +31,7 @@ static inline const char *printk_skip_level(const char *buffer)
}
/* printk's without a loglevel use this.. */
-#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
+#define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
/* We show everything that is MORE important than this.. */
#define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 8183b46fbaa2..372ad5e0dcb8 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -432,9 +432,9 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
}
/**
- * hlist_add_after_rcu
- * @prev: the existing element to add the new element after.
+ * hlist_add_behind_rcu
* @n: the new element to add to the hash list.
+ * @prev: the existing element to add the new element after.
*
* Description:
* Adds the specified element to the specified hlist
@@ -449,8 +449,8 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
* hlist_for_each_entry_rcu(), used to prevent memory-consistency
* problems on Alpha CPUs.
*/
-static inline void hlist_add_after_rcu(struct hlist_node *prev,
- struct hlist_node *n)
+static inline void hlist_add_behind_rcu(struct hlist_node *n,
+ struct hlist_node *prev)
{
n->next = prev->next;
n->pprev = &prev->next;
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index a964f7285600..4b152c81c5fa 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -136,7 +136,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
struct scatterlist *sgl)
{
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
BUG();
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 42cac4dc2157..72d06cc99db0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -33,6 +33,7 @@ struct sched_param {
#include <linux/smp.h>
#include <linux/sem.h>
+#include <linux/shm.h>
#include <linux/signal.h>
#include <linux/compiler.h>
#include <linux/completion.h>
@@ -1386,6 +1387,7 @@ struct task_struct {
#ifdef CONFIG_SYSVIPC
/* ipc stuff */
struct sysv_sem sysvsem;
+ struct sysv_shm sysvshm;
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
/* hung task detection */
@@ -1629,12 +1631,6 @@ struct task_struct {
unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
#ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
- struct memcg_batch_info {
- int do_batch; /* incremented when batch uncharge started */
- struct mem_cgroup *memcg; /* target memcg of uncharge */
- unsigned long nr_pages; /* uncharged usage */
- unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
- } memcg_batch;
unsigned int memcg_kmem_skip_account;
struct memcg_oom_info {
struct mem_cgroup *memcg;
@@ -2955,15 +2951,10 @@ static inline void inc_syscw(struct task_struct *tsk)
#ifdef CONFIG_MEMCG
extern void mm_update_next_owner(struct mm_struct *mm);
-extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
#else
static inline void mm_update_next_owner(struct mm_struct *mm)
{
}
-
-static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
-{
-}
#endif /* CONFIG_MEMCG */
static inline unsigned long task_rlimit(const struct task_struct *tsk,
diff --git a/include/linux/shm.h b/include/linux/shm.h
index 57d77709fbe2..6fb801686ad6 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -1,6 +1,7 @@
#ifndef _LINUX_SHM_H_
#define _LINUX_SHM_H_
+#include <linux/list.h>
#include <asm/page.h>
#include <uapi/linux/shm.h>
#include <asm/shmparam.h>
@@ -20,6 +21,7 @@ struct shmid_kernel /* private to the kernel */
/* The task created the shm object. NULL if the task is dead. */
struct task_struct *shm_creator;
+ struct list_head shm_clist; /* list by creator */
};
/* shm_mode upper byte flags */
@@ -44,11 +46,20 @@ struct shmid_kernel /* private to the kernel */
#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT)
#ifdef CONFIG_SYSVIPC
+struct sysv_shm {
+ struct list_head shm_clist;
+};
+
long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr,
unsigned long shmlba);
-extern int is_file_shm_hugepages(struct file *file);
-extern void exit_shm(struct task_struct *task);
+int is_file_shm_hugepages(struct file *file);
+void exit_shm(struct task_struct *task);
+#define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist)
#else
+struct sysv_shm {
+ /* empty */
+};
+
static inline long do_shmat(int shmid, char __user *shmaddr,
int shmflg, unsigned long *addr,
unsigned long shmlba)
@@ -62,6 +73,9 @@ static inline int is_file_shm_hugepages(struct file *file)
static inline void exit_shm(struct task_struct *task)
{
}
+static inline void shm_init_task(struct task_struct *task)
+{
+}
#endif
#endif /* _LINUX_SHM_H_ */
diff --git a/include/linux/string.h b/include/linux/string.h
index d36977e029af..2663afcc0861 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -117,6 +117,7 @@ void *memchr_inv(const void *s, int c, size_t n);
extern char *kstrdup(const char *s, gfp_t gfp);
extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
+extern char *kstrimdup(const char *s, gfp_t gfp);
extern void *kmemdup(const void *src, size_t len, gfp_t gfp);
extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4bdbee80eede..1b72060f093a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -311,7 +311,6 @@ extern void lru_add_page_tail(struct page *page, struct page *page_tail,
struct lruvec *lruvec, struct list_head *head);
extern void activate_page(struct page *);
extern void mark_page_accessed(struct page *);
-extern void init_page_accessed(struct page *page);
extern void lru_add_drain(void);
extern void lru_add_drain_cpu(int cpu);
extern void lru_add_drain_all(void);
@@ -321,6 +320,9 @@ extern void swap_setup(void);
extern void add_page_to_unevictable_list(struct page *page);
+extern void lru_cache_add_active_or_unevictable(struct page *page,
+ struct vm_area_struct *vma);
+
/* linux/mm/vmscan.c */
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
gfp_t gfp_mask, nodemask_t *mask);
@@ -379,9 +381,13 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
}
#endif
#ifdef CONFIG_MEMCG_SWAP
-extern void mem_cgroup_uncharge_swap(swp_entry_t ent);
+extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
+extern void mem_cgroup_uncharge_swap(swp_entry_t entry);
#else
-static inline void mem_cgroup_uncharge_swap(swp_entry_t ent)
+static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+{
+}
+static inline void mem_cgroup_uncharge_swap(swp_entry_t entry)
{
}
#endif
@@ -441,7 +447,7 @@ extern void swap_shmem_alloc(swp_entry_t);
extern int swap_duplicate(swp_entry_t);
extern int swapcache_prepare(swp_entry_t);
extern void swap_free(swp_entry_t);
-extern void swapcache_free(swp_entry_t, struct page *page);
+extern void swapcache_free(swp_entry_t);
extern int free_swap_and_cache(swp_entry_t);
extern int swap_type_of(dev_t, sector_t, struct block_device **);
extern unsigned int count_swap_pages(int, int);
@@ -505,7 +511,7 @@ static inline void swap_free(swp_entry_t swp)
{
}
-static inline void swapcache_free(swp_entry_t swp, struct page *page)
+static inline void swapcache_free(swp_entry_t swp)
{
}
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index cd82f72fc908..f7e4b5241f82 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -317,6 +317,10 @@ asmlinkage long sys_restart_syscall(void);
asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
struct kexec_segment __user *segments,
unsigned long flags);
+asmlinkage long sys_kexec_file_load(int kernel_fd, int initrd_fd,
+ unsigned long cmdline_len,
+ const char __user *cmdline_ptr,
+ unsigned long flags);
asmlinkage long sys_exit(int error_code);
asmlinkage long sys_exit_group(int error_code);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 14a8ff2de11e..b7361f831226 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -34,8 +34,6 @@ struct ctl_table_root;
struct ctl_table_header;
struct ctl_dir;
-typedef struct ctl_table ctl_table;
-
typedef int proc_handler (struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 4836ba3c1cd8..e95372654f09 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -57,9 +57,9 @@ static inline void put_user_ns(struct user_namespace *ns)
}
struct seq_operations;
-extern struct seq_operations proc_uid_seq_operations;
-extern struct seq_operations proc_gid_seq_operations;
-extern struct seq_operations proc_projid_seq_operations;
+extern const struct seq_operations proc_uid_seq_operations;
+extern const struct seq_operations proc_gid_seq_operations;
+extern const struct seq_operations proc_projid_seq_operations;
extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *);
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 4b8a89189a29..b87696fdf06a 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -113,7 +113,7 @@ extern struct vm_struct *remove_vm_area(const void *addr);
extern struct vm_struct *find_vm_area(const void *addr);
extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
- struct page ***pages);
+ struct page **pages);
#ifdef CONFIG_MMU
extern int map_kernel_range_noflush(unsigned long start, unsigned long size,
pgprot_t prot, struct page **pages);
diff --git a/include/linux/zbud.h b/include/linux/zbud.h
index 13af0d450bf6..f9d41a6e361f 100644
--- a/include/linux/zbud.h
+++ b/include/linux/zbud.h
@@ -11,7 +11,7 @@ struct zbud_ops {
struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops);
void zbud_destroy_pool(struct zbud_pool *pool);
-int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp,
+int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
unsigned long *handle);
void zbud_free(struct zbud_pool *pool, unsigned long handle);
int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries);
diff --git a/include/linux/zlib.h b/include/linux/zlib.h
index 9c5a6b4de0a3..92dbbd3f6c75 100644
--- a/include/linux/zlib.h
+++ b/include/linux/zlib.h
@@ -83,11 +83,11 @@ struct internal_state;
typedef struct z_stream_s {
const Byte *next_in; /* next input byte */
- uInt avail_in; /* number of bytes available at next_in */
+ uLong avail_in; /* number of bytes available at next_in */
uLong total_in; /* total nb of input bytes read so far */
Byte *next_out; /* next output byte should be put there */
- uInt avail_out; /* remaining free space at next_out */
+ uLong avail_out; /* remaining free space at next_out */
uLong total_out; /* total nb of bytes output so far */
char *msg; /* last error message, NULL if no error */
@@ -493,64 +493,6 @@ extern int deflateInit2 (z_streamp strm,
method). msg is set to null if there is no error message. deflateInit2 does
not perform any compression: this will be done by deflate().
*/
-
-#if 0
-extern int zlib_deflateSetDictionary (z_streamp strm,
- const Byte *dictionary,
- uInt dictLength);
-#endif
-/*
- Initializes the compression dictionary from the given byte sequence
- without producing any compressed output. This function must be called
- immediately after deflateInit, deflateInit2 or deflateReset, before any
- call of deflate. The compressor and decompressor must use exactly the same
- dictionary (see inflateSetDictionary).
-
- The dictionary should consist of strings (byte sequences) that are likely
- to be encountered later in the data to be compressed, with the most commonly
- used strings preferably put towards the end of the dictionary. Using a
- dictionary is most useful when the data to be compressed is short and can be
- predicted with good accuracy; the data can then be compressed better than
- with the default empty dictionary.
-
- Depending on the size of the compression data structures selected by
- deflateInit or deflateInit2, a part of the dictionary may in effect be
- discarded, for example if the dictionary is larger than the window size in
- deflate or deflate2. Thus the strings most likely to be useful should be
- put at the end of the dictionary, not at the front.
-
- Upon return of this function, strm->adler is set to the Adler32 value
- of the dictionary; the decompressor may later use this value to determine
- which dictionary has been used by the compressor. (The Adler32 value
- applies to the whole dictionary even if only a subset of the dictionary is
- actually used by the compressor.)
-
- deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
- parameter is invalid (such as NULL dictionary) or the stream state is
- inconsistent (for example if deflate has already been called for this stream
- or if the compression method is bsort). deflateSetDictionary does not
- perform any compression: this will be done by deflate().
-*/
-
-#if 0
-extern int zlib_deflateCopy (z_streamp dest, z_streamp source);
-#endif
-
-/*
- Sets the destination stream as a complete copy of the source stream.
-
- This function can be useful when several compression strategies will be
- tried, for example when there are several ways of pre-processing the input
- data with a filter. The streams that will be discarded should then be freed
- by calling deflateEnd. Note that deflateCopy duplicates the internal
- compression state which can be quite large, so this strategy is slow and
- can consume lots of memory.
-
- deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
- enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
- (such as zalloc being NULL). msg is left unchanged in both source and
- destination.
-*/
extern int zlib_deflateReset (z_streamp strm);
/*
@@ -568,27 +510,6 @@ static inline unsigned long deflateBound(unsigned long s)
return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11;
}
-#if 0
-extern int zlib_deflateParams (z_streamp strm, int level, int strategy);
-#endif
-/*
- Dynamically update the compression level and compression strategy. The
- interpretation of level and strategy is as in deflateInit2. This can be
- used to switch between compression and straight copy of the input data, or
- to switch to a different kind of input data requiring a different
- strategy. If the compression level is changed, the input available so far
- is compressed with the old level (and may be flushed); the new level will
- take effect only at the next call of deflate().
-
- Before the call of deflateParams, the stream state must be set as for
- a call of deflate(), since the currently available input may have to
- be compressed and flushed. In particular, strm->avail_out must be non-zero.
-
- deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source
- stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR
- if strm->avail_out was zero.
-*/
-
/*
extern int inflateInit2 (z_streamp strm, int windowBits);
@@ -631,45 +552,6 @@ extern int inflateInit2 (z_streamp strm, int windowBits);
and avail_out are unchanged.)
*/
-extern int zlib_inflateSetDictionary (z_streamp strm,
- const Byte *dictionary,
- uInt dictLength);
-/*
- Initializes the decompression dictionary from the given uncompressed byte
- sequence. This function must be called immediately after a call of inflate,
- if that call returned Z_NEED_DICT. The dictionary chosen by the compressor
- can be determined from the adler32 value returned by that call of inflate.
- The compressor and decompressor must use exactly the same dictionary (see
- deflateSetDictionary). For raw inflate, this function can be called
- immediately after inflateInit2() or inflateReset() and before any call of
- inflate() to set the dictionary. The application must insure that the
- dictionary that was used for compression is provided.
-
- inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
- parameter is invalid (such as NULL dictionary) or the stream state is
- inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
- expected one (incorrect adler32 value). inflateSetDictionary does not
- perform any decompression: this will be done by subsequent calls of
- inflate().
-*/
-
-#if 0
-extern int zlib_inflateSync (z_streamp strm);
-#endif
-/*
- Skips invalid compressed data until a full flush point (see above the
- description of deflate with Z_FULL_FLUSH) can be found, or until all
- available input is skipped. No output is provided.
-
- inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR
- if no more input was provided, Z_DATA_ERROR if no flush point has been found,
- or Z_STREAM_ERROR if the stream structure was inconsistent. In the success
- case, the application may save the current current value of total_in which
- indicates where valid compressed data was found. In the error case, the
- application may repeatedly call inflateSync, providing more input each time,
- until success or end of the input data.
-*/
-
extern int zlib_inflateReset (z_streamp strm);
/*
This function is equivalent to inflateEnd followed by inflateInit,
diff --git a/include/linux/zpool.h b/include/linux/zpool.h
new file mode 100644
index 000000000000..f14bd75f08b3
--- /dev/null
+++ b/include/linux/zpool.h
@@ -0,0 +1,106 @@
+/*
+ * zpool memory storage api
+ *
+ * Copyright (C) 2014 Dan Streetman
+ *
+ * This is a common frontend for the zbud and zsmalloc memory
+ * storage pool implementations. Typically, this is used to
+ * store compressed memory.
+ */
+
+#ifndef _ZPOOL_H_
+#define _ZPOOL_H_
+
+struct zpool;
+
+struct zpool_ops {
+ int (*evict)(struct zpool *pool, unsigned long handle);
+};
+
+/*
+ * Control how a handle is mapped. It will be ignored if the
+ * implementation does not support it. Its use is optional.
+ * Note that this does not refer to memory protection, it
+ * refers to how the memory will be copied in/out if copying
+ * is necessary during mapping; read-write is the safest as
+ * it copies the existing memory in on map, and copies the
+ * changed memory back out on unmap. Write-only does not copy
+ * in the memory and should only be used for initialization.
+ * If in doubt, use ZPOOL_MM_DEFAULT which is read-write.
+ */
+enum zpool_mapmode {
+ ZPOOL_MM_RW, /* normal read-write mapping */
+ ZPOOL_MM_RO, /* read-only (no copy-out at unmap time) */
+ ZPOOL_MM_WO, /* write-only (no copy-in at map time) */
+
+ ZPOOL_MM_DEFAULT = ZPOOL_MM_RW
+};
+
+struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops);
+
+char *zpool_get_type(struct zpool *pool);
+
+void zpool_destroy_pool(struct zpool *pool);
+
+int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp,
+ unsigned long *handle);
+
+void zpool_free(struct zpool *pool, unsigned long handle);
+
+int zpool_shrink(struct zpool *pool, unsigned int pages,
+ unsigned int *reclaimed);
+
+void *zpool_map_handle(struct zpool *pool, unsigned long handle,
+ enum zpool_mapmode mm);
+
+void zpool_unmap_handle(struct zpool *pool, unsigned long handle);
+
+u64 zpool_get_total_size(struct zpool *pool);
+
+
+/**
+ * struct zpool_driver - driver implementation for zpool
+ * @type: name of the driver.
+ * @list: entry in the list of zpool drivers.
+ * @create: create a new pool.
+ * @destroy: destroy a pool.
+ * @malloc: allocate mem from a pool.
+ * @free: free mem from a pool.
+ * @shrink: shrink the pool.
+ * @map: map a handle.
+ * @unmap: unmap a handle.
+ * @total_size: get total size of a pool.
+ *
+ * This is created by a zpool implementation and registered
+ * with zpool.
+ */
+struct zpool_driver {
+ char *type;
+ struct module *owner;
+ atomic_t refcount;
+ struct list_head list;
+
+ void *(*create)(gfp_t gfp, struct zpool_ops *ops);
+ void (*destroy)(void *pool);
+
+ int (*malloc)(void *pool, size_t size, gfp_t gfp,
+ unsigned long *handle);
+ void (*free)(void *pool, unsigned long handle);
+
+ int (*shrink)(void *pool, unsigned int pages,
+ unsigned int *reclaimed);
+
+ void *(*map)(void *pool, unsigned long handle,
+ enum zpool_mapmode mm);
+ void (*unmap)(void *pool, unsigned long handle);
+
+ u64 (*total_size)(void *pool);
+};
+
+void zpool_register_driver(struct zpool_driver *driver);
+
+int zpool_unregister_driver(struct zpool_driver *driver);
+
+int zpool_evict(void *pool, unsigned long handle);
+
+#endif
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index 91e2e4215ba0..4b69139d9839 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -31,7 +31,7 @@ enum scsi_timeouts {
* Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit
* is totally arbitrary, a setting of 2048 will get you at least 8mb ios.
*/
-#ifdef ARCH_HAS_SG_CHAIN
+#ifdef CONFIG_ARCH_HAS_SG_CHAIN
#define SCSI_MAX_SG_CHAIN_SEGMENTS 2048
#else
#define SCSI_MAX_SG_CHAIN_SEGMENTS SCSI_MAX_SG_SEGMENTS
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
index 4e4f2f8b1ac2..dd2b5467d905 100644
--- a/include/trace/events/migrate.h
+++ b/include/trace/events/migrate.h
@@ -17,6 +17,7 @@
{MR_MEMORY_HOTPLUG, "memory_hotplug"}, \
{MR_SYSCALL, "syscall_or_cpuset"}, \
{MR_MEMPOLICY_MBIND, "mempolicy_mbind"}, \
+ {MR_NUMA_MISPLACED, "numa_misplaced"}, \
{MR_CMA, "cma"}
TRACE_EVENT(mm_migrate_pages,
diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h
index 1c9fabde69e4..ce0803b8d05f 100644
--- a/include/trace/events/pagemap.h
+++ b/include/trace/events/pagemap.h
@@ -28,12 +28,10 @@ TRACE_EVENT(mm_lru_insertion,
TP_PROTO(
struct page *page,
- unsigned long pfn,
- int lru,
- unsigned long flags
+ int lru
),
- TP_ARGS(page, pfn, lru, flags),
+ TP_ARGS(page, lru),
TP_STRUCT__entry(
__field(struct page *, page )
@@ -44,9 +42,9 @@ TRACE_EVENT(mm_lru_insertion,
TP_fast_assign(
__entry->page = page;
- __entry->pfn = pfn;
+ __entry->pfn = page_to_pfn(page);
__entry->lru = lru;
- __entry->flags = flags;
+ __entry->flags = trace_pagemap_flags(page);
),
/* Flag format is based on page-types.c formatting for pagemap */
@@ -64,9 +62,9 @@ TRACE_EVENT(mm_lru_insertion,
TRACE_EVENT(mm_lru_activate,
- TP_PROTO(struct page *page, unsigned long pfn),
+ TP_PROTO(struct page *page),
- TP_ARGS(page, pfn),
+ TP_ARGS(page),
TP_STRUCT__entry(
__field(struct page *, page )
@@ -75,7 +73,7 @@ TRACE_EVENT(mm_lru_activate,
TP_fast_assign(
__entry->page = page;
- __entry->pfn = pfn;
+ __entry->pfn = page_to_pfn(page);
),
/* Flag format is based on page-types.c formatting for pagemap */
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index d6629d49a243..6925f5b42f89 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -13,6 +13,17 @@
#define KEXEC_PRESERVE_CONTEXT 0x00000002
#define KEXEC_ARCH_MASK 0xffff0000
+/*
+ * Kexec file load interface flags.
+ * KEXEC_FILE_UNLOAD : Unload already loaded kexec/kdump image.
+ * KEXEC_FILE_ON_CRASH : Load/unload operation belongs to kdump image.
+ * KEXEC_FILE_NO_INITRAMFS : No initramfs is being loaded. Ignore the initrd
+ * fd field.
+ */
+#define KEXEC_FILE_UNLOAD 0x00000001
+#define KEXEC_FILE_ON_CRASH 0x00000002
+#define KEXEC_FILE_NO_INITRAMFS 0x00000004
+
/* These values match the ELF architecture values.
* Unless there is a good reason that should continue to be the case.
*/
diff --git a/init/Kconfig b/init/Kconfig
index 85fb9851b326..1dfdd81ff9bd 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -783,8 +783,13 @@ endchoice
endmenu # "RCU Subsystem"
+config BUILD_BIN2C
+ bool
+ default n
+
config IKCONFIG
tristate "Kernel .config support"
+ select BUILD_BIN2C
---help---
This option enables the complete Linux kernel ".config" file
contents to be saved in the kernel. It provides documentation
@@ -807,15 +812,53 @@ config LOG_BUF_SHIFT
range 12 21
default 17
help
- Select kernel log buffer size as a power of 2.
+ Select the minimal kernel log buffer size as a power of 2.
+ The final size is affected by LOG_CPU_MAX_BUF_SHIFT config
+ parameter, see below. Any higher size also might be forced
+ by "log_buf_len" boot parameter.
+
Examples:
- 17 => 128 KB
+ 17 => 128 KB
16 => 64 KB
- 15 => 32 KB
- 14 => 16 KB
+ 15 => 32 KB
+ 14 => 16 KB
13 => 8 KB
12 => 4 KB
+config LOG_CPU_MAX_BUF_SHIFT
+ int "CPU kernel log buffer size contribution (13 => 8 KB, 17 => 128KB)"
+ range 0 21
+ default 12 if !BASE_SMALL
+ default 0 if BASE_SMALL
+ help
+ This option allows to increase the default ring buffer size
+ according to the number of CPUs. The value defines the contribution
+ of each CPU as a power of 2. The used space is typically only few
+ lines however it might be much more when problems are reported,
+ e.g. backtraces.
+
+ The increased size means that a new buffer has to be allocated and
+ the original static one is unused. It makes sense only on systems
+ with more CPUs. Therefore this value is used only when the sum of
+ contributions is greater than the half of the default kernel ring
+ buffer as defined by LOG_BUF_SHIFT. The default values are set
+ so that more than 64 CPUs are needed to trigger the allocation.
+
+ Also this option is ignored when "log_buf_len" kernel parameter is
+ used as it forces an exact (power of two) size of the ring buffer.
+
+ The number of possible CPUs is used for this computation ignoring
+ hotplugging making the compuation optimal for the the worst case
+ scenerio while allowing a simple algorithm to be used from bootup.
+
+ Examples shift values and their meaning:
+ 17 => 128 KB for each CPU
+ 16 => 64 KB for each CPU
+ 15 => 32 KB for each CPU
+ 14 => 16 KB for each CPU
+ 13 => 8 KB for each CPU
+ 12 => 4 KB for each CPU
+
#
# Architectures with an unreliable sched_clock() should select this:
#
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 82f22885c87e..b6237c31b0e2 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -539,12 +539,6 @@ void __init prepare_namespace(void)
{
int is_floppy;
- if (root_delay) {
- printk(KERN_INFO "Waiting %d sec before mounting root device...\n",
- root_delay);
- ssleep(root_delay);
- }
-
/*
* wait for the known devices to complete their probing
*
@@ -571,6 +565,12 @@ void __init prepare_namespace(void)
if (initrd_load())
goto out;
+ if (root_delay) {
+ pr_info("Waiting %d sec before mounting root device...\n",
+ root_delay);
+ ssleep(root_delay);
+ }
+
/* wait for any asynchronous scanning to complete */
if ((ROOT_DEV == 0) && root_wait) {
printk(KERN_INFO "Waiting for root device %s...\n",
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index a8227022e3a0..e5d059e8aa11 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -311,9 +311,9 @@ static int exit_code;
static int decompress_error;
static int crd_infd, crd_outfd;
-static int __init compr_fill(void *buf, unsigned int len)
+static long __init compr_fill(void *buf, unsigned long len)
{
- int r = sys_read(crd_infd, buf, len);
+ long r = sys_read(crd_infd, buf, len);
if (r < 0)
printk(KERN_ERR "RAMDISK: error while reading compressed data");
else if (r == 0)
@@ -321,13 +321,13 @@ static int __init compr_fill(void *buf, unsigned int len)
return r;
}
-static int __init compr_flush(void *window, unsigned int outcnt)
+static long __init compr_flush(void *window, unsigned long outcnt)
{
- int written = sys_write(crd_outfd, window, outcnt);
+ long written = sys_write(crd_outfd, window, outcnt);
if (written != outcnt) {
if (decompress_error == 0)
printk(KERN_ERR
- "RAMDISK: incomplete write (%d != %d)\n",
+ "RAMDISK: incomplete write (%ld != %ld)\n",
written, outcnt);
decompress_error = 1;
return -1;
diff --git a/init/initramfs.c b/init/initramfs.c
index a8497fab1c3d..a7566031242e 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -19,6 +19,29 @@
#include <linux/syscalls.h>
#include <linux/utime.h>
+static ssize_t __init xwrite(int fd, const char *p, size_t count)
+{
+ ssize_t out = 0;
+
+ /* sys_write only can write MAX_RW_COUNT aka 2G-4K bytes at most */
+ while (count) {
+ ssize_t rv = sys_write(fd, p, count);
+
+ if (rv < 0) {
+ if (rv == -EINTR || rv == -EAGAIN)
+ continue;
+ return out ? out : rv;
+ } else if (rv == 0)
+ break;
+
+ p += rv;
+ out += rv;
+ count -= rv;
+ }
+
+ return out;
+}
+
static __initdata char *message;
static void __init error(char *x)
{
@@ -174,7 +197,7 @@ static __initdata enum state {
} state, next_state;
static __initdata char *victim;
-static __initdata unsigned count;
+static unsigned long count __initdata;
static __initdata loff_t this_header, next_header;
static inline void __init eat(unsigned n)
@@ -186,7 +209,7 @@ static inline void __init eat(unsigned n)
static __initdata char *vcollected;
static __initdata char *collected;
-static __initdata int remains;
+static long remains __initdata;
static __initdata char *collect;
static void __init read_into(char *buf, unsigned size, enum state next)
@@ -213,7 +236,7 @@ static int __init do_start(void)
static int __init do_collect(void)
{
- unsigned n = remains;
+ unsigned long n = remains;
if (count < n)
n = count;
memcpy(collect, victim, n);
@@ -346,7 +369,7 @@ static int __init do_name(void)
static int __init do_copy(void)
{
if (count >= body_len) {
- sys_write(wfd, victim, body_len);
+ xwrite(wfd, victim, body_len);
sys_close(wfd);
do_utime(vcollected, mtime);
kfree(vcollected);
@@ -354,7 +377,7 @@ static int __init do_copy(void)
state = SkipIt;
return 0;
} else {
- sys_write(wfd, victim, count);
+ xwrite(wfd, victim, count);
body_len -= count;
eat(count);
return 1;
@@ -384,7 +407,7 @@ static __initdata int (*actions[])(void) = {
[Reset] = do_reset,
};
-static int __init write_buffer(char *buf, unsigned len)
+static long __init write_buffer(char *buf, unsigned long len)
{
count = len;
victim = buf;
@@ -394,11 +417,11 @@ static int __init write_buffer(char *buf, unsigned len)
return len - count;
}
-static int __init flush_buffer(void *bufv, unsigned len)
+static long __init flush_buffer(void *bufv, unsigned long len)
{
char *buf = (char *) bufv;
- int written;
- int origLen = len;
+ long written;
+ long origLen = len;
if (message)
return -1;
while ((written = write_buffer(buf, len)) < len && !message) {
@@ -417,13 +440,13 @@ static int __init flush_buffer(void *bufv, unsigned len)
return origLen;
}
-static unsigned my_inptr; /* index of next byte to be processed in inbuf */
+static unsigned long my_inptr; /* index of next byte to be processed in inbuf */
#include <linux/decompress/generic.h>
-static char * __init unpack_to_rootfs(char *buf, unsigned len)
+static char * __init unpack_to_rootfs(char *buf, unsigned long len)
{
- int written, res;
+ long written;
decompress_fn decompress;
const char *compress_name;
static __initdata char msg_buf[64];
@@ -457,7 +480,7 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len)
decompress = decompress_method(buf, len, &compress_name);
pr_debug("Detected %s compressed data\n", compress_name);
if (decompress) {
- res = decompress(buf, len, NULL, flush_buffer, NULL,
+ int res = decompress(buf, len, NULL, flush_buffer, NULL,
&my_inptr, error);
if (res)
error("decompressor failed");
@@ -603,8 +626,13 @@ static int __init populate_rootfs(void)
fd = sys_open("/initrd.image",
O_WRONLY|O_CREAT, 0700);
if (fd >= 0) {
- sys_write(fd, (char *)initrd_start,
- initrd_end - initrd_start);
+ ssize_t written = xwrite(fd, (char *)initrd_start,
+ initrd_end - initrd_start);
+
+ if (written != initrd_end - initrd_start)
+ pr_err("/initrd.image: incomplete write (%zd != %ld)\n",
+ written, initrd_end - initrd_start);
+
sys_close(fd);
free_initrd();
}
diff --git a/ipc/shm.c b/ipc/shm.c
index 89fc354156cb..7fc9f9f3a26b 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -178,6 +178,7 @@ static void shm_rcu_free(struct rcu_head *head)
static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
{
+ list_del(&s->shm_clist);
ipc_rmid(&shm_ids(ns), &s->shm_perm);
}
@@ -268,37 +269,6 @@ static void shm_close(struct vm_area_struct *vma)
}
/* Called with ns->shm_ids(ns).rwsem locked */
-static int shm_try_destroy_current(int id, void *p, void *data)
-{
- struct ipc_namespace *ns = data;
- struct kern_ipc_perm *ipcp = p;
- struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
-
- if (shp->shm_creator != current)
- return 0;
-
- /*
- * Mark it as orphaned to destroy the segment when
- * kernel.shm_rmid_forced is changed.
- * It is noop if the following shm_may_destroy() returns true.
- */
- shp->shm_creator = NULL;
-
- /*
- * Don't even try to destroy it. If shm_rmid_forced=0 and IPC_RMID
- * is not set, it shouldn't be deleted here.
- */
- if (!ns->shm_rmid_forced)
- return 0;
-
- if (shm_may_destroy(ns, shp)) {
- shm_lock_by_ptr(shp);
- shm_destroy(ns, shp);
- }
- return 0;
-}
-
-/* Called with ns->shm_ids(ns).rwsem locked */
static int shm_try_destroy_orphaned(int id, void *p, void *data)
{
struct ipc_namespace *ns = data;
@@ -329,18 +299,50 @@ void shm_destroy_orphaned(struct ipc_namespace *ns)
up_write(&shm_ids(ns).rwsem);
}
-
+/* Locking assumes this will only be called with task == current */
void exit_shm(struct task_struct *task)
{
struct ipc_namespace *ns = task->nsproxy->ipc_ns;
+ struct shmid_kernel *shp, *n;
- if (shm_ids(ns).in_use == 0)
+ if (list_empty(&task->sysvshm.shm_clist))
return;
- /* Destroy all already created segments, but not mapped yet */
+ /*
+ * If kernel.shm_rmid_forced is not set then only keep track of
+ * which shmids are orphaned, so that a later set of the sysctl
+ * can clean them up.
+ */
+ if (!ns->shm_rmid_forced) {
+ down_read(&shm_ids(ns).rwsem);
+ list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
+ shp->shm_creator = NULL;
+ /*
+ * Only under read lock but we are only called on current
+ * so no entry on the list will be shared.
+ */
+ list_del(&task->sysvshm.shm_clist);
+ up_read(&shm_ids(ns).rwsem);
+ return;
+ }
+
+ /*
+ * Destroy all already created segments, that were not yet mapped,
+ * and mark any mapped as orphan to cover the sysctl toggling.
+ * Destroy is skipped if shm_may_destroy() returns false.
+ */
down_write(&shm_ids(ns).rwsem);
- if (shm_ids(ns).in_use)
- idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
+ list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
+ shp->shm_creator = NULL;
+
+ if (shm_may_destroy(ns, shp)) {
+ shm_lock_by_ptr(shp);
+ shm_destroy(ns, shp);
+ }
+ }
+
+ /* Remove the list head from any segments still attached. */
+ list_del(&task->sysvshm.shm_clist);
up_write(&shm_ids(ns).rwsem);
}
@@ -561,6 +563,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
shp->shm_nattch = 0;
shp->shm_file = file;
shp->shm_creator = current;
+ list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
/*
* shmid gets reported as "inode#" in /proc/pid/maps.
diff --git a/kernel/Makefile b/kernel/Makefile
index 973a40cf8068..de62ac0bb571 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -104,7 +104,7 @@ targets += config_data.gz
$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
$(call if_changed,gzip)
- filechk_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;")
+ filechk_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/basic/bin2c; echo "MAGIC_END;")
targets += config_data.h
$(obj)/config_data.h: $(obj)/config_data.gz FORCE
$(call filechk,ikconfiggz)
diff --git a/kernel/acct.c b/kernel/acct.c
index 3cec8c4cee14..98c4a209bef1 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -142,12 +142,12 @@ static int check_free_space(struct bsd_acct_struct *acct, struct file *file)
if (acct->active) {
if (act < 0) {
acct->active = 0;
- printk(KERN_INFO "Process accounting paused\n");
+ pr_info("Process accounting paused\n");
}
} else {
if (act > 0) {
acct->active = 1;
- printk(KERN_INFO "Process accounting resumed\n");
+ pr_info("Process accounting resumed\n");
}
}
@@ -262,6 +262,7 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
if (name) {
struct filename *tmp = getname(name);
+
if (IS_ERR(tmp))
return PTR_ERR(tmp);
error = acct_on(tmp);
@@ -391,7 +392,7 @@ static comp_t encode_comp_t(unsigned long value)
return exp;
}
-#if ACCT_VERSION==1 || ACCT_VERSION==2
+#if ACCT_VERSION == 1 || ACCT_VERSION == 2
/*
* encode an u64 into a comp2_t (24 bits)
*
@@ -404,7 +405,7 @@ static comp_t encode_comp_t(unsigned long value)
#define MANTSIZE2 20 /* 20 bit mantissa. */
#define EXPSIZE2 5 /* 5 bit base 2 exponent. */
#define MAXFRACT2 ((1ul << MANTSIZE2) - 1) /* Maximum fractional value. */
-#define MAXEXP2 ((1 <<EXPSIZE2) - 1) /* Maximum exponent. */
+#define MAXEXP2 ((1 << EXPSIZE2) - 1) /* Maximum exponent. */
static comp2_t encode_comp2_t(u64 value)
{
@@ -435,7 +436,7 @@ static comp2_t encode_comp2_t(u64 value)
}
#endif
-#if ACCT_VERSION==3
+#if ACCT_VERSION == 3
/*
* encode an u64 into a 32 bit IEEE float
*/
@@ -444,8 +445,9 @@ static u32 encode_float(u64 value)
unsigned exp = 190;
unsigned u;
- if (value==0) return 0;
- while ((s64)value > 0){
+ if (value == 0)
+ return 0;
+ while ((s64)value > 0) {
value <<= 1;
exp--;
}
@@ -505,16 +507,17 @@ static void do_acct_process(struct bsd_acct_struct *acct,
+ current->group_leader->start_time.tv_nsec;
/* convert nsec -> AHZ */
elapsed = nsec_to_AHZ(run_time);
-#if ACCT_VERSION==3
+#if ACCT_VERSION == 3
ac.ac_etime = encode_float(elapsed);
#else
ac.ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ?
- (unsigned long) elapsed : (unsigned long) -1l);
+ (unsigned long) elapsed : (unsigned long) -1l);
#endif
-#if ACCT_VERSION==1 || ACCT_VERSION==2
+#if ACCT_VERSION == 1 || ACCT_VERSION == 2
{
/* new enlarged etime field */
comp2_t etime = encode_comp2_t(elapsed);
+
ac.ac_etime_hi = etime >> 16;
ac.ac_etime_lo = (u16) etime;
}
@@ -524,15 +527,15 @@ static void do_acct_process(struct bsd_acct_struct *acct,
/* we really need to bite the bullet and change layout */
ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid);
ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid);
-#if ACCT_VERSION==2
+#if ACCT_VERSION == 2
ac.ac_ahz = AHZ;
#endif
-#if ACCT_VERSION==1 || ACCT_VERSION==2
+#if ACCT_VERSION == 1 || ACCT_VERSION == 2
/* backward-compatible 16 bit fields */
ac.ac_uid16 = ac.ac_uid;
ac.ac_gid16 = ac.ac_gid;
#endif
-#if ACCT_VERSION==3
+#if ACCT_VERSION == 3
ac.ac_pid = task_tgid_nr_ns(current, ns);
rcu_read_lock();
ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns);
@@ -593,6 +596,7 @@ void acct_collect(long exitcode, int group_dead)
if (group_dead && current->mm) {
struct vm_area_struct *vma;
+
down_read(&current->mm->mmap_sem);
vma = current->mm->mmap;
while (vma) {
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 8e9bc9c3dbb7..c447cd9848d1 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -106,7 +106,7 @@ static inline struct audit_entry *audit_init_entry(u32 field_count)
if (unlikely(!entry))
return NULL;
- fields = kzalloc(sizeof(*fields) * field_count, GFP_KERNEL);
+ fields = kcalloc(field_count, sizeof(*fields), GFP_KERNEL);
if (unlikely(!fields)) {
kfree(entry);
return NULL;
@@ -160,7 +160,7 @@ static __u32 *classes[AUDIT_SYSCALL_CLASSES];
int __init audit_register_class(int class, unsigned *list)
{
- __u32 *p = kzalloc(AUDIT_BITMASK_SIZE * sizeof(__u32), GFP_KERNEL);
+ __u32 *p = kcalloc(AUDIT_BITMASK_SIZE, sizeof(__u32), GFP_KERNEL);
if (!p)
return -ENOMEM;
while (*list != ~0U) {
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 9fd4246b04b8..e1d1d1952bfa 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -9,7 +9,6 @@
#include <linux/page-flags.h>
#include <linux/mmzone.h>
#include <linux/kbuild.h>
-#include <linux/page_cgroup.h>
#include <linux/log2.h>
#include <linux/spinlock_types.h>
@@ -18,7 +17,6 @@ void foo(void)
/* The enum constants to put into include/generated/bounds.h */
DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
- DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
#ifdef CONFIG_SMP
DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
#endif
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 6f3254e8c137..1d0af8a2c646 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -167,6 +167,11 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
/* For mmu_notifiers */
const unsigned long mmun_start = addr;
const unsigned long mmun_end = addr + PAGE_SIZE;
+ struct mem_cgroup *memcg;
+
+ err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg);
+ if (err)
+ return err;
/* For try_to_free_swap() and munlock_vma_page() below */
lock_page(page);
@@ -179,6 +184,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
get_page(kpage);
page_add_new_anon_rmap(kpage, vma, addr);
+ mem_cgroup_commit_charge(kpage, memcg, false);
+ lru_cache_add_active_or_unevictable(kpage, vma);
if (!PageAnon(page)) {
dec_mm_counter(mm, MM_FILEPAGES);
@@ -200,6 +207,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
err = 0;
unlock:
+ mem_cgroup_cancel_charge(kpage, memcg);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
unlock_page(page);
return err;
@@ -315,18 +323,11 @@ retry:
if (!new_page)
goto put_old;
- if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
- goto put_new;
-
__SetPageUptodate(new_page);
copy_highpage(new_page, old_page);
copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
ret = __replace_page(vma, vaddr, old_page, new_page);
- if (ret)
- mem_cgroup_uncharge_page(new_page);
-
-put_new:
page_cache_release(new_page);
put_old:
put_page(old_page);
diff --git a/kernel/exit.c b/kernel/exit.c
index e5c4668f1799..20a9c1003a1d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -59,7 +59,7 @@
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
-static void exit_mm(struct task_struct * tsk);
+static void exit_mm(struct task_struct *tsk);
static void __unhash_process(struct task_struct *p, bool group_dead)
{
@@ -151,7 +151,7 @@ static void __exit_signal(struct task_struct *tsk)
spin_unlock(&sighand->siglock);
__cleanup_sighand(sighand);
- clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
+ clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
if (group_dead) {
flush_sigqueue(&sig->shared_pending);
tty_kref_put(tty);
@@ -168,7 +168,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
}
-void release_task(struct task_struct * p)
+void release_task(struct task_struct *p)
{
struct task_struct *leader;
int zap_leader;
@@ -192,7 +192,8 @@ repeat:
*/
zap_leader = 0;
leader = p->group_leader;
- if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {
+ if (leader != p && thread_group_empty(leader)
+ && leader->exit_state == EXIT_ZOMBIE) {
/*
* If we were the last child thread and the leader has
* exited already, and the leader's parent ignores SIGCHLD,
@@ -241,7 +242,8 @@ struct pid *session_of_pgrp(struct pid *pgrp)
*
* "I ask you, have you ever known what it is to be an orphan?"
*/
-static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignored_task)
+static int will_become_orphaned_pgrp(struct pid *pgrp,
+ struct task_struct *ignored_task)
{
struct task_struct *p;
@@ -294,9 +296,9 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
struct task_struct *ignored_task = tsk;
if (!parent)
- /* exit: our father is in a different pgrp than
- * we are and we were the only connection outside.
- */
+ /* exit: our father is in a different pgrp than
+ * we are and we were the only connection outside.
+ */
parent = tsk->real_parent;
else
/* reparent: our child is in a different pgrp than
@@ -405,7 +407,7 @@ assign_new_owner:
* Turn us into a lazy TLB process if we
* aren't already..
*/
-static void exit_mm(struct task_struct * tsk)
+static void exit_mm(struct task_struct *tsk)
{
struct mm_struct *mm = tsk->mm;
struct core_state *core_state;
@@ -425,6 +427,7 @@ static void exit_mm(struct task_struct * tsk)
core_state = mm->core_state;
if (core_state) {
struct core_thread self;
+
up_read(&mm->mmap_sem);
self.task = tsk;
@@ -565,6 +568,7 @@ static void forget_original_parent(struct task_struct *father)
list_for_each_entry_safe(p, n, &father->children, sibling) {
struct task_struct *t = p;
+
do {
t->real_parent = reaper;
if (t->parent == father) {
@@ -598,7 +602,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
/*
* This does two things:
*
- * A. Make init inherit all the child processes
+ * A. Make init inherit all the child processes
* B. Check to see if any process groups have become orphaned
* as a result of our exiting, and if they have any stopped
* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
@@ -648,9 +652,8 @@ static void check_stack_usage(void)
spin_lock(&low_water_lock);
if (free < lowest_to_date) {
- printk(KERN_WARNING "%s (%d) used greatest stack depth: "
- "%lu bytes left\n",
- current->comm, task_pid_nr(current), free);
+ pr_warn("%s (%d) used greatest stack depth: %lu bytes left\n",
+ current->comm, task_pid_nr(current), free);
lowest_to_date = free;
}
spin_unlock(&low_water_lock);
@@ -691,8 +694,7 @@ void do_exit(long code)
* leave this task alone and wait for reboot.
*/
if (unlikely(tsk->flags & PF_EXITING)) {
- printk(KERN_ALERT
- "Fixing recursive fault but reboot is needed!\n");
+ pr_alert("Fixing recursive fault but reboot is needed!\n");
/*
* We can do this unlocked here. The futex code uses
* this flag just to verify whether the pi state
@@ -716,9 +718,9 @@ void do_exit(long code)
raw_spin_unlock_wait(&tsk->pi_lock);
if (unlikely(in_atomic()))
- printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
- current->comm, task_pid_nr(current),
- preempt_count());
+ pr_info("note: %s[%d] exited with preempt_count %d\n",
+ current->comm, task_pid_nr(current),
+ preempt_count());
acct_update_integrals(tsk);
/* sync mm's RSS info before statistics gathering */
@@ -836,7 +838,6 @@ void do_exit(long code)
for (;;)
cpu_relax(); /* For when BUG is null */
}
-
EXPORT_SYMBOL_GPL(do_exit);
void complete_and_exit(struct completion *comp, long code)
@@ -846,7 +847,6 @@ void complete_and_exit(struct completion *comp, long code)
do_exit(code);
}
-
EXPORT_SYMBOL(complete_and_exit);
SYSCALL_DEFINE1(exit, int, error_code)
@@ -869,6 +869,7 @@ do_group_exit(int exit_code)
exit_code = sig->group_exit_code;
else if (!thread_group_empty(current)) {
struct sighand_struct *const sighand = current->sighand;
+
spin_lock_irq(&sighand->siglock);
if (signal_group_exit(sig))
/* Another thread got here before we took the lock. */
@@ -1033,9 +1034,9 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
* as other threads in the parent group can be right
* here reaping other children at the same time.
*
- * We use thread_group_cputime_adjusted() to get times for the thread
- * group, which consolidates times for all threads in the
- * group including the group leader.
+ * We use thread_group_cputime_adjusted() to get times for
+ * the thread group, which consolidates times for all threads
+ * in the group including the group leader.
*/
thread_group_cputime_adjusted(p, &tgutime, &tgstime);
spin_lock_irq(&p->real_parent->sighand->siglock);
@@ -1417,6 +1418,7 @@ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
list_for_each_entry(p, &tsk->children, sibling) {
int ret = wait_consider_task(wo, 0, p);
+
if (ret)
return ret;
}
@@ -1430,6 +1432,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
int ret = wait_consider_task(wo, 1, p);
+
if (ret)
return ret;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 8d1063ad74df..197c42d44bef 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -365,12 +365,11 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
*/
down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
- mm->locked_vm = 0;
- mm->mmap = NULL;
- mm->vmacache_seqnum = 0;
- mm->map_count = 0;
- cpumask_clear(mm_cpumask(mm));
- mm->mm_rb = RB_ROOT;
+ mm->total_vm = oldmm->total_vm;
+ mm->shared_vm = oldmm->shared_vm;
+ mm->exec_vm = oldmm->exec_vm;
+ mm->stack_vm = oldmm->stack_vm;
+
rb_link = &mm->mm_rb.rb_node;
rb_parent = NULL;
pprev = &mm->mmap;
@@ -527,19 +526,37 @@ static void mm_init_aio(struct mm_struct *mm)
#endif
}
+static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
+{
+#ifdef CONFIG_MEMCG
+ mm->owner = p;
+#endif
+}
+
static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
{
+ mm->mmap = NULL;
+ mm->mm_rb = RB_ROOT;
+ mm->vmacache_seqnum = 0;
atomic_set(&mm->mm_users, 1);
atomic_set(&mm->mm_count, 1);
init_rwsem(&mm->mmap_sem);
INIT_LIST_HEAD(&mm->mmlist);
mm->core_state = NULL;
atomic_long_set(&mm->nr_ptes, 0);
+ mm->map_count = 0;
+ mm->locked_vm = 0;
+ mm->pinned_vm = 0;
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
spin_lock_init(&mm->page_table_lock);
+ mm_init_cpumask(mm);
mm_init_aio(mm);
mm_init_owner(mm, p);
+ mmu_notifier_mm_init(mm);
clear_tlb_flush_pending(mm);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
+ mm->pmd_huge_pte = NULL;
+#endif
if (current->mm) {
mm->flags = current->mm->flags & MMF_INIT_MASK;
@@ -549,11 +566,17 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
mm->def_flags = 0;
}
- if (likely(!mm_alloc_pgd(mm))) {
- mmu_notifier_mm_init(mm);
- return mm;
- }
+ if (mm_alloc_pgd(mm))
+ goto fail_nopgd;
+
+ if (init_new_context(p, mm))
+ goto fail_nocontext;
+
+ return mm;
+fail_nocontext:
+ mm_free_pgd(mm);
+fail_nopgd:
free_mm(mm);
return NULL;
}
@@ -587,7 +610,6 @@ struct mm_struct *mm_alloc(void)
return NULL;
memset(mm, 0, sizeof(*mm));
- mm_init_cpumask(mm);
return mm_init(mm, current);
}
@@ -819,17 +841,10 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
goto fail_nomem;
memcpy(mm, oldmm, sizeof(*mm));
- mm_init_cpumask(mm);
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
- mm->pmd_huge_pte = NULL;
-#endif
if (!mm_init(mm, tsk))
goto fail_nomem;
- if (init_new_context(tsk, mm))
- goto fail_nocontext;
-
dup_mm_exe_file(oldmm, mm);
err = dup_mmap(mm, oldmm);
@@ -851,15 +866,6 @@ free_pt:
fail_nomem:
return NULL;
-
-fail_nocontext:
- /*
- * If init_new_context() failed, we cannot use mmput() to free the mm
- * because it calls destroy_context()
- */
- mm_free_pgd(mm);
- free_mm(mm);
- return NULL;
}
static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
@@ -1098,13 +1104,6 @@ static void rt_mutex_init_task(struct task_struct *p)
#endif
}
-#ifdef CONFIG_MEMCG
-void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
-{
- mm->owner = p;
-}
-#endif /* CONFIG_MEMCG */
-
/*
* Initialize POSIX timer handling for a single task.
*/
@@ -1306,10 +1305,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#ifdef CONFIG_DEBUG_MUTEXES
p->blocked_on = NULL; /* not blocked yet */
#endif
-#ifdef CONFIG_MEMCG
- p->memcg_batch.do_batch = 0;
- p->memcg_batch.memcg = NULL;
-#endif
#ifdef CONFIG_BCACHE
p->sequential_io = 0;
p->sequential_io_avg = 0;
@@ -1327,6 +1322,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
if (retval)
goto bad_fork_cleanup_policy;
/* copy all the process information */
+ shm_init_task(p);
retval = copy_semundo(clone_flags, p);
if (retval)
goto bad_fork_cleanup_audit;
@@ -1872,6 +1868,11 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
*/
exit_sem(current);
}
+ if (unshare_flags & CLONE_NEWIPC) {
+ /* Orphan segments in old ns (see sem above). */
+ exit_shm(current);
+ shm_init_task(current);
+ }
if (new_nsproxy)
switch_task_namespaces(current, new_nsproxy);
diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c
index 15ff01a76379..edf67c493a8e 100644
--- a/kernel/gcov/fs.c
+++ b/kernel/gcov/fs.c
@@ -784,8 +784,7 @@ static __init int gcov_fs_init(void)
err_remove:
pr_err("init failed\n");
- if (root_node.dentry)
- debugfs_remove(root_node.dentry);
+ debugfs_remove(root_node.dentry);
return rc;
}
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index cb0cf37dac3a..ae5167087845 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -364,7 +364,7 @@ static int __sprint_symbol(char *buffer, unsigned long address,
address += symbol_offset;
name = kallsyms_lookup(address, &size, &offset, &modname, buffer);
if (!name)
- return sprintf(buffer, "0x%lx", address);
+ return sprintf(buffer, "0x%lx", address - symbol_offset);
if (name != buffer)
strcpy(buffer, name);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 369f41a94124..c913c97ea960 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -6,6 +6,8 @@
* Version 2. See the file COPYING for more details.
*/
+#define pr_fmt(fmt) "kexec: " fmt
+
#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
@@ -39,6 +41,9 @@
#include <asm/io.h>
#include <asm/sections.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+
/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;
@@ -51,6 +56,15 @@ size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
/* Flag to indicate we are going to kexec a new kernel */
bool kexec_in_progress = false;
+/*
+ * Declare these symbols weak so that if architecture provides a purgatory,
+ * these will be overridden.
+ */
+char __weak kexec_purgatory[0];
+size_t __weak kexec_purgatory_size = 0;
+
+static int kexec_calculate_store_digests(struct kimage *image);
+
/* Location of the reserved area for the crash kernel */
struct resource crashk_res = {
.name = "Crash kernel",
@@ -124,45 +138,27 @@ static struct page *kimage_alloc_page(struct kimage *image,
gfp_t gfp_mask,
unsigned long dest);
-static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
- unsigned long nr_segments,
- struct kexec_segment __user *segments)
+static int copy_user_segment_list(struct kimage *image,
+ unsigned long nr_segments,
+ struct kexec_segment __user *segments)
{
+ int ret;
size_t segment_bytes;
- struct kimage *image;
- unsigned long i;
- int result;
-
- /* Allocate a controlling structure */
- result = -ENOMEM;
- image = kzalloc(sizeof(*image), GFP_KERNEL);
- if (!image)
- goto out;
-
- image->head = 0;
- image->entry = &image->head;
- image->last_entry = &image->head;
- image->control_page = ~0; /* By default this does not apply */
- image->start = entry;
- image->type = KEXEC_TYPE_DEFAULT;
-
- /* Initialize the list of control pages */
- INIT_LIST_HEAD(&image->control_pages);
-
- /* Initialize the list of destination pages */
- INIT_LIST_HEAD(&image->dest_pages);
-
- /* Initialize the list of unusable pages */
- INIT_LIST_HEAD(&image->unuseable_pages);
/* Read in the segments */
image->nr_segments = nr_segments;
segment_bytes = nr_segments * sizeof(*segments);
- result = copy_from_user(image->segment, segments, segment_bytes);
- if (result) {
- result = -EFAULT;
- goto out;
- }
+ ret = copy_from_user(image->segment, segments, segment_bytes);
+ if (ret)
+ ret = -EFAULT;
+
+ return ret;
+}
+
+static int sanity_check_segment_list(struct kimage *image)
+{
+ int result, i;
+ unsigned long nr_segments = image->nr_segments;
/*
* Verify we have good destination addresses. The caller is
@@ -184,9 +180,9 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
mstart = image->segment[i].mem;
mend = mstart + image->segment[i].memsz;
if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
- goto out;
+ return result;
if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
- goto out;
+ return result;
}
/* Verify our destination addresses do not overlap.
@@ -207,7 +203,7 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
pend = pstart + image->segment[j].memsz;
/* Do the segments overlap ? */
if ((mend > pstart) && (mstart < pend))
- goto out;
+ return result;
}
}
@@ -219,130 +215,386 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
result = -EINVAL;
for (i = 0; i < nr_segments; i++) {
if (image->segment[i].bufsz > image->segment[i].memsz)
- goto out;
+ return result;
}
- result = 0;
-out:
- if (result == 0)
- *rimage = image;
- else
- kfree(image);
+ /*
+ * Verify we have good destination addresses. Normally
+ * the caller is responsible for making certain we don't
+ * attempt to load the new image into invalid or reserved
+ * areas of RAM. But crash kernels are preloaded into a
+ * reserved area of ram. We must ensure the addresses
+ * are in the reserved area otherwise preloading the
+ * kernel could corrupt things.
+ */
- return result;
+ if (image->type == KEXEC_TYPE_CRASH) {
+ result = -EADDRNOTAVAIL;
+ for (i = 0; i < nr_segments; i++) {
+ unsigned long mstart, mend;
+
+ mstart = image->segment[i].mem;
+ mend = mstart + image->segment[i].memsz - 1;
+ /* Ensure we are within the crash kernel limits */
+ if ((mstart < crashk_res.start) ||
+ (mend > crashk_res.end))
+ return result;
+ }
+ }
+ return 0;
+}
+
+static struct kimage *do_kimage_alloc_init(void)
+{
+ struct kimage *image;
+
+ /* Allocate a controlling structure */
+ image = kzalloc(sizeof(*image), GFP_KERNEL);
+ if (!image)
+ return NULL;
+
+ image->head = 0;
+ image->entry = &image->head;
+ image->last_entry = &image->head;
+ image->control_page = ~0; /* By default this does not apply */
+ image->type = KEXEC_TYPE_DEFAULT;
+
+ /* Initialize the list of control pages */
+ INIT_LIST_HEAD(&image->control_pages);
+
+ /* Initialize the list of destination pages */
+ INIT_LIST_HEAD(&image->dest_pages);
+
+ /* Initialize the list of unusable pages */
+ INIT_LIST_HEAD(&image->unusable_pages);
+
+ return image;
}
static void kimage_free_page_list(struct list_head *list);
-static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
- unsigned long nr_segments,
- struct kexec_segment __user *segments)
+static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
+ unsigned long nr_segments,
+ struct kexec_segment __user *segments,
+ unsigned long flags)
{
- int result;
+ int ret;
struct kimage *image;
+ bool kexec_on_panic = flags & KEXEC_ON_CRASH;
+
+ if (kexec_on_panic) {
+ /* Verify we have a valid entry point */
+ if ((entry < crashk_res.start) || (entry > crashk_res.end))
+ return -EADDRNOTAVAIL;
+ }
/* Allocate and initialize a controlling structure */
- image = NULL;
- result = do_kimage_alloc(&image, entry, nr_segments, segments);
- if (result)
- goto out;
+ image = do_kimage_alloc_init();
+ if (!image)
+ return -ENOMEM;
+
+ image->start = entry;
+
+ ret = copy_user_segment_list(image, nr_segments, segments);
+ if (ret)
+ goto out_free_image;
+
+ ret = sanity_check_segment_list(image);
+ if (ret)
+ goto out_free_image;
+
+ /* Enable the special crash kernel control page allocation policy. */
+ if (kexec_on_panic) {
+ image->control_page = crashk_res.start;
+ image->type = KEXEC_TYPE_CRASH;
+ }
/*
* Find a location for the control code buffer, and add it
* the vector of segments so that it's pages will also be
* counted as destination pages.
*/
- result = -ENOMEM;
+ ret = -ENOMEM;
image->control_code_page = kimage_alloc_control_pages(image,
get_order(KEXEC_CONTROL_PAGE_SIZE));
if (!image->control_code_page) {
pr_err("Could not allocate control_code_buffer\n");
- goto out_free;
+ goto out_free_image;
}
- image->swap_page = kimage_alloc_control_pages(image, 0);
- if (!image->swap_page) {
- pr_err("Could not allocate swap buffer\n");
- goto out_free;
+ if (!kexec_on_panic) {
+ image->swap_page = kimage_alloc_control_pages(image, 0);
+ if (!image->swap_page) {
+ pr_err("Could not allocate swap buffer\n");
+ goto out_free_control_pages;
+ }
}
*rimage = image;
return 0;
-
-out_free:
+out_free_control_pages:
kimage_free_page_list(&image->control_pages);
+out_free_image:
kfree(image);
-out:
- return result;
+ return ret;
}
-static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
- unsigned long nr_segments,
- struct kexec_segment __user *segments)
+static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len)
{
- int result;
- struct kimage *image;
- unsigned long i;
+ struct fd f = fdget(fd);
+ int ret;
+ struct kstat stat;
+ loff_t pos;
+ ssize_t bytes = 0;
- image = NULL;
- /* Verify we have a valid entry point */
- if ((entry < crashk_res.start) || (entry > crashk_res.end)) {
- result = -EADDRNOTAVAIL;
+ if (!f.file)
+ return -EBADF;
+
+ ret = vfs_getattr(&f.file->f_path, &stat);
+ if (ret)
+ goto out;
+
+ if (stat.size > INT_MAX) {
+ ret = -EFBIG;
goto out;
}
- /* Allocate and initialize a controlling structure */
- result = do_kimage_alloc(&image, entry, nr_segments, segments);
- if (result)
+ /* Don't hand 0 to vmalloc, it whines. */
+ if (stat.size == 0) {
+ ret = -EINVAL;
goto out;
+ }
- /* Enable the special crash kernel control page
- * allocation policy.
- */
- image->control_page = crashk_res.start;
- image->type = KEXEC_TYPE_CRASH;
+ *buf = vmalloc(stat.size);
+ if (!*buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
- /*
- * Verify we have good destination addresses. Normally
- * the caller is responsible for making certain we don't
- * attempt to load the new image into invalid or reserved
- * areas of RAM. But crash kernels are preloaded into a
- * reserved area of ram. We must ensure the addresses
- * are in the reserved area otherwise preloading the
- * kernel could corrupt things.
- */
- result = -EADDRNOTAVAIL;
- for (i = 0; i < nr_segments; i++) {
- unsigned long mstart, mend;
+ pos = 0;
+ while (pos < stat.size) {
+ bytes = kernel_read(f.file, pos, (char *)(*buf) + pos,
+ stat.size - pos);
+ if (bytes < 0) {
+ vfree(*buf);
+ ret = bytes;
+ goto out;
+ }
- mstart = image->segment[i].mem;
- mend = mstart + image->segment[i].memsz - 1;
- /* Ensure we are within the crash kernel limits */
- if ((mstart < crashk_res.start) || (mend > crashk_res.end))
- goto out_free;
+ if (bytes == 0)
+ break;
+ pos += bytes;
}
+ if (pos != stat.size) {
+ ret = -EBADF;
+ vfree(*buf);
+ goto out;
+ }
+
+ *buf_len = pos;
+out:
+ fdput(f);
+ return ret;
+}
+
+/* Architectures can provide this probe function */
+int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+ unsigned long buf_len)
+{
+ return -ENOEXEC;
+}
+
+void * __weak arch_kexec_kernel_image_load(struct kimage *image)
+{
+ return ERR_PTR(-ENOEXEC);
+}
+
+void __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+}
+
+/* Apply relocations of type RELA */
+int __weak
+arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+ unsigned int relsec)
+{
+ pr_err("RELA relocation unsupported.\n");
+ return -ENOEXEC;
+}
+
+/* Apply relocations of type REL */
+int __weak
+arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+ unsigned int relsec)
+{
+ pr_err("REL relocation unsupported.\n");
+ return -ENOEXEC;
+}
+
+/*
+ * Free up memory used by kernel, initrd, and comand line. This is temporary
+ * memory allocation which is not needed any more after these buffers have
+ * been loaded into separate segments and have been copied elsewhere.
+ */
+static void kimage_file_post_load_cleanup(struct kimage *image)
+{
+ struct purgatory_info *pi = &image->purgatory_info;
+
+ vfree(image->kernel_buf);
+ image->kernel_buf = NULL;
+
+ vfree(image->initrd_buf);
+ image->initrd_buf = NULL;
+
+ kfree(image->cmdline_buf);
+ image->cmdline_buf = NULL;
+
+ vfree(pi->purgatory_buf);
+ pi->purgatory_buf = NULL;
+
+ vfree(pi->sechdrs);
+ pi->sechdrs = NULL;
+
+ /* See if architecture has anything to cleanup post load */
+ arch_kimage_file_post_load_cleanup(image);
+
/*
- * Find a location for the control code buffer, and add
- * the vector of segments so that it's pages will also be
- * counted as destination pages.
+ * Above call should have called into bootloader to free up
+ * any data stored in kimage->image_loader_data. It should
+ * be ok now to free it up.
*/
- result = -ENOMEM;
+ kfree(image->image_loader_data);
+ image->image_loader_data = NULL;
+}
+
+/*
+ * In file mode list of segments is prepared by kernel. Copy relevant
+ * data from user space, do error checking, prepare segment list
+ */
+static int
+kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
+ const char __user *cmdline_ptr,
+ unsigned long cmdline_len, unsigned flags)
+{
+ int ret = 0;
+ void *ldata;
+
+ ret = copy_file_from_fd(kernel_fd, &image->kernel_buf,
+ &image->kernel_buf_len);
+ if (ret)
+ return ret;
+
+ /* Call arch image probe handlers */
+ ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
+ image->kernel_buf_len);
+
+ if (ret)
+ goto out;
+
+ /* It is possible that there no initramfs is being loaded */
+ if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
+ ret = copy_file_from_fd(initrd_fd, &image->initrd_buf,
+ &image->initrd_buf_len);
+ if (ret)
+ goto out;
+ }
+
+ if (cmdline_len) {
+ image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL);
+ if (!image->cmdline_buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = copy_from_user(image->cmdline_buf, cmdline_ptr,
+ cmdline_len);
+ if (ret) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ image->cmdline_buf_len = cmdline_len;
+
+ /* command line should be a string with last byte null */
+ if (image->cmdline_buf[cmdline_len - 1] != '\0') {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ /* Call arch image load handlers */
+ ldata = arch_kexec_kernel_image_load(image);
+
+ if (IS_ERR(ldata)) {
+ ret = PTR_ERR(ldata);
+ goto out;
+ }
+
+ image->image_loader_data = ldata;
+out:
+ /* In case of error, free up all allocated memory in this function */
+ if (ret)
+ kimage_file_post_load_cleanup(image);
+ return ret;
+}
+
+static int
+kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
+ int initrd_fd, const char __user *cmdline_ptr,
+ unsigned long cmdline_len, unsigned long flags)
+{
+ int ret;
+ struct kimage *image;
+ bool kexec_on_panic = flags & KEXEC_FILE_ON_CRASH;
+
+ image = do_kimage_alloc_init();
+ if (!image)
+ return -ENOMEM;
+
+ image->file_mode = 1;
+
+ if (kexec_on_panic) {
+ /* Enable special crash kernel control page alloc policy. */
+ image->control_page = crashk_res.start;
+ image->type = KEXEC_TYPE_CRASH;
+ }
+
+ ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
+ cmdline_ptr, cmdline_len, flags);
+ if (ret)
+ goto out_free_image;
+
+ ret = sanity_check_segment_list(image);
+ if (ret)
+ goto out_free_post_load_bufs;
+
+ ret = -ENOMEM;
image->control_code_page = kimage_alloc_control_pages(image,
get_order(KEXEC_CONTROL_PAGE_SIZE));
if (!image->control_code_page) {
pr_err("Could not allocate control_code_buffer\n");
- goto out_free;
+ goto out_free_post_load_bufs;
+ }
+
+ if (!kexec_on_panic) {
+ image->swap_page = kimage_alloc_control_pages(image, 0);
+ if (!image->swap_page) {
+ pr_err(KERN_ERR "Could not allocate swap buffer\n");
+ goto out_free_control_pages;
+ }
}
*rimage = image;
return 0;
-
-out_free:
+out_free_control_pages:
+ kimage_free_page_list(&image->control_pages);
+out_free_post_load_bufs:
+ kimage_file_post_load_cleanup(image);
+out_free_image:
kfree(image);
-out:
- return result;
+ return ret;
}
static int kimage_is_destination_range(struct kimage *image,
@@ -608,7 +860,7 @@ static void kimage_free_extra_pages(struct kimage *image)
kimage_free_page_list(&image->dest_pages);
/* Walk through and free any unusable pages I have cached */
- kimage_free_page_list(&image->unuseable_pages);
+ kimage_free_page_list(&image->unusable_pages);
}
static void kimage_terminate(struct kimage *image)
@@ -662,6 +914,14 @@ static void kimage_free(struct kimage *image)
/* Free the kexec control pages... */
kimage_free_page_list(&image->control_pages);
+
+ /*
+ * Free up any temporary buffers allocated. This might hit if
+ * error occurred much later after buffer allocation.
+ */
+ if (image->file_mode)
+ kimage_file_post_load_cleanup(image);
+
kfree(image);
}
@@ -731,7 +991,7 @@ static struct page *kimage_alloc_page(struct kimage *image,
/* If the page cannot be used file it away */
if (page_to_pfn(page) >
(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
- list_add(&page->lru, &image->unuseable_pages);
+ list_add(&page->lru, &image->unusable_pages);
continue;
}
addr = page_to_pfn(page) << PAGE_SHIFT;
@@ -790,10 +1050,14 @@ static int kimage_load_normal_segment(struct kimage *image,
unsigned long maddr;
size_t ubytes, mbytes;
int result;
- unsigned char __user *buf;
+ unsigned char __user *buf = NULL;
+ unsigned char *kbuf = NULL;
result = 0;
- buf = segment->buf;
+ if (image->file_mode)
+ kbuf = segment->kbuf;
+ else
+ buf = segment->buf;
ubytes = segment->bufsz;
mbytes = segment->memsz;
maddr = segment->mem;
@@ -825,7 +1089,11 @@ static int kimage_load_normal_segment(struct kimage *image,
PAGE_SIZE - (maddr & ~PAGE_MASK));
uchunk = min(ubytes, mchunk);
- result = copy_from_user(ptr, buf, uchunk);
+ /* For file based kexec, source pages are in kernel memory */
+ if (image->file_mode)
+ memcpy(ptr, kbuf, uchunk);
+ else
+ result = copy_from_user(ptr, buf, uchunk);
kunmap(page);
if (result) {
result = -EFAULT;
@@ -833,7 +1101,10 @@ static int kimage_load_normal_segment(struct kimage *image,
}
ubytes -= uchunk;
maddr += mchunk;
- buf += mchunk;
+ if (image->file_mode)
+ kbuf += mchunk;
+ else
+ buf += mchunk;
mbytes -= mchunk;
}
out:
@@ -850,10 +1121,14 @@ static int kimage_load_crash_segment(struct kimage *image,
unsigned long maddr;
size_t ubytes, mbytes;
int result;
- unsigned char __user *buf;
+ unsigned char __user *buf = NULL;
+ unsigned char *kbuf = NULL;
result = 0;
- buf = segment->buf;
+ if (image->file_mode)
+ kbuf = segment->kbuf;
+ else
+ buf = segment->buf;
ubytes = segment->bufsz;
mbytes = segment->memsz;
maddr = segment->mem;
@@ -876,7 +1151,12 @@ static int kimage_load_crash_segment(struct kimage *image,
/* Zero the trailing part of the page */
memset(ptr + uchunk, 0, mchunk - uchunk);
}
- result = copy_from_user(ptr, buf, uchunk);
+
+ /* For file based kexec, source pages are in kernel memory */
+ if (image->file_mode)
+ memcpy(ptr, kbuf, uchunk);
+ else
+ result = copy_from_user(ptr, buf, uchunk);
kexec_flush_icache_page(page);
kunmap(page);
if (result) {
@@ -885,7 +1165,10 @@ static int kimage_load_crash_segment(struct kimage *image,
}
ubytes -= uchunk;
maddr += mchunk;
- buf += mchunk;
+ if (image->file_mode)
+ kbuf += mchunk;
+ else
+ buf += mchunk;
mbytes -= mchunk;
}
out:
@@ -985,16 +1268,16 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
/* Loading another kernel to reboot into */
if ((flags & KEXEC_ON_CRASH) == 0)
- result = kimage_normal_alloc(&image, entry,
- nr_segments, segments);
+ result = kimage_alloc_init(&image, entry, nr_segments,
+ segments, flags);
/* Loading another kernel to switch to if this one crashes */
else if (flags & KEXEC_ON_CRASH) {
/* Free any current crash dump kernel before
* we corrupt it.
*/
kimage_free(xchg(&kexec_crash_image, NULL));
- result = kimage_crash_alloc(&image, entry,
- nr_segments, segments);
+ result = kimage_alloc_init(&image, entry, nr_segments,
+ segments, flags);
crash_map_reserved_pages();
}
if (result)
@@ -1076,6 +1359,82 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
}
#endif
+SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
+ unsigned long, cmdline_len, const char __user *, cmdline_ptr,
+ unsigned long, flags)
+{
+ int ret = 0, i;
+ struct kimage **dest_image, *image;
+
+ /* We only trust the superuser with rebooting the system. */
+ if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
+ return -EPERM;
+
+ /* Make sure we have a legal set of flags */
+ if (flags != (flags & KEXEC_FILE_FLAGS))
+ return -EINVAL;
+
+ image = NULL;
+
+ if (!mutex_trylock(&kexec_mutex))
+ return -EBUSY;
+
+ dest_image = &kexec_image;
+ if (flags & KEXEC_FILE_ON_CRASH)
+ dest_image = &kexec_crash_image;
+
+ if (flags & KEXEC_FILE_UNLOAD)
+ goto exchange;
+
+ /*
+ * In case of crash, new kernel gets loaded in reserved region. It is
+ * same memory where old crash kernel might be loaded. Free any
+ * current crash dump kernel before we corrupt it.
+ */
+ if (flags & KEXEC_FILE_ON_CRASH)
+ kimage_free(xchg(&kexec_crash_image, NULL));
+
+ ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr,
+ cmdline_len, flags);
+ if (ret)
+ goto out;
+
+ ret = machine_kexec_prepare(image);
+ if (ret)
+ goto out;
+
+ ret = kexec_calculate_store_digests(image);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < image->nr_segments; i++) {
+ struct kexec_segment *ksegment;
+
+ ksegment = &image->segment[i];
+ pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n",
+ i, ksegment->buf, ksegment->bufsz, ksegment->mem,
+ ksegment->memsz);
+
+ ret = kimage_load_segment(image, &image->segment[i]);
+ if (ret)
+ goto out;
+ }
+
+ kimage_terminate(image);
+
+ /*
+ * Free up any temporary buffers allocated which are not needed
+ * after image has been loaded
+ */
+ kimage_file_post_load_cleanup(image);
+exchange:
+ image = xchg(dest_image, image);
+out:
+ mutex_unlock(&kexec_mutex);
+ kimage_free(image);
+ return ret;
+}
+
void crash_kexec(struct pt_regs *regs)
{
/* Take the kexec_mutex here to prevent sys_kexec_load
@@ -1628,6 +1987,683 @@ static int __init crash_save_vmcoreinfo_init(void)
subsys_initcall(crash_save_vmcoreinfo_init);
+static int __kexec_add_segment(struct kimage *image, char *buf,
+ unsigned long bufsz, unsigned long mem,
+ unsigned long memsz)
+{
+ struct kexec_segment *ksegment;
+
+ ksegment = &image->segment[image->nr_segments];
+ ksegment->kbuf = buf;
+ ksegment->bufsz = bufsz;
+ ksegment->mem = mem;
+ ksegment->memsz = memsz;
+ image->nr_segments++;
+
+ return 0;
+}
+
+static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
+ struct kexec_buf *kbuf)
+{
+ struct kimage *image = kbuf->image;
+ unsigned long temp_start, temp_end;
+
+ temp_end = min(end, kbuf->buf_max);
+ temp_start = temp_end - kbuf->memsz;
+
+ do {
+ /* align down start */
+ temp_start = temp_start & (~(kbuf->buf_align - 1));
+
+ if (temp_start < start || temp_start < kbuf->buf_min)
+ return 0;
+
+ temp_end = temp_start + kbuf->memsz - 1;
+
+ /*
+ * Make sure this does not conflict with any of existing
+ * segments
+ */
+ if (kimage_is_destination_range(image, temp_start, temp_end)) {
+ temp_start = temp_start - PAGE_SIZE;
+ continue;
+ }
+
+ /* We found a suitable memory range */
+ break;
+ } while (1);
+
+ /* If we are here, we found a suitable memory range */
+ __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
+ kbuf->memsz);
+
+ /* Success, stop navigating through remaining System RAM ranges */
+ return 1;
+}
+
+static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
+ struct kexec_buf *kbuf)
+{
+ struct kimage *image = kbuf->image;
+ unsigned long temp_start, temp_end;
+
+ temp_start = max(start, kbuf->buf_min);
+
+ do {
+ temp_start = ALIGN(temp_start, kbuf->buf_align);
+ temp_end = temp_start + kbuf->memsz - 1;
+
+ if (temp_end > end || temp_end > kbuf->buf_max)
+ return 0;
+ /*
+ * Make sure this does not conflict with any of existing
+ * segments
+ */
+ if (kimage_is_destination_range(image, temp_start, temp_end)) {
+ temp_start = temp_start + PAGE_SIZE;
+ continue;
+ }
+
+ /* We found a suitable memory range */
+ break;
+ } while (1);
+
+ /* If we are here, we found a suitable memory range */
+ __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
+ kbuf->memsz);
+
+ /* Success, stop navigating through remaining System RAM ranges */
+ return 1;
+}
+
+static int locate_mem_hole_callback(u64 start, u64 end, void *arg)
+{
+ struct kexec_buf *kbuf = (struct kexec_buf *)arg;
+ unsigned long sz = end - start + 1;
+
+ /* Returning 0 will take to next memory range */
+ if (sz < kbuf->memsz)
+ return 0;
+
+ if (end < kbuf->buf_min || start > kbuf->buf_max)
+ return 0;
+
+ /*
+ * Allocate memory top down with-in ram range. Otherwise bottom up
+ * allocation.
+ */
+ if (kbuf->top_down)
+ return locate_mem_hole_top_down(start, end, kbuf);
+ return locate_mem_hole_bottom_up(start, end, kbuf);
+}
+
+/*
+ * Helper function for placing a buffer in a kexec segment. This assumes
+ * that kexec_mutex is held.
+ */
+int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
+ unsigned long memsz, unsigned long buf_align,
+ unsigned long buf_min, unsigned long buf_max,
+ bool top_down, unsigned long *load_addr)
+{
+
+ struct kexec_segment *ksegment;
+ struct kexec_buf buf, *kbuf;
+ int ret;
+
+ /* Currently adding segment this way is allowed only in file mode */
+ if (!image->file_mode)
+ return -EINVAL;
+
+ if (image->nr_segments >= KEXEC_SEGMENT_MAX)
+ return -EINVAL;
+
+ /*
+ * Make sure we are not trying to add buffer after allocating
+ * control pages. All segments need to be placed first before
+ * any control pages are allocated. As control page allocation
+ * logic goes through list of segments to make sure there are
+ * no destination overlaps.
+ */
+ if (!list_empty(&image->control_pages)) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ memset(&buf, 0, sizeof(struct kexec_buf));
+ kbuf = &buf;
+ kbuf->image = image;
+ kbuf->buffer = buffer;
+ kbuf->bufsz = bufsz;
+
+ kbuf->memsz = ALIGN(memsz, PAGE_SIZE);
+ kbuf->buf_align = max(buf_align, PAGE_SIZE);
+ kbuf->buf_min = buf_min;
+ kbuf->buf_max = buf_max;
+ kbuf->top_down = top_down;
+
+ /* Walk the RAM ranges and allocate a suitable range for the buffer */
+ if (image->type == KEXEC_TYPE_CRASH)
+ ret = walk_iomem_res("Crash kernel",
+ IORESOURCE_MEM | IORESOURCE_BUSY,
+ crashk_res.start, crashk_res.end, kbuf,
+ locate_mem_hole_callback);
+ else
+ ret = walk_system_ram_res(0, -1, kbuf,
+ locate_mem_hole_callback);
+ if (ret != 1) {
+ /* A suitable memory range could not be found for buffer */
+ return -EADDRNOTAVAIL;
+ }
+
+ /* Found a suitable memory range */
+ ksegment = &image->segment[image->nr_segments - 1];
+ *load_addr = ksegment->mem;
+ return 0;
+}
+
+/* Calculate and store the digest of segments */
+static int kexec_calculate_store_digests(struct kimage *image)
+{
+ struct crypto_shash *tfm;
+ struct shash_desc *desc;
+ int ret = 0, i, j, zero_buf_sz, sha_region_sz;
+ size_t desc_size, nullsz;
+ char *digest;
+ void *zero_buf;
+ struct kexec_sha_region *sha_regions;
+ struct purgatory_info *pi = &image->purgatory_info;
+
+ zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT);
+ zero_buf_sz = PAGE_SIZE;
+
+ tfm = crypto_alloc_shash("sha256", 0, 0);
+ if (IS_ERR(tfm)) {
+ ret = PTR_ERR(tfm);
+ goto out;
+ }
+
+ desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
+ desc = kzalloc(desc_size, GFP_KERNEL);
+ if (!desc) {
+ ret = -ENOMEM;
+ goto out_free_tfm;
+ }
+
+ sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region);
+ sha_regions = vzalloc(sha_region_sz);
+ if (!sha_regions)
+ goto out_free_desc;
+
+ desc->tfm = tfm;
+ desc->flags = 0;
+
+ ret = crypto_shash_init(desc);
+ if (ret < 0)
+ goto out_free_sha_regions;
+
+ digest = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL);
+ if (!digest) {
+ ret = -ENOMEM;
+ goto out_free_sha_regions;
+ }
+
+ for (j = i = 0; i < image->nr_segments; i++) {
+ struct kexec_segment *ksegment;
+
+ ksegment = &image->segment[i];
+ /*
+ * Skip purgatory as it will be modified once we put digest
+ * info in purgatory.
+ */
+ if (ksegment->kbuf == pi->purgatory_buf)
+ continue;
+
+ ret = crypto_shash_update(desc, ksegment->kbuf,
+ ksegment->bufsz);
+ if (ret)
+ break;
+
+ /*
+ * Assume rest of the buffer is filled with zero and
+ * update digest accordingly.
+ */
+ nullsz = ksegment->memsz - ksegment->bufsz;
+ while (nullsz) {
+ unsigned long bytes = nullsz;
+
+ if (bytes > zero_buf_sz)
+ bytes = zero_buf_sz;
+ ret = crypto_shash_update(desc, zero_buf, bytes);
+ if (ret)
+ break;
+ nullsz -= bytes;
+ }
+
+ if (ret)
+ break;
+
+ sha_regions[j].start = ksegment->mem;
+ sha_regions[j].len = ksegment->memsz;
+ j++;
+ }
+
+ if (!ret) {
+ ret = crypto_shash_final(desc, digest);
+ if (ret)
+ goto out_free_digest;
+ ret = kexec_purgatory_get_set_symbol(image, "sha_regions",
+ sha_regions, sha_region_sz, 0);
+ if (ret)
+ goto out_free_digest;
+
+ ret = kexec_purgatory_get_set_symbol(image, "sha256_digest",
+ digest, SHA256_DIGEST_SIZE, 0);
+ if (ret)
+ goto out_free_digest;
+ }
+
+out_free_digest:
+ kfree(digest);
+out_free_sha_regions:
+ vfree(sha_regions);
+out_free_desc:
+ kfree(desc);
+out_free_tfm:
+ kfree(tfm);
+out:
+ return ret;
+}
+
+/* Actually load purgatory. Lot of code taken from kexec-tools */
+static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
+ unsigned long max, int top_down)
+{
+ struct purgatory_info *pi = &image->purgatory_info;
+ unsigned long align, buf_align, bss_align, buf_sz, bss_sz, bss_pad;
+ unsigned long memsz, entry, load_addr, curr_load_addr, bss_addr, offset;
+ unsigned char *buf_addr, *src;
+ int i, ret = 0, entry_sidx = -1;
+ const Elf_Shdr *sechdrs_c;
+ Elf_Shdr *sechdrs = NULL;
+ void *purgatory_buf = NULL;
+
+ /*
+ * sechdrs_c points to section headers in purgatory and are read
+ * only. No modifications allowed.
+ */
+ sechdrs_c = (void *)pi->ehdr + pi->ehdr->e_shoff;
+
+ /*
+ * We can not modify sechdrs_c[] and its fields. It is read only.
+ * Copy it over to a local copy where one can store some temporary
+ * data and free it at the end. We need to modify ->sh_addr and
+ * ->sh_offset fields to keep track of permanent and temporary
+ * locations of sections.
+ */
+ sechdrs = vzalloc(pi->ehdr->e_shnum * sizeof(Elf_Shdr));
+ if (!sechdrs)
+ return -ENOMEM;
+
+ memcpy(sechdrs, sechdrs_c, pi->ehdr->e_shnum * sizeof(Elf_Shdr));
+
+ /*
+ * We seem to have multiple copies of sections. First copy is which
+ * is embedded in kernel in read only section. Some of these sections
+ * will be copied to a temporary buffer and relocated. And these
+ * sections will finally be copied to their final destination at
+ * segment load time.
+ *
+ * Use ->sh_offset to reflect section address in memory. It will
+ * point to original read only copy if section is not allocatable.
+ * Otherwise it will point to temporary copy which will be relocated.
+ *
+ * Use ->sh_addr to contain final address of the section where it
+ * will go during execution time.
+ */
+ for (i = 0; i < pi->ehdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type == SHT_NOBITS)
+ continue;
+
+ sechdrs[i].sh_offset = (unsigned long)pi->ehdr +
+ sechdrs[i].sh_offset;
+ }
+
+ /*
+ * Identify entry point section and make entry relative to section
+ * start.
+ */
+ entry = pi->ehdr->e_entry;
+ for (i = 0; i < pi->ehdr->e_shnum; i++) {
+ if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+ continue;
+
+ if (!(sechdrs[i].sh_flags & SHF_EXECINSTR))
+ continue;
+
+ /* Make entry section relative */
+ if (sechdrs[i].sh_addr <= pi->ehdr->e_entry &&
+ ((sechdrs[i].sh_addr + sechdrs[i].sh_size) >
+ pi->ehdr->e_entry)) {
+ entry_sidx = i;
+ entry -= sechdrs[i].sh_addr;
+ break;
+ }
+ }
+
+ /* Determine how much memory is needed to load relocatable object. */
+ buf_align = 1;
+ bss_align = 1;
+ buf_sz = 0;
+ bss_sz = 0;
+
+ for (i = 0; i < pi->ehdr->e_shnum; i++) {
+ if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+ continue;
+
+ align = sechdrs[i].sh_addralign;
+ if (sechdrs[i].sh_type != SHT_NOBITS) {
+ if (buf_align < align)
+ buf_align = align;
+ buf_sz = ALIGN(buf_sz, align);
+ buf_sz += sechdrs[i].sh_size;
+ } else {
+ /* bss section */
+ if (bss_align < align)
+ bss_align = align;
+ bss_sz = ALIGN(bss_sz, align);
+ bss_sz += sechdrs[i].sh_size;
+ }
+ }
+
+ /* Determine the bss padding required to align bss properly */
+ bss_pad = 0;
+ if (buf_sz & (bss_align - 1))
+ bss_pad = bss_align - (buf_sz & (bss_align - 1));
+
+ memsz = buf_sz + bss_pad + bss_sz;
+
+ /* Allocate buffer for purgatory */
+ purgatory_buf = vzalloc(buf_sz);
+ if (!purgatory_buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (buf_align < bss_align)
+ buf_align = bss_align;
+
+ /* Add buffer to segment list */
+ ret = kexec_add_buffer(image, purgatory_buf, buf_sz, memsz,
+ buf_align, min, max, top_down,
+ &pi->purgatory_load_addr);
+ if (ret)
+ goto out;
+
+ /* Load SHF_ALLOC sections */
+ buf_addr = purgatory_buf;
+ load_addr = curr_load_addr = pi->purgatory_load_addr;
+ bss_addr = load_addr + buf_sz + bss_pad;
+
+ for (i = 0; i < pi->ehdr->e_shnum; i++) {
+ if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+ continue;
+
+ align = sechdrs[i].sh_addralign;
+ if (sechdrs[i].sh_type != SHT_NOBITS) {
+ curr_load_addr = ALIGN(curr_load_addr, align);
+ offset = curr_load_addr - load_addr;
+ /* We already modifed ->sh_offset to keep src addr */
+ src = (char *) sechdrs[i].sh_offset;
+ memcpy(buf_addr + offset, src, sechdrs[i].sh_size);
+
+ /* Store load address and source address of section */
+ sechdrs[i].sh_addr = curr_load_addr;
+
+ /*
+ * This section got copied to temporary buffer. Update
+ * ->sh_offset accordingly.
+ */
+ sechdrs[i].sh_offset = (unsigned long)(buf_addr + offset);
+
+ /* Advance to the next address */
+ curr_load_addr += sechdrs[i].sh_size;
+ } else {
+ bss_addr = ALIGN(bss_addr, align);
+ sechdrs[i].sh_addr = bss_addr;
+ bss_addr += sechdrs[i].sh_size;
+ }
+ }
+
+ /* Update entry point based on load address of text section */
+ if (entry_sidx >= 0)
+ entry += sechdrs[entry_sidx].sh_addr;
+
+ /* Make kernel jump to purgatory after shutdown */
+ image->start = entry;
+
+ /* Used later to get/set symbol values */
+ pi->sechdrs = sechdrs;
+
+ /*
+ * Used later to identify which section is purgatory and skip it
+ * from checksumming.
+ */
+ pi->purgatory_buf = purgatory_buf;
+ return ret;
+out:
+ vfree(sechdrs);
+ vfree(purgatory_buf);
+ return ret;
+}
+
+static int kexec_apply_relocations(struct kimage *image)
+{
+ int i, ret;
+ struct purgatory_info *pi = &image->purgatory_info;
+ Elf_Shdr *sechdrs = pi->sechdrs;
+
+ /* Apply relocations */
+ for (i = 0; i < pi->ehdr->e_shnum; i++) {
+ Elf_Shdr *section, *symtab;
+
+ if (sechdrs[i].sh_type != SHT_RELA &&
+ sechdrs[i].sh_type != SHT_REL)
+ continue;
+
+ /*
+ * For section of type SHT_RELA/SHT_REL,
+ * ->sh_link contains section header index of associated
+ * symbol table. And ->sh_info contains section header
+ * index of section to which relocations apply.
+ */
+ if (sechdrs[i].sh_info >= pi->ehdr->e_shnum ||
+ sechdrs[i].sh_link >= pi->ehdr->e_shnum)
+ return -ENOEXEC;
+
+ section = &sechdrs[sechdrs[i].sh_info];
+ symtab = &sechdrs[sechdrs[i].sh_link];
+
+ if (!(section->sh_flags & SHF_ALLOC))
+ continue;
+
+ /*
+ * symtab->sh_link contain section header index of associated
+ * string table.
+ */
+ if (symtab->sh_link >= pi->ehdr->e_shnum)
+ /* Invalid section number? */
+ continue;
+
+ /*
+ * Respective archicture needs to provide support for applying
+ * relocations of type SHT_RELA/SHT_REL.
+ */
+ if (sechdrs[i].sh_type == SHT_RELA)
+ ret = arch_kexec_apply_relocations_add(pi->ehdr,
+ sechdrs, i);
+ else if (sechdrs[i].sh_type == SHT_REL)
+ ret = arch_kexec_apply_relocations(pi->ehdr,
+ sechdrs, i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/* Load relocatable purgatory object and relocate it appropriately */
+int kexec_load_purgatory(struct kimage *image, unsigned long min,
+ unsigned long max, int top_down,
+ unsigned long *load_addr)
+{
+ struct purgatory_info *pi = &image->purgatory_info;
+ int ret;
+
+ if (kexec_purgatory_size <= 0)
+ return -EINVAL;
+
+ if (kexec_purgatory_size < sizeof(Elf_Ehdr))
+ return -ENOEXEC;
+
+ pi->ehdr = (Elf_Ehdr *)kexec_purgatory;
+
+ if (memcmp(pi->ehdr->e_ident, ELFMAG, SELFMAG) != 0
+ || pi->ehdr->e_type != ET_REL
+ || !elf_check_arch(pi->ehdr)
+ || pi->ehdr->e_shentsize != sizeof(Elf_Shdr))
+ return -ENOEXEC;
+
+ if (pi->ehdr->e_shoff >= kexec_purgatory_size
+ || (pi->ehdr->e_shnum * sizeof(Elf_Shdr) >
+ kexec_purgatory_size - pi->ehdr->e_shoff))
+ return -ENOEXEC;
+
+ ret = __kexec_load_purgatory(image, min, max, top_down);
+ if (ret)
+ return ret;
+
+ ret = kexec_apply_relocations(image);
+ if (ret)
+ goto out;
+
+ *load_addr = pi->purgatory_load_addr;
+ return 0;
+out:
+ vfree(pi->sechdrs);
+ vfree(pi->purgatory_buf);
+ return ret;
+}
+
+static Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi,
+ const char *name)
+{
+ Elf_Sym *syms;
+ Elf_Shdr *sechdrs;
+ Elf_Ehdr *ehdr;
+ int i, k;
+ const char *strtab;
+
+ if (!pi->sechdrs || !pi->ehdr)
+ return NULL;
+
+ sechdrs = pi->sechdrs;
+ ehdr = pi->ehdr;
+
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type != SHT_SYMTAB)
+ continue;
+
+ if (sechdrs[i].sh_link >= ehdr->e_shnum)
+ /* Invalid strtab section number */
+ continue;
+ strtab = (char *)sechdrs[sechdrs[i].sh_link].sh_offset;
+ syms = (Elf_Sym *)sechdrs[i].sh_offset;
+
+ /* Go through symbols for a match */
+ for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) {
+ if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL)
+ continue;
+
+ if (strcmp(strtab + syms[k].st_name, name) != 0)
+ continue;
+
+ if (syms[k].st_shndx == SHN_UNDEF ||
+ syms[k].st_shndx >= ehdr->e_shnum) {
+ pr_debug("Symbol: %s has bad section index %d.\n",
+ name, syms[k].st_shndx);
+ return NULL;
+ }
+
+ /* Found the symbol we are looking for */
+ return &syms[k];
+ }
+ }
+
+ return NULL;
+}
+
+void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name)
+{
+ struct purgatory_info *pi = &image->purgatory_info;
+ Elf_Sym *sym;
+ Elf_Shdr *sechdr;
+
+ sym = kexec_purgatory_find_symbol(pi, name);
+ if (!sym)
+ return ERR_PTR(-EINVAL);
+
+ sechdr = &pi->sechdrs[sym->st_shndx];
+
+ /*
+ * Returns the address where symbol will finally be loaded after
+ * kexec_load_segment()
+ */
+ return (void *)(sechdr->sh_addr + sym->st_value);
+}
+
+/*
+ * Get or set value of a symbol. If "get_value" is true, symbol value is
+ * returned in buf otherwise symbol value is set based on value in buf.
+ */
+int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
+ void *buf, unsigned int size, bool get_value)
+{
+ Elf_Sym *sym;
+ Elf_Shdr *sechdrs;
+ struct purgatory_info *pi = &image->purgatory_info;
+ char *sym_buf;
+
+ sym = kexec_purgatory_find_symbol(pi, name);
+ if (!sym)
+ return -EINVAL;
+
+ if (sym->st_size != size) {
+ pr_err("symbol %s size mismatch: expected %lu actual %u\n",
+ name, (unsigned long)sym->st_size, size);
+ return -EINVAL;
+ }
+
+ sechdrs = pi->sechdrs;
+
+ if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) {
+ pr_err("symbol %s is in a bss section. Cannot %s\n", name,
+ get_value ? "get" : "set");
+ return -EINVAL;
+ }
+
+ sym_buf = (unsigned char *)sechdrs[sym->st_shndx].sh_offset +
+ sym->st_value;
+
+ if (get_value)
+ memcpy((void *)buf, sym_buf, size);
+ else
+ memcpy((void *)sym_buf, buf, size);
+
+ return 0;
+}
+
/*
* Move into place and start executing a preloaded standalone
* executable. If nothing was preloaded return an error.
diff --git a/kernel/panic.c b/kernel/panic.c
index 62e16cef9cc2..d09dc5c32c67 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -224,6 +224,7 @@ static const struct tnt tnts[] = {
{ TAINT_FIRMWARE_WORKAROUND, 'I', ' ' },
{ TAINT_OOT_MODULE, 'O', ' ' },
{ TAINT_UNSIGNED_MODULE, 'E', ' ' },
+ { TAINT_SOFTLOCKUP, 'L', ' ' },
};
/**
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 13e839dbca07..5eb0e6c800bb 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -45,6 +45,7 @@
#include <linux/poll.h>
#include <linux/irq_work.h>
#include <linux/utsname.h>
+#include <linux/ctype.h>
#include <asm/uaccess.h>
@@ -56,7 +57,7 @@
int console_printk[4] = {
CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */
- DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */
+ MESSAGE_LOGLEVEL_DEFAULT, /* default_message_loglevel */
CONSOLE_LOGLEVEL_MIN, /* minimum_console_loglevel */
CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */
};
@@ -113,9 +114,9 @@ static int __down_trylock_console_sem(unsigned long ip)
* This is used for debugging the mess that is the VT code by
* keeping track if we have the console semaphore held. It's
* definitely not the perfect debug tool (we don't know if _WE_
- * hold it are racing, but it helps tracking those weird code
- * path in the console code where we end up in places I want
- * locked without the console sempahore held
+ * hold it and are racing, but it helps tracking those weird code
+ * paths in the console code where we end up in places I want
+ * locked without the console sempahore held).
*/
static int console_locked, console_suspended;
@@ -146,8 +147,8 @@ static int console_may_schedule;
* the overall length of the record.
*
* The heads to the first and last entry in the buffer, as well as the
- * sequence numbers of these both entries are maintained when messages
- * are stored..
+ * sequence numbers of these entries are maintained when messages are
+ * stored.
*
* If the heads indicate available messages, the length in the header
* tells the start next message. A length == 0 for the next message
@@ -257,7 +258,7 @@ static u64 clear_seq;
static u32 clear_idx;
#define PREFIX_MAX 32
-#define LOG_LINE_MAX 1024 - PREFIX_MAX
+#define LOG_LINE_MAX (1024 - PREFIX_MAX)
/* record buffer */
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
@@ -266,6 +267,7 @@ static u32 clear_idx;
#define LOG_ALIGN __alignof__(struct printk_log)
#endif
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
+#define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT)
static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
static char *log_buf = __log_buf;
static u32 log_buf_len = __LOG_BUF_LEN;
@@ -344,7 +346,7 @@ static int log_make_free_space(u32 msg_size)
while (log_first_seq < log_next_seq) {
if (logbuf_has_space(msg_size, false))
return 0;
- /* drop old messages until we have enough continuous space */
+ /* drop old messages until we have enough contiguous space */
log_first_idx = log_next(log_first_idx);
log_first_seq++;
}
@@ -453,11 +455,7 @@ static int log_store(int facility, int level,
return msg->text_len;
}
-#ifdef CONFIG_SECURITY_DMESG_RESTRICT
-int dmesg_restrict = 1;
-#else
-int dmesg_restrict;
-#endif
+int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT);
static int syslog_action_restricted(int type)
{
@@ -828,34 +826,74 @@ void log_buf_kexec_setup(void)
/* requested log_buf_len from kernel cmdline */
static unsigned long __initdata new_log_buf_len;
-/* save requested log_buf_len since it's too early to process it */
-static int __init log_buf_len_setup(char *str)
+/* we practice scaling the ring buffer by powers of 2 */
+static void __init log_buf_len_update(unsigned size)
{
- unsigned size = memparse(str, &str);
-
if (size)
size = roundup_pow_of_two(size);
if (size > log_buf_len)
new_log_buf_len = size;
+}
+
+/* save requested log_buf_len since it's too early to process it */
+static int __init log_buf_len_setup(char *str)
+{
+ unsigned size = memparse(str, &str);
+
+ log_buf_len_update(size);
return 0;
}
early_param("log_buf_len", log_buf_len_setup);
+static void __init log_buf_add_cpu(void)
+{
+ unsigned int cpu_extra;
+
+ /*
+ * archs should set up cpu_possible_bits properly with
+ * set_cpu_possible() after setup_arch() but just in
+ * case lets ensure this is valid.
+ */
+ if (num_possible_cpus() == 1)
+ return;
+
+ cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN;
+
+ /* by default this will only continue through for large > 64 CPUs */
+ if (cpu_extra <= __LOG_BUF_LEN / 2)
+ return;
+
+ pr_info("log_buf_len individual max cpu contribution: %d bytes\n",
+ __LOG_CPU_MAX_BUF_LEN);
+ pr_info("log_buf_len total cpu_extra contributions: %d bytes\n",
+ cpu_extra);
+ pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN);
+
+ log_buf_len_update(cpu_extra + __LOG_BUF_LEN);
+}
+
void __init setup_log_buf(int early)
{
unsigned long flags;
char *new_log_buf;
int free;
+ if (log_buf != __log_buf)
+ return;
+
+ if (!early && !new_log_buf_len)
+ log_buf_add_cpu();
+
if (!new_log_buf_len)
return;
if (early) {
new_log_buf =
- memblock_virt_alloc(new_log_buf_len, PAGE_SIZE);
+ memblock_virt_alloc(new_log_buf_len, LOG_ALIGN);
} else {
- new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len, 0);
+ new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len,
+ LOG_ALIGN);
}
if (unlikely(!new_log_buf)) {
@@ -872,7 +910,7 @@ void __init setup_log_buf(int early)
memcpy(log_buf, __log_buf, __LOG_BUF_LEN);
raw_spin_unlock_irqrestore(&logbuf_lock, flags);
- pr_info("log_buf_len: %d\n", log_buf_len);
+ pr_info("log_buf_len: %d bytes\n", log_buf_len);
pr_info("early log buf free: %d(%d%%)\n",
free, (free * 100) / __LOG_BUF_LEN);
}
@@ -947,11 +985,7 @@ static inline void boot_delay_msec(int level)
}
#endif
-#if defined(CONFIG_PRINTK_TIME)
-static bool printk_time = 1;
-#else
-static bool printk_time;
-#endif
+static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME);
module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
static size_t print_time(u64 ts, char *buf)
@@ -1310,7 +1344,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
* for pending data, not the size; return the count of
* records, not the length.
*/
- error = log_next_idx - syslog_idx;
+ error = log_next_seq - syslog_seq;
} else {
u64 seq = syslog_seq;
u32 idx = syslog_idx;
@@ -1476,7 +1510,7 @@ static struct cont {
struct task_struct *owner; /* task of first print*/
u64 ts_nsec; /* time of first print */
u8 level; /* log level of first message */
- u8 facility; /* log level of first message */
+ u8 facility; /* log facility of first message */
enum log_flags flags; /* prefix, newline flags */
bool flushed:1; /* buffer sealed and committed */
} cont;
@@ -1802,7 +1836,7 @@ EXPORT_SYMBOL(printk);
#define LOG_LINE_MAX 0
#define PREFIX_MAX 0
-#define LOG_LINE_MAX 0
+
static u64 syslog_seq;
static u32 syslog_idx;
static u64 console_seq;
@@ -1881,11 +1915,12 @@ static int __add_preferred_console(char *name, int idx, char *options,
return 0;
}
/*
- * Set up a list of consoles. Called from init/main.c
+ * Set up a console. Called via do_early_param() in init/main.c
+ * for each "console=" parameter in the boot command line.
*/
static int __init console_setup(char *str)
{
- char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for index */
+ char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */
char *s, *options, *brl_options = NULL;
int idx;
@@ -1902,7 +1937,8 @@ static int __init console_setup(char *str)
strncpy(buf, str, sizeof(buf) - 1);
}
buf[sizeof(buf) - 1] = 0;
- if ((options = strchr(str, ',')) != NULL)
+ options = strchr(str, ',');
+ if (options)
*(options++) = 0;
#ifdef __sparc__
if (!strcmp(str, "ttya"))
@@ -1911,7 +1947,7 @@ static int __init console_setup(char *str)
strcpy(buf, "ttyS1");
#endif
for (s = buf; *s; s++)
- if ((*s >= '0' && *s <= '9') || *s == ',')
+ if (isdigit(*s) || *s == ',')
break;
idx = simple_strtoul(s, NULL, 10);
*s = 0;
@@ -1950,7 +1986,6 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha
i++, c++)
if (strcmp(c->name, name) == 0 && c->index == idx) {
strlcpy(c->name, name_new, sizeof(c->name));
- c->name[sizeof(c->name) - 1] = 0;
c->options = options;
c->index = idx_new;
return i;
@@ -2045,8 +2080,8 @@ EXPORT_SYMBOL(console_lock);
/**
* console_trylock - try to lock the console system for exclusive use.
*
- * Tried to acquire a lock which guarantees that the caller has
- * exclusive access to the console system and the console_drivers list.
+ * Try to acquire a lock which guarantees that the caller has exclusive
+ * access to the console system and the console_drivers list.
*
* returns 1 on success, and 0 on failure to acquire the lock.
*/
@@ -2618,14 +2653,13 @@ EXPORT_SYMBOL(__printk_ratelimit);
bool printk_timed_ratelimit(unsigned long *caller_jiffies,
unsigned int interval_msecs)
{
- if (*caller_jiffies == 0
- || !time_in_range(jiffies, *caller_jiffies,
- *caller_jiffies
- + msecs_to_jiffies(interval_msecs))) {
- *caller_jiffies = jiffies;
- return true;
- }
- return false;
+ unsigned long elapsed = jiffies - *caller_jiffies;
+
+ if (*caller_jiffies && elapsed <= msecs_to_jiffies(interval_msecs))
+ return false;
+
+ *caller_jiffies = jiffies;
+ return true;
}
EXPORT_SYMBOL(printk_timed_ratelimit);
diff --git a/kernel/resource.c b/kernel/resource.c
index 3c2237ac32db..da14b8d09296 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -59,10 +59,12 @@ static DEFINE_RWLOCK(resource_lock);
static struct resource *bootmem_resource_free;
static DEFINE_SPINLOCK(bootmem_resource_lock);
-static void *r_next(struct seq_file *m, void *v, loff_t *pos)
+static struct resource *next_resource(struct resource *p, bool sibling_only)
{
- struct resource *p = v;
- (*pos)++;
+ /* Caller wants to traverse through siblings only */
+ if (sibling_only)
+ return p->sibling;
+
if (p->child)
return p->child;
while (!p->sibling && p->parent)
@@ -70,6 +72,13 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos)
return p->sibling;
}
+static void *r_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct resource *p = v;
+ (*pos)++;
+ return (void *)next_resource(p, false);
+}
+
#ifdef CONFIG_PROC_FS
enum { MAX_IORES_LEVEL = 5 };
@@ -322,16 +331,19 @@ int release_resource(struct resource *old)
EXPORT_SYMBOL(release_resource);
-#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
/*
- * Finds the lowest memory reosurce exists within [res->start.res->end)
+ * Finds the lowest iomem reosurce exists with-in [res->start.res->end)
* the caller must specify res->start, res->end, res->flags and "name".
* If found, returns 0, res is overwritten, if not found, returns -1.
+ * This walks through whole tree and not just first level children
+ * until and unless first_level_children_only is true.
*/
-static int find_next_system_ram(struct resource *res, char *name)
+static int find_next_iomem_res(struct resource *res, char *name,
+ bool first_level_children_only)
{
resource_size_t start, end;
struct resource *p;
+ bool sibling_only = false;
BUG_ON(!res);
@@ -340,8 +352,14 @@ static int find_next_system_ram(struct resource *res, char *name)
BUG_ON(start >= end);
read_lock(&resource_lock);
- for (p = iomem_resource.child; p ; p = p->sibling) {
- /* system ram is just marked as IORESOURCE_MEM */
+
+ if (first_level_children_only) {
+ p = iomem_resource.child;
+ sibling_only = true;
+ } else
+ p = &iomem_resource;
+
+ while ((p = next_resource(p, sibling_only))) {
if (p->flags != res->flags)
continue;
if (name && strcmp(p->name, name))
@@ -353,6 +371,7 @@ static int find_next_system_ram(struct resource *res, char *name)
if ((p->end >= start) && (p->start < end))
break;
}
+
read_unlock(&resource_lock);
if (!p)
return -1;
@@ -365,6 +384,70 @@ static int find_next_system_ram(struct resource *res, char *name)
}
/*
+ * Walks through iomem resources and calls func() with matching resource
+ * ranges. This walks through whole tree and not just first level children.
+ * All the memory ranges which overlap start,end and also match flags and
+ * name are valid candidates.
+ *
+ * @name: name of resource
+ * @flags: resource flags
+ * @start: start addr
+ * @end: end addr
+ */
+int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end,
+ void *arg, int (*func)(u64, u64, void *))
+{
+ struct resource res;
+ u64 orig_end;
+ int ret = -1;
+
+ res.start = start;
+ res.end = end;
+ res.flags = flags;
+ orig_end = res.end;
+ while ((res.start < res.end) &&
+ (!find_next_iomem_res(&res, name, false))) {
+ ret = (*func)(res.start, res.end, arg);
+ if (ret)
+ break;
+ res.start = res.end + 1;
+ res.end = orig_end;
+ }
+ return ret;
+}
+
+/*
+ * This function calls callback against all memory range of "System RAM"
+ * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY.
+ * Now, this function is only for "System RAM". This function deals with
+ * full ranges and not pfn. If resources are not pfn aligned, dealing
+ * with pfn can truncate ranges.
+ */
+int walk_system_ram_res(u64 start, u64 end, void *arg,
+ int (*func)(u64, u64, void *))
+{
+ struct resource res;
+ u64 orig_end;
+ int ret = -1;
+
+ res.start = start;
+ res.end = end;
+ res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ orig_end = res.end;
+ while ((res.start < res.end) &&
+ (!find_next_iomem_res(&res, "System RAM", true))) {
+ ret = (*func)(res.start, res.end, arg);
+ if (ret)
+ break;
+ res.start = res.end + 1;
+ res.end = orig_end;
+ }
+ return ret;
+}
+
+#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
+
+/*
* This function calls callback against all memory range of "System RAM"
* which are marked as IORESOURCE_MEM and IORESOUCE_BUSY.
* Now, this function is only for "System RAM".
@@ -382,7 +465,7 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
orig_end = res.end;
while ((res.start < res.end) &&
- (find_next_system_ram(&res, "System RAM") >= 0)) {
+ (find_next_iomem_res(&res, "System RAM", true) >= 0)) {
pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT;
end_pfn = (res.end + 1) >> PAGE_SHIFT;
if (end_pfn > pfn)
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 36441b51b5df..51ea89f3089c 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -25,6 +25,7 @@ cond_syscall(sys_swapon);
cond_syscall(sys_swapoff);
cond_syscall(sys_kexec_load);
cond_syscall(compat_sys_kexec_load);
+cond_syscall(sys_kexec_file_load);
cond_syscall(sys_init_module);
cond_syscall(sys_finit_module);
cond_syscall(sys_delete_module);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 75b22e22a72c..75875a741b5e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1240,8 +1240,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = hugetlb_sysctl_handler,
- .extra1 = (void *)&hugetlb_zero,
- .extra2 = (void *)&hugetlb_infinity,
+ .extra1 = &zero,
},
#ifdef CONFIG_NUMA
{
@@ -1250,8 +1249,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = &hugetlb_mempolicy_sysctl_handler,
- .extra1 = (void *)&hugetlb_zero,
- .extra2 = (void *)&hugetlb_infinity,
+ .extra1 = &zero,
},
#endif
{
@@ -1274,8 +1272,7 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = hugetlb_overcommit_handler,
- .extra1 = (void *)&hugetlb_zero,
- .extra2 = (void *)&hugetlb_infinity,
+ .extra1 = &zero,
},
#endif
{
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c
index 12d6ebbfdd83..0dbab6d1acb4 100644
--- a/kernel/test_kprobes.c
+++ b/kernel/test_kprobes.c
@@ -14,6 +14,8 @@
* the GNU General Public License for more details.
*/
+#define pr_fmt(fmt) "Kprobe smoke test: " fmt
+
#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/random.h>
@@ -41,8 +43,7 @@ static void kp_post_handler(struct kprobe *p, struct pt_regs *regs,
{
if (preh_val != (rand1 / div_factor)) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "incorrect value in post_handler\n");
+ pr_err("incorrect value in post_handler\n");
}
posth_val = preh_val + div_factor;
}
@@ -59,8 +60,7 @@ static int test_kprobe(void)
ret = register_kprobe(&kp);
if (ret < 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "register_kprobe returned %d\n", ret);
+ pr_err("register_kprobe returned %d\n", ret);
return ret;
}
@@ -68,14 +68,12 @@ static int test_kprobe(void)
unregister_kprobe(&kp);
if (preh_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kprobe pre_handler not called\n");
+ pr_err("kprobe pre_handler not called\n");
handler_errors++;
}
if (posth_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kprobe post_handler not called\n");
+ pr_err("kprobe post_handler not called\n");
handler_errors++;
}
@@ -98,8 +96,7 @@ static void kp_post_handler2(struct kprobe *p, struct pt_regs *regs,
{
if (preh_val != (rand1 / div_factor) + 1) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "incorrect value in post_handler2\n");
+ pr_err("incorrect value in post_handler2\n");
}
posth_val = preh_val + div_factor;
}
@@ -120,8 +117,7 @@ static int test_kprobes(void)
kp.flags = 0;
ret = register_kprobes(kps, 2);
if (ret < 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "register_kprobes returned %d\n", ret);
+ pr_err("register_kprobes returned %d\n", ret);
return ret;
}
@@ -130,14 +126,12 @@ static int test_kprobes(void)
ret = target(rand1);
if (preh_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kprobe pre_handler not called\n");
+ pr_err("kprobe pre_handler not called\n");
handler_errors++;
}
if (posth_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kprobe post_handler not called\n");
+ pr_err("kprobe post_handler not called\n");
handler_errors++;
}
@@ -146,14 +140,12 @@ static int test_kprobes(void)
ret = target2(rand1);
if (preh_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kprobe pre_handler2 not called\n");
+ pr_err("kprobe pre_handler2 not called\n");
handler_errors++;
}
if (posth_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kprobe post_handler2 not called\n");
+ pr_err("kprobe post_handler2 not called\n");
handler_errors++;
}
@@ -166,8 +158,7 @@ static u32 j_kprobe_target(u32 value)
{
if (value != rand1) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "incorrect value in jprobe handler\n");
+ pr_err("incorrect value in jprobe handler\n");
}
jph_val = rand1;
@@ -186,16 +177,14 @@ static int test_jprobe(void)
ret = register_jprobe(&jp);
if (ret < 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "register_jprobe returned %d\n", ret);
+ pr_err("register_jprobe returned %d\n", ret);
return ret;
}
ret = target(rand1);
unregister_jprobe(&jp);
if (jph_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "jprobe handler not called\n");
+ pr_err("jprobe handler not called\n");
handler_errors++;
}
@@ -217,24 +206,21 @@ static int test_jprobes(void)
jp.kp.flags = 0;
ret = register_jprobes(jps, 2);
if (ret < 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "register_jprobes returned %d\n", ret);
+ pr_err("register_jprobes returned %d\n", ret);
return ret;
}
jph_val = 0;
ret = target(rand1);
if (jph_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "jprobe handler not called\n");
+ pr_err("jprobe handler not called\n");
handler_errors++;
}
jph_val = 0;
ret = target2(rand1);
if (jph_val == 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "jprobe handler2 not called\n");
+ pr_err("jprobe handler2 not called\n");
handler_errors++;
}
unregister_jprobes(jps, 2);
@@ -256,13 +242,11 @@ static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
if (ret != (rand1 / div_factor)) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "incorrect value in kretprobe handler\n");
+ pr_err("incorrect value in kretprobe handler\n");
}
if (krph_val == 0) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "call to kretprobe entry handler failed\n");
+ pr_err("call to kretprobe entry handler failed\n");
}
krph_val = rand1;
@@ -281,16 +265,14 @@ static int test_kretprobe(void)
ret = register_kretprobe(&rp);
if (ret < 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "register_kretprobe returned %d\n", ret);
+ pr_err("register_kretprobe returned %d\n", ret);
return ret;
}
ret = target(rand1);
unregister_kretprobe(&rp);
if (krph_val != rand1) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kretprobe handler not called\n");
+ pr_err("kretprobe handler not called\n");
handler_errors++;
}
@@ -303,13 +285,11 @@ static int return_handler2(struct kretprobe_instance *ri, struct pt_regs *regs)
if (ret != (rand1 / div_factor) + 1) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "incorrect value in kretprobe handler2\n");
+ pr_err("incorrect value in kretprobe handler2\n");
}
if (krph_val == 0) {
handler_errors++;
- printk(KERN_ERR "Kprobe smoke test failed: "
- "call to kretprobe entry handler failed\n");
+ pr_err("call to kretprobe entry handler failed\n");
}
krph_val = rand1;
@@ -332,24 +312,21 @@ static int test_kretprobes(void)
rp.kp.flags = 0;
ret = register_kretprobes(rps, 2);
if (ret < 0) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "register_kretprobe returned %d\n", ret);
+ pr_err("register_kretprobe returned %d\n", ret);
return ret;
}
krph_val = 0;
ret = target(rand1);
if (krph_val != rand1) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kretprobe handler not called\n");
+ pr_err("kretprobe handler not called\n");
handler_errors++;
}
krph_val = 0;
ret = target2(rand1);
if (krph_val != rand1) {
- printk(KERN_ERR "Kprobe smoke test failed: "
- "kretprobe handler2 not called\n");
+ pr_err("kretprobe handler2 not called\n");
handler_errors++;
}
unregister_kretprobes(rps, 2);
@@ -368,7 +345,7 @@ int init_test_probes(void)
rand1 = prandom_u32();
} while (rand1 <= div_factor);
- printk(KERN_INFO "Kprobe smoke test started\n");
+ pr_info("started\n");
num_tests++;
ret = test_kprobe();
if (ret < 0)
@@ -402,13 +379,11 @@ int init_test_probes(void)
#endif /* CONFIG_KRETPROBES */
if (errors)
- printk(KERN_ERR "BUG: Kprobe smoke test: %d out of "
- "%d tests failed\n", errors, num_tests);
+ pr_err("BUG: %d out of %d tests failed\n", errors, num_tests);
else if (handler_errors)
- printk(KERN_ERR "BUG: Kprobe smoke test: %d error(s) "
- "running handlers\n", handler_errors);
+ pr_err("BUG: %d error(s) running handlers\n", handler_errors);
else
- printk(KERN_INFO "Kprobe smoke test passed successfully\n");
+ pr_info("passed successfully\n");
return 0;
}
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index fcc02560fd6b..aa312b0dc3ec 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -526,21 +526,21 @@ static void m_stop(struct seq_file *seq, void *v)
return;
}
-struct seq_operations proc_uid_seq_operations = {
+const struct seq_operations proc_uid_seq_operations = {
.start = uid_m_start,
.stop = m_stop,
.next = m_next,
.show = uid_m_show,
};
-struct seq_operations proc_gid_seq_operations = {
+const struct seq_operations proc_gid_seq_operations = {
.start = gid_m_start,
.stop = m_stop,
.next = m_next,
.show = gid_m_show,
};
-struct seq_operations proc_projid_seq_operations = {
+const struct seq_operations proc_projid_seq_operations = {
.start = projid_m_start,
.stop = m_stop,
.next = m_next,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index c3319bd1b040..a8d6914030fe 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -260,9 +260,11 @@ static void watchdog_overflow_callback(struct perf_event *event,
return;
if (hardlockup_panic)
- panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+ panic("Watchdog detected hard LOCKUP on cpu %d",
+ this_cpu);
else
- WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+ WARN(1, "Watchdog detected hard LOCKUP on cpu %d",
+ this_cpu);
__this_cpu_write(hard_watchdog_warn, true);
return;
@@ -345,7 +347,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
}
}
- printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
+ pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
smp_processor_id(), duration,
current->comm, task_pid_nr(current));
print_modules();
@@ -366,6 +368,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
smp_mb__after_atomic();
}
+ add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
if (softlockup_panic)
panic("softlockup: hung tasks");
__this_cpu_write(soft_watchdog_warn, true);
@@ -484,7 +487,7 @@ static int watchdog_nmi_enable(unsigned int cpu)
if (PTR_ERR(event) == -EOPNOTSUPP)
pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
else if (PTR_ERR(event) == -ENOENT)
- pr_warning("disabled (cpu%i): hardware events not enabled\n",
+ pr_warn("disabled (cpu%i): hardware events not enabled\n",
cpu);
else
pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
diff --git a/lib/Kconfig b/lib/Kconfig
index 334f7722a999..fdf90f3aa90d 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -177,6 +177,13 @@ config CRC8
when they need to do cyclic redundancy check according CRC8
algorithm. Module will be called crc8.
+config CRC64_ECMA
+ tristate "CRC64 ECMA function"
+ help
+ This option provides CRC64 ECMA function. Drivers may select this
+ when they need to do cyclic redundancy check according to the CRC64
+ ECMA algorithm.
+
config AUDIT_GENERIC
bool
depends on AUDIT && !AUDIT_ARCH
@@ -396,6 +403,39 @@ config CPU_RMAP
config DQL
bool
+config GLOB
+ bool
+# This actually supports modular compilation, but the module overhead
+# is ridiculous for the amount of code involved. Until an out-of-tree
+# driver asks for it, we'll just link it directly it into the kernel
+# when required. Since we're ignoring out-of-tree users, there's also
+# no need bother prompting for a manual decision:
+# prompt "glob_match() function"
+ help
+ This option provides a glob_match function for performing
+ simple text pattern matching. It originated in the ATA code
+ to blacklist particular drive models, but other device drivers
+ may need similar functionality.
+
+ All drivers in the Linux kernel tree that require this function
+ should automatically select this option. Say N unless you
+ are compiling an out-of tree driver which tells you that it
+ depends on this.
+
+config GLOB_SELFTEST
+ bool "glob self-test on init"
+ default n
+ depends on GLOB
+ help
+ This option enables a simple self-test of the glob_match
+ function on startup. It is primarily useful for people
+ working on the code to ensure they haven't introduced any
+ regressions.
+
+ It only adds a little bit of code and slows kernel boot (or
+ module load) by a small amount, so you're welcome to play with
+ it, but you probably don't need it.
+
#
# Netlink attribute parsing support is select'ed if needed
#
@@ -474,4 +514,11 @@ config UCS2_STRING
source "lib/fonts/Kconfig"
+#
+# sg chaining option
+#
+
+config ARCH_HAS_SG_CHAIN
+ def_bool n
+
endmenu
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 4fc8e743afdf..6b62a765a92f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -15,7 +15,7 @@ config PRINTK_TIME
The behavior is also controlled by the kernel command line
parameter printk.time=1. See Documentation/kernel-parameters.txt
-config DEFAULT_MESSAGE_LOGLEVEL
+config MESSAGE_LOGLEVEL_DEFAULT
int "Default message log level (1-7)"
range 1 7
default "4"
diff --git a/lib/Makefile b/lib/Makefile
index ba967a19edba..e48067c304d7 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_CRC32) += crc32.o
obj-$(CONFIG_CRC7) += crc7.o
obj-$(CONFIG_LIBCRC32C) += libcrc32c.o
obj-$(CONFIG_CRC8) += crc8.o
+obj-$(CONFIG_CRC64_ECMA) += crc64_ecma.o
obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
@@ -136,6 +137,8 @@ obj-$(CONFIG_CORDIC) += cordic.o
obj-$(CONFIG_DQL) += dynamic_queue_limits.o
+obj-$(CONFIG_GLOB) += glob.o
+
obj-$(CONFIG_MPILIB) += mpi/
obj-$(CONFIG_SIGNATURE) += digsig.o
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 06f7e4fe8d2d..1e031f2c9aba 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -40,9 +40,9 @@
* for the best explanations of this ordering.
*/
-int __bitmap_empty(const unsigned long *bitmap, int bits)
+int __bitmap_empty(const unsigned long *bitmap, unsigned int bits)
{
- int k, lim = bits/BITS_PER_LONG;
+ unsigned int k, lim = bits/BITS_PER_LONG;
for (k = 0; k < lim; ++k)
if (bitmap[k])
return 0;
@@ -55,9 +55,9 @@ int __bitmap_empty(const unsigned long *bitmap, int bits)
}
EXPORT_SYMBOL(__bitmap_empty);
-int __bitmap_full(const unsigned long *bitmap, int bits)
+int __bitmap_full(const unsigned long *bitmap, unsigned int bits)
{
- int k, lim = bits/BITS_PER_LONG;
+ unsigned int k, lim = bits/BITS_PER_LONG;
for (k = 0; k < lim; ++k)
if (~bitmap[k])
return 0;
@@ -71,9 +71,9 @@ int __bitmap_full(const unsigned long *bitmap, int bits)
EXPORT_SYMBOL(__bitmap_full);
int __bitmap_equal(const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits)
+ const unsigned long *bitmap2, unsigned int bits)
{
- int k, lim = bits/BITS_PER_LONG;
+ unsigned int k, lim = bits/BITS_PER_LONG;
for (k = 0; k < lim; ++k)
if (bitmap1[k] != bitmap2[k])
return 0;
@@ -86,14 +86,14 @@ int __bitmap_equal(const unsigned long *bitmap1,
}
EXPORT_SYMBOL(__bitmap_equal);
-void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits)
+void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits)
{
- int k, lim = bits/BITS_PER_LONG;
+ unsigned int k, lim = bits/BITS_PER_LONG;
for (k = 0; k < lim; ++k)
dst[k] = ~src[k];
if (bits % BITS_PER_LONG)
- dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits);
+ dst[k] = ~src[k];
}
EXPORT_SYMBOL(__bitmap_complement);
@@ -182,23 +182,26 @@ void __bitmap_shift_left(unsigned long *dst,
EXPORT_SYMBOL(__bitmap_shift_left);
int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits)
+ const unsigned long *bitmap2, unsigned int bits)
{
- int k;
- int nr = BITS_TO_LONGS(bits);
+ unsigned int k;
+ unsigned int lim = bits/BITS_PER_LONG;
unsigned long result = 0;
- for (k = 0; k < nr; k++)
+ for (k = 0; k < lim; k++)
result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+ if (bits % BITS_PER_LONG)
+ result |= (dst[k] = bitmap1[k] & bitmap2[k] &
+ BITMAP_LAST_WORD_MASK(bits));
return result != 0;
}
EXPORT_SYMBOL(__bitmap_and);
void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits)
+ const unsigned long *bitmap2, unsigned int bits)
{
- int k;
- int nr = BITS_TO_LONGS(bits);
+ unsigned int k;
+ unsigned int nr = BITS_TO_LONGS(bits);
for (k = 0; k < nr; k++)
dst[k] = bitmap1[k] | bitmap2[k];
@@ -206,10 +209,10 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
EXPORT_SYMBOL(__bitmap_or);
void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits)
+ const unsigned long *bitmap2, unsigned int bits)
{
- int k;
- int nr = BITS_TO_LONGS(bits);
+ unsigned int k;
+ unsigned int nr = BITS_TO_LONGS(bits);
for (k = 0; k < nr; k++)
dst[k] = bitmap1[k] ^ bitmap2[k];
@@ -217,22 +220,25 @@ void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
EXPORT_SYMBOL(__bitmap_xor);
int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits)
+ const unsigned long *bitmap2, unsigned int bits)
{
- int k;
- int nr = BITS_TO_LONGS(bits);
+ unsigned int k;
+ unsigned int lim = bits/BITS_PER_LONG;
unsigned long result = 0;
- for (k = 0; k < nr; k++)
+ for (k = 0; k < lim; k++)
result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
+ if (bits % BITS_PER_LONG)
+ result |= (dst[k] = bitmap1[k] & ~bitmap2[k] &
+ BITMAP_LAST_WORD_MASK(bits));
return result != 0;
}
EXPORT_SYMBOL(__bitmap_andnot);
int __bitmap_intersects(const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits)
+ const unsigned long *bitmap2, unsigned int bits)
{
- int k, lim = bits/BITS_PER_LONG;
+ unsigned int k, lim = bits/BITS_PER_LONG;
for (k = 0; k < lim; ++k)
if (bitmap1[k] & bitmap2[k])
return 1;
@@ -245,9 +251,9 @@ int __bitmap_intersects(const unsigned long *bitmap1,
EXPORT_SYMBOL(__bitmap_intersects);
int __bitmap_subset(const unsigned long *bitmap1,
- const unsigned long *bitmap2, int bits)
+ const unsigned long *bitmap2, unsigned int bits)
{
- int k, lim = bits/BITS_PER_LONG;
+ unsigned int k, lim = bits/BITS_PER_LONG;
for (k = 0; k < lim; ++k)
if (bitmap1[k] & ~bitmap2[k])
return 0;
@@ -259,9 +265,10 @@ int __bitmap_subset(const unsigned long *bitmap1,
}
EXPORT_SYMBOL(__bitmap_subset);
-int __bitmap_weight(const unsigned long *bitmap, int bits)
+int __bitmap_weight(const unsigned long *bitmap, unsigned int bits)
{
- int k, w = 0, lim = bits/BITS_PER_LONG;
+ unsigned int k, lim = bits/BITS_PER_LONG;
+ int w = 0;
for (k = 0; k < lim; k++)
w += hweight_long(bitmap[k]);
@@ -273,42 +280,42 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
}
EXPORT_SYMBOL(__bitmap_weight);
-void bitmap_set(unsigned long *map, int start, int nr)
+void bitmap_set(unsigned long *map, unsigned int start, int len)
{
unsigned long *p = map + BIT_WORD(start);
- const int size = start + nr;
+ const unsigned int size = start + len;
int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
- while (nr - bits_to_set >= 0) {
+ while (len - bits_to_set >= 0) {
*p |= mask_to_set;
- nr -= bits_to_set;
+ len -= bits_to_set;
bits_to_set = BITS_PER_LONG;
mask_to_set = ~0UL;
p++;
}
- if (nr) {
+ if (len) {
mask_to_set &= BITMAP_LAST_WORD_MASK(size);
*p |= mask_to_set;
}
}
EXPORT_SYMBOL(bitmap_set);
-void bitmap_clear(unsigned long *map, int start, int nr)
+void bitmap_clear(unsigned long *map, unsigned int start, int len)
{
unsigned long *p = map + BIT_WORD(start);
- const int size = start + nr;
+ const unsigned int size = start + len;
int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
- while (nr - bits_to_clear >= 0) {
+ while (len - bits_to_clear >= 0) {
*p &= ~mask_to_clear;
- nr -= bits_to_clear;
+ len -= bits_to_clear;
bits_to_clear = BITS_PER_LONG;
mask_to_clear = ~0UL;
p++;
}
- if (nr) {
+ if (len) {
mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
*p &= ~mask_to_clear;
}
@@ -664,13 +671,8 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen,
int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
{
- char *nl = strchr(bp, '\n');
- int len;
-
- if (nl)
- len = nl - bp;
- else
- len = strlen(bp);
+ char *nl = strchrnul(bp, '\n');
+ int len = nl - bp;
return __bitmap_parselist(bp, len, 0, maskp, nmaskbits);
}
@@ -716,7 +718,7 @@ EXPORT_SYMBOL(bitmap_parselist_user);
*
* If for example, just bits 4 through 7 are set in @buf, then @pos
* values 4 through 7 will get mapped to 0 through 3, respectively,
- * and other @pos values will get mapped to 0. When @pos value 7
+ * and other @pos values will get mapped to -1. When @pos value 7
* gets mapped to (returns) @ord value 3 in this example, that means
* that bit 7 is the 3rd (starting with 0th) set bit in @buf.
*
@@ -1046,7 +1048,7 @@ enum {
REG_OP_RELEASE, /* clear all bits in region */
};
-static int __reg_op(unsigned long *bitmap, int pos, int order, int reg_op)
+static int __reg_op(unsigned long *bitmap, unsigned int pos, int order, int reg_op)
{
int nbits_reg; /* number of bits in region */
int index; /* index first long of region in bitmap */
@@ -1112,11 +1114,11 @@ done:
* Return the bit offset in bitmap of the allocated region,
* or -errno on failure.
*/
-int bitmap_find_free_region(unsigned long *bitmap, int bits, int order)
+int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order)
{
- int pos, end; /* scans bitmap by regions of size order */
+ unsigned int pos, end; /* scans bitmap by regions of size order */
- for (pos = 0 ; (end = pos + (1 << order)) <= bits; pos = end) {
+ for (pos = 0 ; (end = pos + (1U << order)) <= bits; pos = end) {
if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
continue;
__reg_op(bitmap, pos, order, REG_OP_ALLOC);
@@ -1137,7 +1139,7 @@ EXPORT_SYMBOL(bitmap_find_free_region);
*
* No return value.
*/
-void bitmap_release_region(unsigned long *bitmap, int pos, int order)
+void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order)
{
__reg_op(bitmap, pos, order, REG_OP_RELEASE);
}
@@ -1154,12 +1156,11 @@ EXPORT_SYMBOL(bitmap_release_region);
* Return 0 on success, or %-EBUSY if specified region wasn't
* free (not all bits were zero).
*/
-int bitmap_allocate_region(unsigned long *bitmap, int pos, int order)
+int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order)
{
if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
return -EBUSY;
- __reg_op(bitmap, pos, order, REG_OP_ALLOC);
- return 0;
+ return __reg_op(bitmap, pos, order, REG_OP_ALLOC);
}
EXPORT_SYMBOL(bitmap_allocate_region);
diff --git a/lib/cmdline.c b/lib/cmdline.c
index d4932f745e92..76a712e6e20e 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -121,11 +121,7 @@ EXPORT_SYMBOL(get_options);
* @retptr: (output) Optional pointer to next char after parse completes
*
* Parses a string into a number. The number stored at @ptr is
- * potentially suffixed with %K (for kilobytes, or 1024 bytes),
- * %M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or
- * 1073741824). If the number is suffixed with K, M, or G, then
- * the return value is the number multiplied by one kilobyte, one
- * megabyte, or one gigabyte, respectively.
+ * potentially suffixed with K, M, G, T, P, E.
*/
unsigned long long memparse(const char *ptr, char **retptr)
@@ -135,6 +131,15 @@ unsigned long long memparse(const char *ptr, char **retptr)
unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
switch (*endptr) {
+ case 'E':
+ case 'e':
+ ret <<= 10;
+ case 'P':
+ case 'p':
+ ret <<= 10;
+ case 'T':
+ case 't':
+ ret <<= 10;
case 'G':
case 'g':
ret <<= 10;
diff --git a/lib/crc64_ecma.c b/lib/crc64_ecma.c
new file mode 100644
index 000000000000..41629ea5a60c
--- /dev/null
+++ b/lib/crc64_ecma.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/crc64_ecma.h>
+
+
+#define CRC64_BYTE_MASK 0xFF
+#define CRC64_TABLE_SIZE 256
+
+
+struct crc64_table {
+ u64 seed;
+ u64 table[CRC64_TABLE_SIZE];
+};
+
+
+static struct crc64_table CRC64_ECMA_182 = {
+ CRC64_DEFAULT_INITVAL,
+ {
+ 0x0000000000000000ULL,
+ 0xb32e4cbe03a75f6fULL,
+ 0xf4843657a840a05bULL,
+ 0x47aa7ae9abe7ff34ULL,
+ 0x7bd0c384ff8f5e33ULL,
+ 0xc8fe8f3afc28015cULL,
+ 0x8f54f5d357cffe68ULL,
+ 0x3c7ab96d5468a107ULL,
+ 0xf7a18709ff1ebc66ULL,
+ 0x448fcbb7fcb9e309ULL,
+ 0x0325b15e575e1c3dULL,
+ 0xb00bfde054f94352ULL,
+ 0x8c71448d0091e255ULL,
+ 0x3f5f08330336bd3aULL,
+ 0x78f572daa8d1420eULL,
+ 0xcbdb3e64ab761d61ULL,
+ 0x7d9ba13851336649ULL,
+ 0xceb5ed8652943926ULL,
+ 0x891f976ff973c612ULL,
+ 0x3a31dbd1fad4997dULL,
+ 0x064b62bcaebc387aULL,
+ 0xb5652e02ad1b6715ULL,
+ 0xf2cf54eb06fc9821ULL,
+ 0x41e11855055bc74eULL,
+ 0x8a3a2631ae2dda2fULL,
+ 0x39146a8fad8a8540ULL,
+ 0x7ebe1066066d7a74ULL,
+ 0xcd905cd805ca251bULL,
+ 0xf1eae5b551a2841cULL,
+ 0x42c4a90b5205db73ULL,
+ 0x056ed3e2f9e22447ULL,
+ 0xb6409f5cfa457b28ULL,
+ 0xfb374270a266cc92ULL,
+ 0x48190ecea1c193fdULL,
+ 0x0fb374270a266cc9ULL,
+ 0xbc9d3899098133a6ULL,
+ 0x80e781f45de992a1ULL,
+ 0x33c9cd4a5e4ecdceULL,
+ 0x7463b7a3f5a932faULL,
+ 0xc74dfb1df60e6d95ULL,
+ 0x0c96c5795d7870f4ULL,
+ 0xbfb889c75edf2f9bULL,
+ 0xf812f32ef538d0afULL,
+ 0x4b3cbf90f69f8fc0ULL,
+ 0x774606fda2f72ec7ULL,
+ 0xc4684a43a15071a8ULL,
+ 0x83c230aa0ab78e9cULL,
+ 0x30ec7c140910d1f3ULL,
+ 0x86ace348f355aadbULL,
+ 0x3582aff6f0f2f5b4ULL,
+ 0x7228d51f5b150a80ULL,
+ 0xc10699a158b255efULL,
+ 0xfd7c20cc0cdaf4e8ULL,
+ 0x4e526c720f7dab87ULL,
+ 0x09f8169ba49a54b3ULL,
+ 0xbad65a25a73d0bdcULL,
+ 0x710d64410c4b16bdULL,
+ 0xc22328ff0fec49d2ULL,
+ 0x85895216a40bb6e6ULL,
+ 0x36a71ea8a7ace989ULL,
+ 0x0adda7c5f3c4488eULL,
+ 0xb9f3eb7bf06317e1ULL,
+ 0xfe5991925b84e8d5ULL,
+ 0x4d77dd2c5823b7baULL,
+ 0x64b62bcaebc387a1ULL,
+ 0xd7986774e864d8ceULL,
+ 0x90321d9d438327faULL,
+ 0x231c512340247895ULL,
+ 0x1f66e84e144cd992ULL,
+ 0xac48a4f017eb86fdULL,
+ 0xebe2de19bc0c79c9ULL,
+ 0x58cc92a7bfab26a6ULL,
+ 0x9317acc314dd3bc7ULL,
+ 0x2039e07d177a64a8ULL,
+ 0x67939a94bc9d9b9cULL,
+ 0xd4bdd62abf3ac4f3ULL,
+ 0xe8c76f47eb5265f4ULL,
+ 0x5be923f9e8f53a9bULL,
+ 0x1c4359104312c5afULL,
+ 0xaf6d15ae40b59ac0ULL,
+ 0x192d8af2baf0e1e8ULL,
+ 0xaa03c64cb957be87ULL,
+ 0xeda9bca512b041b3ULL,
+ 0x5e87f01b11171edcULL,
+ 0x62fd4976457fbfdbULL,
+ 0xd1d305c846d8e0b4ULL,
+ 0x96797f21ed3f1f80ULL,
+ 0x2557339fee9840efULL,
+ 0xee8c0dfb45ee5d8eULL,
+ 0x5da24145464902e1ULL,
+ 0x1a083bacedaefdd5ULL,
+ 0xa9267712ee09a2baULL,
+ 0x955cce7fba6103bdULL,
+ 0x267282c1b9c65cd2ULL,
+ 0x61d8f8281221a3e6ULL,
+ 0xd2f6b4961186fc89ULL,
+ 0x9f8169ba49a54b33ULL,
+ 0x2caf25044a02145cULL,
+ 0x6b055fede1e5eb68ULL,
+ 0xd82b1353e242b407ULL,
+ 0xe451aa3eb62a1500ULL,
+ 0x577fe680b58d4a6fULL,
+ 0x10d59c691e6ab55bULL,
+ 0xa3fbd0d71dcdea34ULL,
+ 0x6820eeb3b6bbf755ULL,
+ 0xdb0ea20db51ca83aULL,
+ 0x9ca4d8e41efb570eULL,
+ 0x2f8a945a1d5c0861ULL,
+ 0x13f02d374934a966ULL,
+ 0xa0de61894a93f609ULL,
+ 0xe7741b60e174093dULL,
+ 0x545a57dee2d35652ULL,
+ 0xe21ac88218962d7aULL,
+ 0x5134843c1b317215ULL,
+ 0x169efed5b0d68d21ULL,
+ 0xa5b0b26bb371d24eULL,
+ 0x99ca0b06e7197349ULL,
+ 0x2ae447b8e4be2c26ULL,
+ 0x6d4e3d514f59d312ULL,
+ 0xde6071ef4cfe8c7dULL,
+ 0x15bb4f8be788911cULL,
+ 0xa6950335e42fce73ULL,
+ 0xe13f79dc4fc83147ULL,
+ 0x521135624c6f6e28ULL,
+ 0x6e6b8c0f1807cf2fULL,
+ 0xdd45c0b11ba09040ULL,
+ 0x9aefba58b0476f74ULL,
+ 0x29c1f6e6b3e0301bULL,
+ 0xc96c5795d7870f42ULL,
+ 0x7a421b2bd420502dULL,
+ 0x3de861c27fc7af19ULL,
+ 0x8ec62d7c7c60f076ULL,
+ 0xb2bc941128085171ULL,
+ 0x0192d8af2baf0e1eULL,
+ 0x4638a2468048f12aULL,
+ 0xf516eef883efae45ULL,
+ 0x3ecdd09c2899b324ULL,
+ 0x8de39c222b3eec4bULL,
+ 0xca49e6cb80d9137fULL,
+ 0x7967aa75837e4c10ULL,
+ 0x451d1318d716ed17ULL,
+ 0xf6335fa6d4b1b278ULL,
+ 0xb199254f7f564d4cULL,
+ 0x02b769f17cf11223ULL,
+ 0xb4f7f6ad86b4690bULL,
+ 0x07d9ba1385133664ULL,
+ 0x4073c0fa2ef4c950ULL,
+ 0xf35d8c442d53963fULL,
+ 0xcf273529793b3738ULL,
+ 0x7c0979977a9c6857ULL,
+ 0x3ba3037ed17b9763ULL,
+ 0x888d4fc0d2dcc80cULL,
+ 0x435671a479aad56dULL,
+ 0xf0783d1a7a0d8a02ULL,
+ 0xb7d247f3d1ea7536ULL,
+ 0x04fc0b4dd24d2a59ULL,
+ 0x3886b22086258b5eULL,
+ 0x8ba8fe9e8582d431ULL,
+ 0xcc0284772e652b05ULL,
+ 0x7f2cc8c92dc2746aULL,
+ 0x325b15e575e1c3d0ULL,
+ 0x8175595b76469cbfULL,
+ 0xc6df23b2dda1638bULL,
+ 0x75f16f0cde063ce4ULL,
+ 0x498bd6618a6e9de3ULL,
+ 0xfaa59adf89c9c28cULL,
+ 0xbd0fe036222e3db8ULL,
+ 0x0e21ac88218962d7ULL,
+ 0xc5fa92ec8aff7fb6ULL,
+ 0x76d4de52895820d9ULL,
+ 0x317ea4bb22bfdfedULL,
+ 0x8250e80521188082ULL,
+ 0xbe2a516875702185ULL,
+ 0x0d041dd676d77eeaULL,
+ 0x4aae673fdd3081deULL,
+ 0xf9802b81de97deb1ULL,
+ 0x4fc0b4dd24d2a599ULL,
+ 0xfceef8632775faf6ULL,
+ 0xbb44828a8c9205c2ULL,
+ 0x086ace348f355aadULL,
+ 0x34107759db5dfbaaULL,
+ 0x873e3be7d8faa4c5ULL,
+ 0xc094410e731d5bf1ULL,
+ 0x73ba0db070ba049eULL,
+ 0xb86133d4dbcc19ffULL,
+ 0x0b4f7f6ad86b4690ULL,
+ 0x4ce50583738cb9a4ULL,
+ 0xffcb493d702be6cbULL,
+ 0xc3b1f050244347ccULL,
+ 0x709fbcee27e418a3ULL,
+ 0x3735c6078c03e797ULL,
+ 0x841b8ab98fa4b8f8ULL,
+ 0xadda7c5f3c4488e3ULL,
+ 0x1ef430e13fe3d78cULL,
+ 0x595e4a08940428b8ULL,
+ 0xea7006b697a377d7ULL,
+ 0xd60abfdbc3cbd6d0ULL,
+ 0x6524f365c06c89bfULL,
+ 0x228e898c6b8b768bULL,
+ 0x91a0c532682c29e4ULL,
+ 0x5a7bfb56c35a3485ULL,
+ 0xe955b7e8c0fd6beaULL,
+ 0xaeffcd016b1a94deULL,
+ 0x1dd181bf68bdcbb1ULL,
+ 0x21ab38d23cd56ab6ULL,
+ 0x9285746c3f7235d9ULL,
+ 0xd52f0e859495caedULL,
+ 0x6601423b97329582ULL,
+ 0xd041dd676d77eeaaULL,
+ 0x636f91d96ed0b1c5ULL,
+ 0x24c5eb30c5374ef1ULL,
+ 0x97eba78ec690119eULL,
+ 0xab911ee392f8b099ULL,
+ 0x18bf525d915feff6ULL,
+ 0x5f1528b43ab810c2ULL,
+ 0xec3b640a391f4fadULL,
+ 0x27e05a6e926952ccULL,
+ 0x94ce16d091ce0da3ULL,
+ 0xd3646c393a29f297ULL,
+ 0x604a2087398eadf8ULL,
+ 0x5c3099ea6de60cffULL,
+ 0xef1ed5546e415390ULL,
+ 0xa8b4afbdc5a6aca4ULL,
+ 0x1b9ae303c601f3cbULL,
+ 0x56ed3e2f9e224471ULL,
+ 0xe5c372919d851b1eULL,
+ 0xa26908783662e42aULL,
+ 0x114744c635c5bb45ULL,
+ 0x2d3dfdab61ad1a42ULL,
+ 0x9e13b115620a452dULL,
+ 0xd9b9cbfcc9edba19ULL,
+ 0x6a978742ca4ae576ULL,
+ 0xa14cb926613cf817ULL,
+ 0x1262f598629ba778ULL,
+ 0x55c88f71c97c584cULL,
+ 0xe6e6c3cfcadb0723ULL,
+ 0xda9c7aa29eb3a624ULL,
+ 0x69b2361c9d14f94bULL,
+ 0x2e184cf536f3067fULL,
+ 0x9d36004b35545910ULL,
+ 0x2b769f17cf112238ULL,
+ 0x9858d3a9ccb67d57ULL,
+ 0xdff2a94067518263ULL,
+ 0x6cdce5fe64f6dd0cULL,
+ 0x50a65c93309e7c0bULL,
+ 0xe388102d33392364ULL,
+ 0xa4226ac498dedc50ULL,
+ 0x170c267a9b79833fULL,
+ 0xdcd7181e300f9e5eULL,
+ 0x6ff954a033a8c131ULL,
+ 0x28532e49984f3e05ULL,
+ 0x9b7d62f79be8616aULL,
+ 0xa707db9acf80c06dULL,
+ 0x14299724cc279f02ULL,
+ 0x5383edcd67c06036ULL,
+ 0xe0ada17364673f59ULL
+ }
+};
+
+
+/*
+ * crc64_ecma_seed - Initializes the CRC64 ECMA seed.
+ */
+u64 crc64_ecma_seed(void)
+{
+ return CRC64_ECMA_182.seed;
+}
+EXPORT_SYMBOL(crc64_ecma_seed);
+
+/*
+ * crc64_ecma - Computes the 64 bit ECMA CRC.
+ *
+ * pdata: pointer to the data to compute checksum for.
+ * nbytes: number of bytes in data buffer.
+ * seed: CRC seed.
+ */
+u64 crc64_ecma(u8 const *pdata, u32 nbytes, u64 seed)
+{
+ unsigned int i;
+ u64 crc = seed;
+
+ for (i = 0; i < nbytes; i++)
+ crc = CRC64_ECMA_182.table[(crc ^ pdata[i]) & CRC64_BYTE_MASK] ^
+ (crc >> 8);
+
+ return crc;
+}
+EXPORT_SYMBOL(crc64_ecma);
+
+MODULE_DESCRIPTION("CRC64 ECMA function");
+MODULE_AUTHOR("Freescale Semiconductor Inc.");
+MODULE_LICENSE("GPL");
diff --git a/lib/decompress.c b/lib/decompress.c
index 86069d74c062..37f3c786348f 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -54,7 +54,7 @@ static const struct compress_format compressed_formats[] __initconst = {
{ {0, 0}, NULL, NULL }
};
-decompress_fn __init decompress_method(const unsigned char *inbuf, int len,
+decompress_fn __init decompress_method(const unsigned char *inbuf, long len,
const char **name)
{
const struct compress_format *cf;
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
index 31c5f7675fbf..8290e0bef7ea 100644
--- a/lib/decompress_bunzip2.c
+++ b/lib/decompress_bunzip2.c
@@ -92,8 +92,8 @@ struct bunzip_data {
/* State for interrupting output loop */
int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
/* I/O tracking data (file handles, buffers, positions, etc.) */
- int (*fill)(void*, unsigned int);
- int inbufCount, inbufPos /*, outbufPos*/;
+ long (*fill)(void*, unsigned long);
+ long inbufCount, inbufPos /*, outbufPos*/;
unsigned char *inbuf /*,*outbuf*/;
unsigned int inbufBitCount, inbufBits;
/* The CRC values stored in the block header and calculated from the
@@ -617,7 +617,7 @@ decode_next_byte:
goto decode_next_byte;
}
-static int INIT nofill(void *buf, unsigned int len)
+static long INIT nofill(void *buf, unsigned long len)
{
return -1;
}
@@ -625,8 +625,8 @@ static int INIT nofill(void *buf, unsigned int len)
/* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain
a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are
ignored, and data is read from file handle into temporary buffer. */
-static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
- int (*fill)(void*, unsigned int))
+static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, long len,
+ long (*fill)(void*, unsigned long))
{
struct bunzip_data *bd;
unsigned int i, j, c;
@@ -675,11 +675,11 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data,
not end of file.) */
-STATIC int INIT bunzip2(unsigned char *buf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+STATIC int INIT bunzip2(unsigned char *buf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *outbuf,
- int *pos,
+ long *pos,
void(*error)(char *x))
{
struct bunzip_data *bd;
@@ -743,11 +743,11 @@ exit_0:
}
#ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+STATIC int INIT decompress(unsigned char *buf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *outbuf,
- int *pos,
+ long *pos,
void(*error)(char *x))
{
return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error);
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c
index 0edfd742a154..d4c7891635ec 100644
--- a/lib/decompress_inflate.c
+++ b/lib/decompress_inflate.c
@@ -27,17 +27,17 @@
#define GZIP_IOBUF_SIZE (16*1024)
-static int INIT nofill(void *buffer, unsigned int len)
+static long INIT nofill(void *buffer, unsigned long len)
{
return -1;
}
/* Included from initramfs et al code */
-STATIC int INIT gunzip(unsigned char *buf, int len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+STATIC int INIT gunzip(unsigned char *buf, long len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *out_buf,
- int *pos,
+ long *pos,
void(*error)(char *x)) {
u8 *zbuf;
struct z_stream_s *strm;
@@ -142,7 +142,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
/* Write any data generated */
if (flush && strm->next_out > out_buf) {
- int l = strm->next_out - out_buf;
+ long l = strm->next_out - out_buf;
if (l != flush(out_buf, l)) {
rc = -1;
error("write error");
diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
index 7d1e83caf8ad..40f66ebe57b7 100644
--- a/lib/decompress_unlz4.c
+++ b/lib/decompress_unlz4.c
@@ -31,10 +31,10 @@
#define LZ4_DEFAULT_UNCOMPRESSED_CHUNK_SIZE (8 << 20)
#define ARCHIVE_MAGICNUMBER 0x184C2102
-STATIC inline int INIT unlz4(u8 *input, int in_len,
- int (*fill) (void *, unsigned int),
- int (*flush) (void *, unsigned int),
- u8 *output, int *posp,
+STATIC inline int INIT unlz4(u8 *input, long in_len,
+ long (*fill)(void *, unsigned long),
+ long (*flush)(void *, unsigned long),
+ u8 *output, long *posp,
void (*error) (char *x))
{
int ret = -1;
@@ -43,7 +43,7 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
u8 *inp;
u8 *inp_start;
u8 *outp;
- int size = in_len;
+ long size = in_len;
#ifdef PREBOOT
size_t out_len = get_unaligned_le32(input + in_len);
#endif
@@ -83,13 +83,20 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
if (posp)
*posp = 0;
- if (fill)
- fill(inp, 4);
+ if (fill) {
+ size = fill(inp, 4);
+ if (size < 4) {
+ error("data corrupted");
+ goto exit_2;
+ }
+ }
chunksize = get_unaligned_le32(inp);
if (chunksize == ARCHIVE_MAGICNUMBER) {
- inp += 4;
- size -= 4;
+ if (!fill) {
+ inp += 4;
+ size -= 4;
+ }
} else {
error("invalid header");
goto exit_2;
@@ -100,29 +107,44 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
for (;;) {
- if (fill)
- fill(inp, 4);
+ if (fill) {
+ size = fill(inp, 4);
+ if (size == 0)
+ break;
+ if (size < 4) {
+ error("data corrupted");
+ goto exit_2;
+ }
+ }
chunksize = get_unaligned_le32(inp);
if (chunksize == ARCHIVE_MAGICNUMBER) {
- inp += 4;
- size -= 4;
+ if (!fill) {
+ inp += 4;
+ size -= 4;
+ }
if (posp)
*posp += 4;
continue;
}
- inp += 4;
- size -= 4;
+
if (posp)
*posp += 4;
- if (fill) {
+ if (!fill) {
+ inp += 4;
+ size -= 4;
+ } else {
if (chunksize > lz4_compressbound(uncomp_chunksize)) {
error("chunk length is longer than allocated");
goto exit_2;
}
- fill(inp, chunksize);
+ size = fill(inp, chunksize);
+ if (size < chunksize) {
+ error("data corrupted");
+ goto exit_2;
+ }
}
#ifdef PREBOOT
if (out_len >= uncomp_chunksize) {
@@ -149,18 +171,17 @@ STATIC inline int INIT unlz4(u8 *input, int in_len,
if (posp)
*posp += chunksize;
- size -= chunksize;
+ if (!fill) {
+ size -= chunksize;
- if (size == 0)
- break;
- else if (size < 0) {
- error("data corrupted");
- goto exit_2;
+ if (size == 0)
+ break;
+ else if (size < 0) {
+ error("data corrupted");
+ goto exit_2;
+ }
+ inp += chunksize;
}
-
- inp += chunksize;
- if (fill)
- inp = inp_start;
}
ret = 0;
@@ -175,11 +196,11 @@ exit_0:
}
#ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, int in_len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+STATIC int INIT decompress(unsigned char *buf, long in_len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *posp,
+ long *posp,
void(*error)(char *x)
)
{
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c
index 32adb73a9038..0be83af62b88 100644
--- a/lib/decompress_unlzma.c
+++ b/lib/decompress_unlzma.c
@@ -65,11 +65,11 @@ static long long INIT read_int(unsigned char *ptr, int size)
#define LZMA_IOBUF_SIZE 0x10000
struct rc {
- int (*fill)(void*, unsigned int);
+ long (*fill)(void*, unsigned long);
uint8_t *ptr;
uint8_t *buffer;
uint8_t *buffer_end;
- int buffer_size;
+ long buffer_size;
uint32_t code;
uint32_t range;
uint32_t bound;
@@ -82,7 +82,7 @@ struct rc {
#define RC_MODEL_TOTAL_BITS 11
-static int INIT nofill(void *buffer, unsigned int len)
+static long INIT nofill(void *buffer, unsigned long len)
{
return -1;
}
@@ -99,8 +99,8 @@ static void INIT rc_read(struct rc *rc)
/* Called once */
static inline void INIT rc_init(struct rc *rc,
- int (*fill)(void*, unsigned int),
- char *buffer, int buffer_size)
+ long (*fill)(void*, unsigned long),
+ char *buffer, long buffer_size)
{
if (fill)
rc->fill = fill;
@@ -280,7 +280,7 @@ struct writer {
size_t buffer_pos;
int bufsize;
size_t global_pos;
- int(*flush)(void*, unsigned int);
+ long (*flush)(void*, unsigned long);
struct lzma_header *header;
};
@@ -534,11 +534,11 @@ static inline int INIT process_bit1(struct writer *wr, struct rc *rc,
-STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+STATIC inline int INIT unlzma(unsigned char *buf, long in_len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *posp,
+ long *posp,
void(*error)(char *x)
)
{
@@ -667,11 +667,11 @@ exit_0:
}
#ifdef PREBOOT
-STATIC int INIT decompress(unsigned char *buf, int in_len,
- int(*fill)(void*, unsigned int),
- int(*flush)(void*, unsigned int),
+STATIC int INIT decompress(unsigned char *buf, long in_len,
+ long (*fill)(void*, unsigned long),
+ long (*flush)(void*, unsigned long),
unsigned char *output,
- int *posp,
+ long *posp,
void(*error)(char *x)
)
{
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
index 960183d4258f..b94a31bdd87d 100644
--- a/lib/decompress_unlzo.c
+++ b/lib/decompress_unlzo.c
@@ -51,7 +51,7 @@ static const unsigned char lzop_magic[] = {
#define HEADER_SIZE_MIN (9 + 7 + 4 + 8 + 1 + 4)
#define HEADER_SIZE_MAX (9 + 7 + 1 + 8 + 8 + 4 + 1 + 255 + 4)
-STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len)
+STATIC inline long INIT parse_header(u8 *input, long *skip, long in_len)
{
int l;
u8 *parse = input;
@@ -108,14 +108,14 @@ STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len)
return 1;
}
-STATIC inline int INIT unlzo(u8 *input, int in_len,
- int (*fill) (void *, unsigned int),
- int (*flush) (void *, unsigned int),
- u8 *output, int *posp,
+STATIC int INIT unlzo(u8 *input, long in_len,
+ long (*fill)(void *, unsigned long),
+ long (*flush)(void *, unsigned long),
+ u8 *output, long *posp,
void (*error) (char *x))
{
u8 r = 0;
- int skip = 0;
+ long skip = 0;
u32 src_len, dst_len;
size_t tmp;
u8 *in_buf, *in_buf_save, *out_buf;
diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c
index 9f34eb56854d..b07a78340e9d 100644
--- a/lib/decompress_unxz.c
+++ b/lib/decompress_unxz.c
@@ -248,10 +248,10 @@ void *memmove(void *dest, const void *src, size_t size)
* both input and output buffers are available as a single chunk, i.e. when
* fill() and flush() won't be used.
*/
-STATIC int INIT unxz(unsigned char *in, int in_size,
- int (*fill)(void *dest, unsigned int size),
- int (*flush)(void *src, unsigned int size),
- unsigned char *out, int *in_used,
+STATIC int INIT unxz(unsigned char *in, long in_size,
+ long (*fill)(void *dest, unsigned long size),
+ long (*flush)(void *src, unsigned long size),
+ unsigned char *out, long *in_used,
void (*error)(char *x))
{
struct xz_buf b;
@@ -329,7 +329,7 @@ STATIC int INIT unxz(unsigned char *in, int in_size,
* returned by xz_dec_run(), but probably
* it's not too bad.
*/
- if (flush(b.out, b.out_pos) != (int)b.out_pos)
+ if (flush(b.out, b.out_pos) != (long)b.out_pos)
ret = XZ_BUF_ERROR;
b.out_pos = 0;
diff --git a/lib/glob.c b/lib/glob.c
new file mode 100644
index 000000000000..500fc80d23e1
--- /dev/null
+++ b/lib/glob.c
@@ -0,0 +1,287 @@
+#include <linux/module.h>
+#include <linux/glob.h>
+
+/*
+ * The only reason this code can be compiled as a module is because the
+ * ATA code that depends on it can be as well. In practice, they're
+ * both usually compiled in and the module overhead goes away.
+ */
+MODULE_DESCRIPTION("glob(7) matching");
+MODULE_LICENSE("Dual MIT/GPL");
+
+/**
+ * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
+ * @pat: Shell-style pattern to match, e.g. "*.[ch]".
+ * @str: String to match. The pattern must match the entire string.
+ *
+ * Perform shell-style glob matching, returning true (1) if the match
+ * succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0).
+ *
+ * Pattern metacharacters are ?, *, [ and \.
+ * (And, inside character classes, !, - and ].)
+ *
+ * This is small and simple implementation intended for device blacklists
+ * where a string is matched against a number of patterns. Thus, it
+ * does not preprocess the patterns. It is non-recursive, and run-time
+ * is at most quadratic: strlen(@str)*strlen(@pat).
+ *
+ * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
+ * it takes 6 passes over the pattern before matching the string.
+ *
+ * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
+ * treat / or leading . specially; it isn't actually used for pathnames.
+ *
+ * Note that according to glob(7) (and unlike bash), character classes
+ * are complemented by a leading !; this does not support the regex-style
+ * [^a-z] syntax.
+ *
+ * An opening bracket without a matching close is matched literally.
+ */
+bool __pure glob_match(char const *pat, char const *str)
+{
+ /*
+ * Backtrack to previous * on mismatch and retry starting one
+ * character later in the string. Because * matches all characters
+ * (no exception for /), it can be easily proved that there's
+ * never a need to backtrack multiple levels.
+ */
+ char const *back_pat = NULL, *back_str = back_str;
+
+ /*
+ * Loop over each token (character or class) in pat, matching
+ * it against the remaining unmatched tail of str. Return false
+ * on mismatch, or true after matching the trailing nul bytes.
+ */
+ for (;;) {
+ unsigned char c = *str++;
+ unsigned char d = *pat++;
+
+ switch (d) {
+ case '?': /* Wildcard: anything but nul */
+ if (c == '\0')
+ return false;
+ break;
+ case '*': /* Any-length wildcard */
+ if (*pat == '\0') /* Optimize trailing * case */
+ return true;
+ back_pat = pat;
+ back_str = --str; /* Allow zero-length match */
+ break;
+ case '[': { /* Character class */
+ bool match = false, inverted = (*pat == '!');
+ char const *class = pat + inverted;
+ unsigned char a = *class++;
+
+ /*
+ * Iterate over each span in the character class.
+ * A span is either a single character a, or a
+ * range a-b. The first span may begin with ']'.
+ */
+ do {
+ unsigned char b = a;
+
+ if (a == '\0') /* Malformed */
+ goto literal;
+
+ if (class[0] == '-' && class[1] != ']') {
+ b = class[1];
+
+ if (b == '\0')
+ goto literal;
+
+ class += 2;
+ /* Any special action if a > b? */
+ }
+ match |= (a <= c && c <= b);
+ } while ((a = *class++) != ']');
+
+ if (match == inverted)
+ goto backtrack;
+ pat = class;
+ }
+ break;
+ case '\\':
+ d = *pat++;
+ /*FALLTHROUGH*/
+ default: /* Literal character */
+literal:
+ if (c == d) {
+ if (d == '\0')
+ return true;
+ break;
+ }
+backtrack:
+ if (c == '\0' || !back_pat)
+ return false; /* No point continuing */
+ /* Try again from last *, one character later in str. */
+ pat = back_pat;
+ str = ++back_str;
+ break;
+ }
+ }
+}
+EXPORT_SYMBOL(glob_match);
+
+
+#ifdef CONFIG_GLOB_SELFTEST
+
+#include <linux/printk.h>
+#include <linux/moduleparam.h>
+
+/* Boot with "glob.verbose=1" to show successful tests, too */
+static bool verbose = false;
+module_param(verbose, bool, 0);
+
+struct glob_test {
+ char const *pat, *str;
+ bool expected;
+};
+
+static bool __pure __init test(char const *pat, char const *str, bool expected)
+{
+ bool match = glob_match(pat, str);
+ bool success = match == expected;
+
+ /* Can't get string literals into a particular section, so... */
+ static char const msg_error[] __initconst =
+ KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n";
+ static char const msg_ok[] __initconst =
+ KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n";
+ static char const mismatch[] __initconst = "mismatch";
+ char const *message;
+
+ if (!success)
+ message = msg_error;
+ else if (verbose)
+ message = msg_ok;
+ else
+ return success;
+
+ printk(message, pat, str, mismatch + 3*match);
+ return success;
+}
+
+/*
+ * The tests are all jammed together in one array to make it simpler
+ * to place that array in the .init.rodata section. The obvious
+ * "array of structures containing char *" has no way to force the
+ * pointed-to strings to be in a particular section.
+ *
+ * Anyway, a test consists of:
+ * 1. Expected glob_match result: '1' or '0'.
+ * 2. Pattern to match: null-terminated string
+ * 3. String to match against: null-terminated string
+ *
+ * The list of tests is terminated with a final '\0' instead of
+ * a glob_match result character.
+ */
+static char const glob_tests[] __initconst =
+ /* Some basic tests */
+ "1" "a\0" "a\0"
+ "0" "a\0" "b\0"
+ "0" "a\0" "aa\0"
+ "0" "a\0" "\0"
+ "1" "\0" "\0"
+ "0" "\0" "a\0"
+ /* Simple character class tests */
+ "1" "[a]\0" "a\0"
+ "0" "[a]\0" "b\0"
+ "0" "[!a]\0" "a\0"
+ "1" "[!a]\0" "b\0"
+ "1" "[ab]\0" "a\0"
+ "1" "[ab]\0" "b\0"
+ "0" "[ab]\0" "c\0"
+ "1" "[!ab]\0" "c\0"
+ "1" "[a-c]\0" "b\0"
+ "0" "[a-c]\0" "d\0"
+ /* Corner cases in character class parsing */
+ "1" "[a-c-e-g]\0" "-\0"
+ "0" "[a-c-e-g]\0" "d\0"
+ "1" "[a-c-e-g]\0" "f\0"
+ "1" "[]a-ceg-ik[]\0" "a\0"
+ "1" "[]a-ceg-ik[]\0" "]\0"
+ "1" "[]a-ceg-ik[]\0" "[\0"
+ "1" "[]a-ceg-ik[]\0" "h\0"
+ "0" "[]a-ceg-ik[]\0" "f\0"
+ "0" "[!]a-ceg-ik[]\0" "h\0"
+ "0" "[!]a-ceg-ik[]\0" "]\0"
+ "1" "[!]a-ceg-ik[]\0" "f\0"
+ /* Simple wild cards */
+ "1" "?\0" "a\0"
+ "0" "?\0" "aa\0"
+ "0" "??\0" "a\0"
+ "1" "?x?\0" "axb\0"
+ "0" "?x?\0" "abx\0"
+ "0" "?x?\0" "xab\0"
+ /* Asterisk wild cards (backtracking) */
+ "0" "*??\0" "a\0"
+ "1" "*??\0" "ab\0"
+ "1" "*??\0" "abc\0"
+ "1" "*??\0" "abcd\0"
+ "0" "??*\0" "a\0"
+ "1" "??*\0" "ab\0"
+ "1" "??*\0" "abc\0"
+ "1" "??*\0" "abcd\0"
+ "0" "?*?\0" "a\0"
+ "1" "?*?\0" "ab\0"
+ "1" "?*?\0" "abc\0"
+ "1" "?*?\0" "abcd\0"
+ "1" "*b\0" "b\0"
+ "1" "*b\0" "ab\0"
+ "0" "*b\0" "ba\0"
+ "1" "*b\0" "bb\0"
+ "1" "*b\0" "abb\0"
+ "1" "*b\0" "bab\0"
+ "1" "*bc\0" "abbc\0"
+ "1" "*bc\0" "bc\0"
+ "1" "*bc\0" "bbc\0"
+ "1" "*bc\0" "bcbc\0"
+ /* Multiple asterisks (complex backtracking) */
+ "1" "*ac*\0" "abacadaeafag\0"
+ "1" "*ac*ae*ag*\0" "abacadaeafag\0"
+ "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
+ "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
+ "1" "*abcd*\0" "abcabcabcabcdefg\0"
+ "1" "*ab*cd*\0" "abcabcabcabcdefg\0"
+ "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
+ "0" "*abcd*\0" "abcabcabcabcefg\0"
+ "0" "*ab*cd*\0" "abcabcabcabcefg\0";
+
+static int __init glob_init(void)
+{
+ unsigned successes = 0;
+ unsigned n = 0;
+ char const *p = glob_tests;
+ static char const message[] __initconst =
+ KERN_INFO "glob: %u self-tests passed, %u failed\n";
+
+ /*
+ * Tests are jammed together in a string. The first byte is '1'
+ * or '0' to indicate the expected outcome, or '\0' to indicate the
+ * end of the tests. Then come two null-terminated strings: the
+ * pattern and the string to match it against.
+ */
+ while (*p) {
+ bool expected = *p++ & 1;
+ char const *pat = p;
+
+ p += strlen(p) + 1;
+ successes += test(pat, p, expected);
+ p += strlen(p) + 1;
+ n++;
+ }
+
+ n -= successes;
+ printk(message, successes, n);
+
+ /* What's the errno for "kernel bug detected"? Guess... */
+ return n ? -ECANCELED : 0;
+}
+
+/* We need a dummy exit function to allow unload */
+static void __exit glob_fini(void) { }
+
+module_init(glob_init);
+module_exit(glob_fini);
+
+#endif /* CONFIG_GLOB_SELFTEST */
diff --git a/lib/idr.c b/lib/idr.c
index 39158abebad1..50be3fa9b657 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -590,26 +590,27 @@ static void __idr_remove_all(struct idr *idp)
struct idr_layer **paa = &pa[0];
n = idp->layers * IDR_BITS;
- p = idp->top;
+ *paa = idp->top;
RCU_INIT_POINTER(idp->top, NULL);
max = idr_max(idp->layers);
id = 0;
while (id >= 0 && id <= max) {
+ p = *paa;
while (n > IDR_BITS && p) {
n -= IDR_BITS;
- *paa++ = p;
p = p->ary[(id >> n) & IDR_MASK];
+ *++paa = p;
}
bt_mask = id;
id += 1 << n;
/* Get the highest bit that the above add changed from 0->1. */
while (n < fls(id ^ bt_mask)) {
- if (p)
- free_layer(idp, p);
+ if (*paa)
+ free_layer(idp, *paa);
n += IDR_BITS;
- p = *--paa;
+ --paa;
}
}
idp->layers = 0;
@@ -692,15 +693,16 @@ int idr_for_each(struct idr *idp,
struct idr_layer **paa = &pa[0];
n = idp->layers * IDR_BITS;
- p = rcu_dereference_raw(idp->top);
+ *paa = rcu_dereference_raw(idp->top);
max = idr_max(idp->layers);
id = 0;
while (id >= 0 && id <= max) {
+ p = *paa;
while (n > 0 && p) {
n -= IDR_BITS;
- *paa++ = p;
p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
+ *++paa = p;
}
if (p) {
@@ -712,7 +714,7 @@ int idr_for_each(struct idr *idp,
id += 1 << n;
while (n < fls(id)) {
n += IDR_BITS;
- p = *--paa;
+ --paa;
}
}
@@ -740,17 +742,18 @@ void *idr_get_next(struct idr *idp, int *nextidp)
int n, max;
/* find first ent */
- p = rcu_dereference_raw(idp->top);
+ p = *paa = rcu_dereference_raw(idp->top);
if (!p)
return NULL;
n = (p->layer + 1) * IDR_BITS;
max = idr_max(p->layer + 1);
while (id >= 0 && id <= max) {
+ p = *paa;
while (n > 0 && p) {
n -= IDR_BITS;
- *paa++ = p;
p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
+ *++paa = p;
}
if (p) {
@@ -768,7 +771,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
id = round_up(id + 1, 1 << n);
while (n < fls(id)) {
n += IDR_BITS;
- p = *--paa;
+ --paa;
}
}
return NULL;
diff --git a/lib/kfifo.c b/lib/kfifo.c
index d79b9d222065..90ba1eb1df06 100644
--- a/lib/kfifo.c
+++ b/lib/kfifo.c
@@ -561,8 +561,7 @@ EXPORT_SYMBOL(__kfifo_to_user_r);
unsigned int __kfifo_dma_in_prepare_r(struct __kfifo *fifo,
struct scatterlist *sgl, int nents, unsigned int len, size_t recsize)
{
- if (!nents)
- BUG();
+ BUG_ON(!nents);
len = __kfifo_max_r(len, recsize);
@@ -585,8 +584,7 @@ EXPORT_SYMBOL(__kfifo_dma_in_finish_r);
unsigned int __kfifo_dma_out_prepare_r(struct __kfifo *fifo,
struct scatterlist *sgl, int nents, unsigned int len, size_t recsize)
{
- if (!nents)
- BUG();
+ BUG_ON(!nents);
len = __kfifo_max_r(len, recsize);
diff --git a/lib/klist.c b/lib/klist.c
index 358a368a2947..89b485a2a58d 100644
--- a/lib/klist.c
+++ b/lib/klist.c
@@ -140,11 +140,11 @@ void klist_add_tail(struct klist_node *n, struct klist *k)
EXPORT_SYMBOL_GPL(klist_add_tail);
/**
- * klist_add_after - Init a klist_node and add it after an existing node
+ * klist_add_behind - Init a klist_node and add it after an existing node
* @n: node we're adding.
* @pos: node to put @n after
*/
-void klist_add_after(struct klist_node *n, struct klist_node *pos)
+void klist_add_behind(struct klist_node *n, struct klist_node *pos)
{
struct klist *k = knode_klist(pos);
@@ -153,7 +153,7 @@ void klist_add_after(struct klist_node *n, struct klist_node *pos)
list_add(&n->n_node, &pos->n_node);
spin_unlock(&k->k_lock);
}
-EXPORT_SYMBOL_GPL(klist_add_after);
+EXPORT_SYMBOL_GPL(klist_add_behind);
/**
* klist_add_before - Init a klist_node and add it before an existing node
diff --git a/lib/list_sort.c b/lib/list_sort.c
index 1183fa70a44d..12bcba1c8612 100644
--- a/lib/list_sort.c
+++ b/lib/list_sort.c
@@ -1,3 +1,6 @@
+
+#define pr_fmt(fmt) "list_sort_test: " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list_sort.h>
@@ -47,6 +50,7 @@ static void merge_and_restore_back_links(void *priv,
struct list_head *a, struct list_head *b)
{
struct list_head *tail = head;
+ u8 count = 0;
while (a && b) {
/* if equal, take 'a' -- important for sort stability */
@@ -70,7 +74,8 @@ static void merge_and_restore_back_links(void *priv,
* element comparison is needed, so the client's cmp()
* routine can invoke cond_resched() periodically.
*/
- (*cmp)(priv, tail->next, tail->next);
+ if (unlikely(!(++count)))
+ (*cmp)(priv, tail->next, tail->next);
tail->next->prev = tail;
tail = tail->next;
@@ -123,9 +128,7 @@ void list_sort(void *priv, struct list_head *head,
}
if (lev > max_lev) {
if (unlikely(lev >= ARRAY_SIZE(part)-1)) {
- printk_once(KERN_DEBUG "list passed to"
- " list_sort() too long for"
- " efficiency\n");
+ printk_once(KERN_DEBUG "list too long for efficiency\n");
lev--;
}
max_lev = lev;
@@ -168,27 +171,25 @@ static struct debug_el **elts __initdata;
static int __init check(struct debug_el *ela, struct debug_el *elb)
{
if (ela->serial >= TEST_LIST_LEN) {
- printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
- ela->serial);
+ pr_err("error: incorrect serial %d\n", ela->serial);
return -EINVAL;
}
if (elb->serial >= TEST_LIST_LEN) {
- printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
- elb->serial);
+ pr_err("error: incorrect serial %d\n", elb->serial);
return -EINVAL;
}
if (elts[ela->serial] != ela || elts[elb->serial] != elb) {
- printk(KERN_ERR "list_sort_test: error: phantom element\n");
+ pr_err("error: phantom element\n");
return -EINVAL;
}
if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) {
- printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
- ela->poison1, ela->poison2);
+ pr_err("error: bad poison: %#x/%#x\n",
+ ela->poison1, ela->poison2);
return -EINVAL;
}
if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) {
- printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
- elb->poison1, elb->poison2);
+ pr_err("error: bad poison: %#x/%#x\n",
+ elb->poison1, elb->poison2);
return -EINVAL;
}
return 0;
@@ -207,25 +208,23 @@ static int __init cmp(void *priv, struct list_head *a, struct list_head *b)
static int __init list_sort_test(void)
{
- int i, count = 1, err = -EINVAL;
+ int i, count = 1, err = -ENOMEM;
struct debug_el *el;
- struct list_head *cur, *tmp;
+ struct list_head *cur;
LIST_HEAD(head);
- printk(KERN_DEBUG "list_sort_test: start testing list_sort()\n");
+ pr_debug("start testing list_sort()\n");
- elts = kmalloc(sizeof(void *) * TEST_LIST_LEN, GFP_KERNEL);
+ elts = kcalloc(TEST_LIST_LEN, sizeof(*elts), GFP_KERNEL);
if (!elts) {
- printk(KERN_ERR "list_sort_test: error: cannot allocate "
- "memory\n");
- goto exit;
+ pr_err("error: cannot allocate memory\n");
+ return err;
}
for (i = 0; i < TEST_LIST_LEN; i++) {
el = kmalloc(sizeof(*el), GFP_KERNEL);
if (!el) {
- printk(KERN_ERR "list_sort_test: error: cannot "
- "allocate memory\n");
+ pr_err("error: cannot allocate memory\n");
goto exit;
}
/* force some equivalencies */
@@ -239,52 +238,52 @@ static int __init list_sort_test(void)
list_sort(NULL, &head, cmp);
+ err = -EINVAL;
for (cur = head.next; cur->next != &head; cur = cur->next) {
struct debug_el *el1;
int cmp_result;
if (cur->next->prev != cur) {
- printk(KERN_ERR "list_sort_test: error: list is "
- "corrupted\n");
+ pr_err("error: list is corrupted\n");
goto exit;
}
cmp_result = cmp(NULL, cur, cur->next);
if (cmp_result > 0) {
- printk(KERN_ERR "list_sort_test: error: list is not "
- "sorted\n");
+ pr_err("error: list is not sorted\n");
goto exit;
}
el = container_of(cur, struct debug_el, list);
el1 = container_of(cur->next, struct debug_el, list);
if (cmp_result == 0 && el->serial >= el1->serial) {
- printk(KERN_ERR "list_sort_test: error: order of "
- "equivalent elements not preserved\n");
+ pr_err("error: order of equivalent elements not "
+ "preserved\n");
goto exit;
}
if (check(el, el1)) {
- printk(KERN_ERR "list_sort_test: error: element check "
- "failed\n");
+ pr_err("error: element check failed\n");
goto exit;
}
count++;
}
+ if (head.prev != cur) {
+ pr_err("error: list is corrupted\n");
+ goto exit;
+ }
+
if (count != TEST_LIST_LEN) {
- printk(KERN_ERR "list_sort_test: error: bad list length %d",
- count);
+ pr_err("error: bad list length %d", count);
goto exit;
}
err = 0;
exit:
+ for (i = 0; i < TEST_LIST_LEN; i++)
+ kfree(elts[i]);
kfree(elts);
- list_for_each_safe(cur, tmp, &head) {
- list_del(cur);
- kfree(container_of(cur, struct debug_el, list));
- }
return err;
}
module_init(list_sort_test);
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 3a8e8e8fb2a5..4251cbd5becb 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -73,7 +73,7 @@ EXPORT_SYMBOL(sg_nents);
**/
struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
{
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
struct scatterlist *ret = &sgl[nents - 1];
#else
struct scatterlist *sg, *ret = NULL;
@@ -251,7 +251,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
if (nents == 0)
return -EINVAL;
-#ifndef ARCH_HAS_SG_CHAIN
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
if (WARN_ON_ONCE(nents > max_ents))
return -EINVAL;
#endif
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index ed5c1454dd62..29033f319aea 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -25,12 +25,15 @@
int string_get_size(u64 size, const enum string_size_units units,
char *buf, int len)
{
- static const char *units_10[] = { "B", "kB", "MB", "GB", "TB", "PB",
- "EB", "ZB", "YB", NULL};
- static const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB",
- "EiB", "ZiB", "YiB", NULL };
- static const char **units_str[] = {
- [STRING_UNITS_10] = units_10,
+ static const char *const units_10[] = {
+ "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", NULL
+ };
+ static const char *const units_2[] = {
+ "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB",
+ NULL
+ };
+ static const char *const *const units_str[] = {
+ [STRING_UNITS_10] = units_10,
[STRING_UNITS_2] = units_2,
};
static const unsigned int divisor[] = {
diff --git a/lib/test-kstrtox.c b/lib/test-kstrtox.c
index bea3f3fa3f02..4137bca5f8e8 100644
--- a/lib/test-kstrtox.c
+++ b/lib/test-kstrtox.c
@@ -3,7 +3,7 @@
#include <linux/module.h>
#define for_each_test(i, test) \
- for (i = 0; i < sizeof(test) / sizeof(test[0]); i++)
+ for (i = 0; i < ARRAY_SIZE(test); i++)
struct test_fail {
const char *str;
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 6fe2c84eb055..0eced40344dd 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1183,6 +1183,21 @@ char *address_val(char *buf, char *end, const void *addr,
return number(buf, end, num, spec);
}
+static noinline_for_stack
+char *comm_name(char *buf, char *end, struct task_struct *tsk,
+ struct printf_spec spec, const char *fmt)
+{
+ char name[TASK_COMM_LEN];
+
+ /* Caller can pass NULL instead of current. */
+ if (!tsk)
+ tsk = current;
+ /* Not using get_task_comm() in case I'm in IRQ context. */
+ memcpy(name, tsk->comm, TASK_COMM_LEN);
+ name[sizeof(name) - 1] = '\0';
+ return string(buf, end, name, spec);
+}
+
int kptr_restrict __read_mostly;
/*
@@ -1250,6 +1265,7 @@ int kptr_restrict __read_mostly;
* (default assumed to be phys_addr_t, passed by reference)
* - 'd[234]' For a dentry name (optionally 2-4 last components)
* - 'D[234]' Same as 'd' but for a struct file
+ * - 'T' task_struct->comm
*
* Note: The difference between 'S' and 'F' is that on ia64 and ppc64
* function pointers are really function descriptors, which contain a
@@ -1261,7 +1277,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
{
int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0);
- if (!ptr && *fmt != 'K') {
+ if (!ptr && *fmt != 'K' && *fmt != 'T') {
/*
* Print (null) with the same width as a pointer so it makes
* tabular output look nice.
@@ -1389,6 +1405,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
return dentry_name(buf, end,
((const struct file *)ptr)->f_path.dentry,
spec, fmt);
+ case 'T':
+ return comm_name(buf, end, ptr, spec, fmt);
}
spec.flags |= SMALL;
if (spec.field_width == -1) {
diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c
index d63381e8e333..d20ef458f137 100644
--- a/lib/zlib_deflate/deflate.c
+++ b/lib/zlib_deflate/deflate.c
@@ -250,52 +250,6 @@ int zlib_deflateInit2(
}
/* ========================================================================= */
-#if 0
-int zlib_deflateSetDictionary(
- z_streamp strm,
- const Byte *dictionary,
- uInt dictLength
-)
-{
- deflate_state *s;
- uInt length = dictLength;
- uInt n;
- IPos hash_head = 0;
-
- if (strm == NULL || strm->state == NULL || dictionary == NULL)
- return Z_STREAM_ERROR;
-
- s = (deflate_state *) strm->state;
- if (s->status != INIT_STATE) return Z_STREAM_ERROR;
-
- strm->adler = zlib_adler32(strm->adler, dictionary, dictLength);
-
- if (length < MIN_MATCH) return Z_OK;
- if (length > MAX_DIST(s)) {
- length = MAX_DIST(s);
-#ifndef USE_DICT_HEAD
- dictionary += dictLength - length; /* use the tail of the dictionary */
-#endif
- }
- memcpy((char *)s->window, dictionary, length);
- s->strstart = length;
- s->block_start = (long)length;
-
- /* Insert all strings in the hash table (except for the last two bytes).
- * s->lookahead stays null, so s->ins_h will be recomputed at the next
- * call of fill_window.
- */
- s->ins_h = s->window[0];
- UPDATE_HASH(s, s->ins_h, s->window[1]);
- for (n = 0; n <= length - MIN_MATCH; n++) {
- INSERT_STRING(s, n, hash_head);
- }
- if (hash_head) hash_head = 0; /* to make compiler happy */
- return Z_OK;
-}
-#endif /* 0 */
-
-/* ========================================================================= */
int zlib_deflateReset(
z_streamp strm
)
@@ -326,45 +280,6 @@ int zlib_deflateReset(
return Z_OK;
}
-/* ========================================================================= */
-#if 0
-int zlib_deflateParams(
- z_streamp strm,
- int level,
- int strategy
-)
-{
- deflate_state *s;
- compress_func func;
- int err = Z_OK;
-
- if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
- s = (deflate_state *) strm->state;
-
- if (level == Z_DEFAULT_COMPRESSION) {
- level = 6;
- }
- if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) {
- return Z_STREAM_ERROR;
- }
- func = configuration_table[s->level].func;
-
- if (func != configuration_table[level].func && strm->total_in != 0) {
- /* Flush the last buffer: */
- err = zlib_deflate(strm, Z_PARTIAL_FLUSH);
- }
- if (s->level != level) {
- s->level = level;
- s->max_lazy_match = configuration_table[level].max_lazy;
- s->good_match = configuration_table[level].good_length;
- s->nice_match = configuration_table[level].nice_length;
- s->max_chain_length = configuration_table[level].max_chain;
- }
- s->strategy = strategy;
- return err;
-}
-#endif /* 0 */
-
/* =========================================================================
* Put a short in the pending buffer. The 16-bit value is put in MSB order.
* IN assertion: the stream state is correct and there is enough room in
@@ -568,64 +483,6 @@ int zlib_deflateEnd(
return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
}
-/* =========================================================================
- * Copy the source state to the destination state.
- */
-#if 0
-int zlib_deflateCopy (
- z_streamp dest,
- z_streamp source
-)
-{
-#ifdef MAXSEG_64K
- return Z_STREAM_ERROR;
-#else
- deflate_state *ds;
- deflate_state *ss;
- ush *overlay;
- deflate_workspace *mem;
-
-
- if (source == NULL || dest == NULL || source->state == NULL) {
- return Z_STREAM_ERROR;
- }
-
- ss = (deflate_state *) source->state;
-
- *dest = *source;
-
- mem = (deflate_workspace *) dest->workspace;
-
- ds = &(mem->deflate_memory);
-
- dest->state = (struct internal_state *) ds;
- *ds = *ss;
- ds->strm = dest;
-
- ds->window = (Byte *) mem->window_memory;
- ds->prev = (Pos *) mem->prev_memory;
- ds->head = (Pos *) mem->head_memory;
- overlay = (ush *) mem->overlay_memory;
- ds->pending_buf = (uch *) overlay;
-
- memcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
- memcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos));
- memcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos));
- memcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
-
- ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
- ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
- ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
-
- ds->l_desc.dyn_tree = ds->dyn_ltree;
- ds->d_desc.dyn_tree = ds->dyn_dtree;
- ds->bl_desc.dyn_tree = ds->bl_tree;
-
- return Z_OK;
-#endif
-}
-#endif /* 0 */
-
/* ===========================================================================
* Read a new buffer from the current input stream, update the adler32
* and total number of bytes read. All deflate() input goes through
diff --git a/lib/zlib_inflate/inflate.c b/lib/zlib_inflate/inflate.c
index f5ce87b0800e..58a733b10387 100644
--- a/lib/zlib_inflate/inflate.c
+++ b/lib/zlib_inflate/inflate.c
@@ -45,21 +45,6 @@ int zlib_inflateReset(z_streamp strm)
return Z_OK;
}
-#if 0
-int zlib_inflatePrime(z_streamp strm, int bits, int value)
-{
- struct inflate_state *state;
-
- if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
- state = (struct inflate_state *)strm->state;
- if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR;
- value &= (1L << bits) - 1;
- state->hold += value << state->bits;
- state->bits += bits;
- return Z_OK;
-}
-#endif
-
int zlib_inflateInit2(z_streamp strm, int windowBits)
{
struct inflate_state *state;
@@ -761,123 +746,6 @@ int zlib_inflateEnd(z_streamp strm)
return Z_OK;
}
-#if 0
-int zlib_inflateSetDictionary(z_streamp strm, const Byte *dictionary,
- uInt dictLength)
-{
- struct inflate_state *state;
- unsigned long id;
-
- /* check state */
- if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
- state = (struct inflate_state *)strm->state;
- if (state->wrap != 0 && state->mode != DICT)
- return Z_STREAM_ERROR;
-
- /* check for correct dictionary id */
- if (state->mode == DICT) {
- id = zlib_adler32(0L, NULL, 0);
- id = zlib_adler32(id, dictionary, dictLength);
- if (id != state->check)
- return Z_DATA_ERROR;
- }
-
- /* copy dictionary to window */
- zlib_updatewindow(strm, strm->avail_out);
-
- if (dictLength > state->wsize) {
- memcpy(state->window, dictionary + dictLength - state->wsize,
- state->wsize);
- state->whave = state->wsize;
- }
- else {
- memcpy(state->window + state->wsize - dictLength, dictionary,
- dictLength);
- state->whave = dictLength;
- }
- state->havedict = 1;
- return Z_OK;
-}
-#endif
-
-#if 0
-/*
- Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found
- or when out of input. When called, *have is the number of pattern bytes
- found in order so far, in 0..3. On return *have is updated to the new
- state. If on return *have equals four, then the pattern was found and the
- return value is how many bytes were read including the last byte of the
- pattern. If *have is less than four, then the pattern has not been found
- yet and the return value is len. In the latter case, zlib_syncsearch() can be
- called again with more data and the *have state. *have is initialized to
- zero for the first call.
- */
-static unsigned zlib_syncsearch(unsigned *have, unsigned char *buf,
- unsigned len)
-{
- unsigned got;
- unsigned next;
-
- got = *have;
- next = 0;
- while (next < len && got < 4) {
- if ((int)(buf[next]) == (got < 2 ? 0 : 0xff))
- got++;
- else if (buf[next])
- got = 0;
- else
- got = 4 - got;
- next++;
- }
- *have = got;
- return next;
-}
-#endif
-
-#if 0
-int zlib_inflateSync(z_streamp strm)
-{
- unsigned len; /* number of bytes to look at or looked at */
- unsigned long in, out; /* temporary to save total_in and total_out */
- unsigned char buf[4]; /* to restore bit buffer to byte string */
- struct inflate_state *state;
-
- /* check parameters */
- if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
- state = (struct inflate_state *)strm->state;
- if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR;
-
- /* if first time, start search in bit buffer */
- if (state->mode != SYNC) {
- state->mode = SYNC;
- state->hold <<= state->bits & 7;
- state->bits -= state->bits & 7;
- len = 0;
- while (state->bits >= 8) {
- buf[len++] = (unsigned char)(state->hold);
- state->hold >>= 8;
- state->bits -= 8;
- }
- state->have = 0;
- zlib_syncsearch(&(state->have), buf, len);
- }
-
- /* search available input */
- len = zlib_syncsearch(&(state->have), strm->next_in, strm->avail_in);
- strm->avail_in -= len;
- strm->next_in += len;
- strm->total_in += len;
-
- /* return no joy or set up to restart inflate() on a new block */
- if (state->have != 4) return Z_DATA_ERROR;
- in = strm->total_in; out = strm->total_out;
- zlib_inflateReset(strm);
- strm->total_in = in; strm->total_out = out;
- state->mode = TYPE;
- return Z_OK;
-}
-#endif
-
/*
* This subroutine adds the data at next_in/avail_in to the output history
* without performing any output. The output buffer must be "caught up";
diff --git a/mm/Kconfig b/mm/Kconfig
index 3e9977a9d657..886db2158538 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -508,21 +508,34 @@ config CMA_DEBUG
processing calls such as dma_alloc_from_contiguous().
This option does not affect warning and error messages.
-config ZBUD
- tristate
- default n
+config CMA_AREAS
+ int "Maximum count of the CMA areas"
+ depends on CMA
+ default 7
help
- A special purpose allocator for storing compressed pages.
- It is designed to store up to two compressed pages per physical
- page. While this design limits storage density, it has simple and
- deterministic reclaim properties that make it preferable to a higher
- density approach when reclaim will be used.
+ CMA allows to create CMA areas for particular purpose, mainly,
+ used as device private area. This parameter sets the maximum
+ number of CMA area in the system.
+
+ If unsure, leave the default value "7".
+
+config MEM_SOFT_DIRTY
+ bool "Track memory changes"
+ depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
+ select PROC_PAGE_MONITOR
+ help
+ This option enables memory changes tracking by introducing a
+ soft-dirty bit on pte-s. This bit it set when someone writes
+ into a page just as regular dirty bit, but unlike the latter
+ it can be cleared by hands.
+
+ See Documentation/vm/soft-dirty.txt for more details.
config ZSWAP
bool "Compressed cache for swap pages (EXPERIMENTAL)"
depends on FRONTSWAP && CRYPTO=y
select CRYPTO_LZO
- select ZBUD
+ select ZPOOL
default n
help
A lightweight compressed cache for swap pages. It takes
@@ -538,17 +551,22 @@ config ZSWAP
they have not be fully explored on the large set of potential
configurations and workloads that exist.
-config MEM_SOFT_DIRTY
- bool "Track memory changes"
- depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
- select PROC_PAGE_MONITOR
+config ZPOOL
+ tristate "Common API for compressed memory storage"
+ default n
help
- This option enables memory changes tracking by introducing a
- soft-dirty bit on pte-s. This bit it set when someone writes
- into a page just as regular dirty bit, but unlike the latter
- it can be cleared by hands.
+ Compressed memory storage API. This allows using either zbud or
+ zsmalloc.
- See Documentation/vm/soft-dirty.txt for more details.
+config ZBUD
+ tristate "Low density storage for compressed pages"
+ default n
+ help
+ A special purpose allocator for storing compressed pages.
+ It is designed to store up to two compressed pages per physical
+ page. While this design limits storage density, it has simple and
+ deterministic reclaim properties that make it preferable to a higher
+ density approach when reclaim will be used.
config ZSMALLOC
tristate "Memory allocator for compressed pages"
diff --git a/mm/Makefile b/mm/Makefile
index 4064f3ec145e..632ae77e6070 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -59,6 +59,8 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
obj-$(CONFIG_CLEANCACHE) += cleancache.o
obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
+obj-$(CONFIG_ZPOOL) += zpool.o
obj-$(CONFIG_ZBUD) += zbud.o
obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
+obj-$(CONFIG_CMA) += cma.o
diff --git a/mm/cma.c b/mm/cma.c
new file mode 100644
index 000000000000..c17751c0dcaf
--- /dev/null
+++ b/mm/cma.c
@@ -0,0 +1,335 @@
+/*
+ * Contiguous Memory Allocator
+ *
+ * Copyright (c) 2010-2011 by Samsung Electronics.
+ * Copyright IBM Corporation, 2013
+ * Copyright LG Electronics Inc., 2014
+ * Written by:
+ * Marek Szyprowski <m.szyprowski@samsung.com>
+ * Michal Nazarewicz <mina86@mina86.com>
+ * Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ * Joonsoo Kim <iamjoonsoo.kim@lge.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ */
+
+#define pr_fmt(fmt) "cma: " fmt
+
+#ifdef CONFIG_CMA_DEBUG
+#ifndef DEBUG
+# define DEBUG
+#endif
+#endif
+
+#include <linux/memblock.h>
+#include <linux/err.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/log2.h>
+#include <linux/cma.h>
+
+struct cma {
+ unsigned long base_pfn;
+ unsigned long count;
+ unsigned long *bitmap;
+ unsigned int order_per_bit; /* Order of pages represented by one bit */
+ struct mutex lock;
+};
+
+static struct cma cma_areas[MAX_CMA_AREAS];
+static unsigned cma_area_count;
+static DEFINE_MUTEX(cma_mutex);
+
+phys_addr_t cma_get_base(struct cma *cma)
+{
+ return PFN_PHYS(cma->base_pfn);
+}
+
+unsigned long cma_get_size(struct cma *cma)
+{
+ return cma->count << PAGE_SHIFT;
+}
+
+static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order)
+{
+ return (1UL << (align_order >> cma->order_per_bit)) - 1;
+}
+
+static unsigned long cma_bitmap_maxno(struct cma *cma)
+{
+ return cma->count >> cma->order_per_bit;
+}
+
+static unsigned long cma_bitmap_pages_to_bits(struct cma *cma,
+ unsigned long pages)
+{
+ return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit;
+}
+
+static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, int count)
+{
+ unsigned long bitmap_no, bitmap_count;
+
+ bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit;
+ bitmap_count = cma_bitmap_pages_to_bits(cma, count);
+
+ mutex_lock(&cma->lock);
+ bitmap_clear(cma->bitmap, bitmap_no, bitmap_count);
+ mutex_unlock(&cma->lock);
+}
+
+static int __init cma_activate_area(struct cma *cma)
+{
+ int bitmap_size = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long);
+ unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
+ unsigned i = cma->count >> pageblock_order;
+ struct zone *zone;
+
+ cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+
+ if (!cma->bitmap)
+ return -ENOMEM;
+
+ WARN_ON_ONCE(!pfn_valid(pfn));
+ zone = page_zone(pfn_to_page(pfn));
+
+ do {
+ unsigned j;
+
+ base_pfn = pfn;
+ for (j = pageblock_nr_pages; j; --j, pfn++) {
+ WARN_ON_ONCE(!pfn_valid(pfn));
+ /*
+ * alloc_contig_range requires the pfn range
+ * specified to be in the same zone. Make this
+ * simple by forcing the entire CMA resv range
+ * to be in the same zone.
+ */
+ if (page_zone(pfn_to_page(pfn)) != zone)
+ goto err;
+ }
+ init_cma_reserved_pageblock(pfn_to_page(base_pfn));
+ } while (--i);
+
+ mutex_init(&cma->lock);
+ return 0;
+
+err:
+ kfree(cma->bitmap);
+ return -EINVAL;
+}
+
+static int __init cma_init_reserved_areas(void)
+{
+ int i;
+
+ for (i = 0; i < cma_area_count; i++) {
+ int ret = cma_activate_area(&cma_areas[i]);
+
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+core_initcall(cma_init_reserved_areas);
+
+/**
+ * cma_declare_contiguous() - reserve custom contiguous area
+ * @base: Base address of the reserved area optional, use 0 for any
+ * @size: Size of the reserved area (in bytes),
+ * @limit: End address of the reserved memory (optional, 0 for any).
+ * @alignment: Alignment for the CMA area, should be power of 2 or zero
+ * @order_per_bit: Order of pages represented by one bit on bitmap.
+ * @fixed: hint about where to place the reserved area
+ * @res_cma: Pointer to store the created cma region.
+ *
+ * This function reserves memory from early allocator. It should be
+ * called by arch specific code once the early allocator (memblock or bootmem)
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory. This function allows to create custom reserved areas.
+ *
+ * If @fixed is true, reserve contiguous area at exactly @base. If false,
+ * reserve in range from @base to @limit.
+ */
+int __init cma_declare_contiguous(phys_addr_t base,
+ phys_addr_t size, phys_addr_t limit,
+ phys_addr_t alignment, unsigned int order_per_bit,
+ bool fixed, struct cma **res_cma)
+{
+ struct cma *cma;
+ int ret = 0;
+
+ pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n",
+ __func__, (unsigned long)size, (unsigned long)base,
+ (unsigned long)limit, (unsigned long)alignment);
+
+ if (cma_area_count == ARRAY_SIZE(cma_areas)) {
+ pr_err("Not enough slots for CMA reserved regions!\n");
+ return -ENOSPC;
+ }
+
+ if (!size)
+ return -EINVAL;
+
+ if (alignment && !is_power_of_2(alignment))
+ return -EINVAL;
+
+ /*
+ * Sanitise input arguments.
+ * Pages both ends in CMA area could be merged into adjacent unmovable
+ * migratetype page by page allocator's buddy algorithm. In the case,
+ * you couldn't get a contiguous memory, which is not what we want.
+ */
+ alignment = max(alignment,
+ (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order));
+ base = ALIGN(base, alignment);
+ size = ALIGN(size, alignment);
+ limit &= ~(alignment - 1);
+
+ /* size should be aligned with order_per_bit */
+ if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit))
+ return -EINVAL;
+
+ /* Reserve memory */
+ if (base && fixed) {
+ if (memblock_is_region_reserved(base, size) ||
+ memblock_reserve(base, size) < 0) {
+ ret = -EBUSY;
+ goto err;
+ }
+ } else {
+ phys_addr_t addr = memblock_alloc_range(size, alignment, base,
+ limit);
+ if (!addr) {
+ ret = -ENOMEM;
+ goto err;
+ } else {
+ base = addr;
+ }
+ }
+
+ /*
+ * Each reserved area must be initialised later, when more kernel
+ * subsystems (like slab allocator) are available.
+ */
+ cma = &cma_areas[cma_area_count];
+ cma->base_pfn = PFN_DOWN(base);
+ cma->count = size >> PAGE_SHIFT;
+ cma->order_per_bit = order_per_bit;
+ *res_cma = cma;
+ cma_area_count++;
+
+ pr_info("Reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
+ (unsigned long)base);
+ return 0;
+
+err:
+ pr_err("Failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
+ return ret;
+}
+
+/**
+ * cma_alloc() - allocate pages from contiguous area
+ * @cma: Contiguous memory region for which the allocation is performed.
+ * @count: Requested number of pages.
+ * @align: Requested alignment of pages (in PAGE_SIZE order).
+ *
+ * This function allocates part of contiguous memory on specific
+ * contiguous memory area.
+ */
+struct page *cma_alloc(struct cma *cma, int count, unsigned int align)
+{
+ unsigned long mask, pfn, start = 0;
+ unsigned long bitmap_maxno, bitmap_no, bitmap_count;
+ struct page *page = NULL;
+ int ret;
+
+ if (!cma || !cma->count)
+ return NULL;
+
+ pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
+ count, align);
+
+ if (!count)
+ return NULL;
+
+ mask = cma_bitmap_aligned_mask(cma, align);
+ bitmap_maxno = cma_bitmap_maxno(cma);
+ bitmap_count = cma_bitmap_pages_to_bits(cma, count);
+
+ for (;;) {
+ mutex_lock(&cma->lock);
+ bitmap_no = bitmap_find_next_zero_area(cma->bitmap,
+ bitmap_maxno, start, bitmap_count, mask);
+ if (bitmap_no >= bitmap_maxno) {
+ mutex_unlock(&cma->lock);
+ break;
+ }
+ bitmap_set(cma->bitmap, bitmap_no, bitmap_count);
+ /*
+ * It's safe to drop the lock here. We've marked this region for
+ * our exclusive use. If the migration fails we will take the
+ * lock again and unmark it.
+ */
+ mutex_unlock(&cma->lock);
+
+ pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
+ mutex_lock(&cma_mutex);
+ ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
+ mutex_unlock(&cma_mutex);
+ if (ret == 0) {
+ page = pfn_to_page(pfn);
+ break;
+ }
+
+ cma_clear_bitmap(cma, pfn, count);
+ if (ret != -EBUSY)
+ break;
+
+ pr_debug("%s(): memory range at %p is busy, retrying\n",
+ __func__, pfn_to_page(pfn));
+ /* try again with a bit different memory target */
+ start = bitmap_no + mask + 1;
+ }
+
+ pr_debug("%s(): returned %p\n", __func__, page);
+ return page;
+}
+
+/**
+ * cma_release() - release allocated pages
+ * @cma: Contiguous memory region for which the allocation is performed.
+ * @pages: Allocated pages.
+ * @count: Number of allocated pages.
+ *
+ * This function releases memory allocated by alloc_cma().
+ * It returns false when provided pages do not belong to contiguous area and
+ * true otherwise.
+ */
+bool cma_release(struct cma *cma, struct page *pages, int count)
+{
+ unsigned long pfn;
+
+ if (!cma || !pages)
+ return false;
+
+ pr_debug("%s(page %p)\n", __func__, (void *)pages);
+
+ pfn = page_to_pfn(pages);
+
+ if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
+ return false;
+
+ VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
+
+ free_contig_range(pfn, count);
+ cma_clear_bitmap(cma, pfn, count);
+
+ return true;
+}
diff --git a/mm/compaction.c b/mm/compaction.c
index 21bf292b642a..51750197db11 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -325,14 +325,14 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
/* Found a free page, break it into order-0 pages */
isolated = split_free_page(page);
- total_isolated += isolated;
- for (i = 0; i < isolated; i++) {
- list_add(&page->lru, freelist);
- page++;
- }
-
- /* If a page was split, advance to the end of it */
if (isolated) {
+ total_isolated += isolated;
+ for (i = 0; i < isolated; i++) {
+ list_add(&page->lru, freelist);
+ page++;
+ }
+
+ /* If a page was split, advance to the end of it */
blockpfn += isolated - 1;
cursor += isolated - 1;
continue;
@@ -341,9 +341,6 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
isolate_fail:
if (strict)
break;
- else
- continue;
-
}
trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated);
diff --git a/mm/filemap.c b/mm/filemap.c
index d175917e2411..21a4df0e080f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -233,7 +233,6 @@ void delete_from_page_cache(struct page *page)
spin_lock_irq(&mapping->tree_lock);
__delete_from_page_cache(page, NULL);
spin_unlock_irq(&mapping->tree_lock);
- mem_cgroup_uncharge_cache_page(page);
if (freepage)
freepage(page);
@@ -489,8 +488,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
if (PageSwapBacked(new))
__inc_zone_page_state(new, NR_SHMEM);
spin_unlock_irq(&mapping->tree_lock);
- /* mem_cgroup codes must not be called under tree_lock */
- mem_cgroup_replace_page_cache(old, new);
+ mem_cgroup_migrate(old, new, true);
radix_tree_preload_end();
if (freepage)
freepage(old);
@@ -548,19 +546,19 @@ static int __add_to_page_cache_locked(struct page *page,
pgoff_t offset, gfp_t gfp_mask,
void **shadowp)
{
+ struct mem_cgroup *memcg;
int error;
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageSwapBacked(page), page);
- error = mem_cgroup_charge_file(page, current->mm,
- gfp_mask & GFP_RECLAIM_MASK);
+ error = mem_cgroup_try_charge(page, current->mm, gfp_mask, &memcg);
if (error)
return error;
error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
if (error) {
- mem_cgroup_uncharge_cache_page(page);
+ mem_cgroup_cancel_charge(page, memcg);
return error;
}
@@ -575,13 +573,14 @@ static int __add_to_page_cache_locked(struct page *page,
goto err_insert;
__inc_zone_page_state(page, NR_FILE_PAGES);
spin_unlock_irq(&mapping->tree_lock);
+ mem_cgroup_commit_charge(page, memcg, false);
trace_mm_filemap_add_to_page_cache(page);
return 0;
err_insert:
page->mapping = NULL;
/* Leave page->index set: truncation relies upon it */
spin_unlock_irq(&mapping->tree_lock);
- mem_cgroup_uncharge_cache_page(page);
+ mem_cgroup_cancel_charge(page, memcg);
page_cache_release(page);
return error;
}
@@ -808,6 +807,17 @@ int __lock_page_killable(struct page *page)
}
EXPORT_SYMBOL_GPL(__lock_page_killable);
+/*
+ * Return values:
+ * 1 - page is locked; mmap_sem is still held.
+ * 0 - page is not locked.
+ * mmap_sem has been released (up_read()), unless flags had both
+ * FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
+ * which case mmap_sem is still held.
+ *
+ * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1
+ * with the page locked and the mmap_sem unperturbed.
+ */
int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags)
{
@@ -1088,9 +1098,9 @@ no_page:
if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
fgp_flags |= FGP_LOCK;
- /* Init accessed so avoit atomic mark_page_accessed later */
+ /* Init accessed so avoid atomic mark_page_accessed later */
if (fgp_flags & FGP_ACCESSED)
- init_page_accessed(page);
+ __SetPageReferenced(page);
err = add_to_page_cache_lru(page, mapping, offset, radix_gfp_mask);
if (unlikely(err)) {
@@ -1824,6 +1834,18 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
* The goto's are kind of ugly, but this streamlines the normal case of having
* it in the page cache, and handles the special cases reasonably without
* having a lot of duplicated code.
+ *
+ * vma->vm_mm->mmap_sem must be held on entry.
+ *
+ * If our return value has VM_FAULT_RETRY set, it's because
+ * lock_page_or_retry() returned 0.
+ * The mmap_sem has usually been released in this case.
+ * See __lock_page_or_retry() for the exception.
+ *
+ * If our return value does not have VM_FAULT_RETRY set, the mmap_sem
+ * has not been released.
+ *
+ * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
*/
int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
diff --git a/mm/gup.c b/mm/gup.c
index cc5a9e7adea7..91d044b1600d 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -258,6 +258,11 @@ unmap:
return ret;
}
+/*
+ * mmap_sem must be held on entry. If @nonblocking != NULL and
+ * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
+ * If it is, *@nonblocking will be set to 0 and -EBUSY returned.
+ */
static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
unsigned long address, unsigned int *flags, int *nonblocking)
{
@@ -373,7 +378,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
* with a put_page() call when it is finished with. vmas will only
* remain valid while mmap_sem is held.
*
- * Must be called with mmap_sem held for read or write.
+ * Must be called with mmap_sem held. It may be released. See below.
*
* __get_user_pages walks a process's page tables and takes a reference to
* each struct page that each user address corresponds to at a given
@@ -396,7 +401,14 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
*
* If @nonblocking != NULL, __get_user_pages will not wait for disk IO
* or mmap_sem contention, and if waiting is needed to pin all pages,
- * *@nonblocking will be set to 0.
+ * *@nonblocking will be set to 0. Further, if @gup_flags does not
+ * include FOLL_NOWAIT, the mmap_sem will be released via up_read() in
+ * this case.
+ *
+ * A caller using such a combination of @nonblocking and @gup_flags
+ * must therefore hold the mmap_sem for reading only, and recognize
+ * when it's been released. Otherwise, it must be held for either
+ * reading or writing and will not be released.
*
* In most cases, get_user_pages or get_user_pages_fast should be used
* instead of __get_user_pages. __get_user_pages should be used only if
@@ -528,7 +540,7 @@ EXPORT_SYMBOL(__get_user_pages);
* such architectures, gup() will not be enough to make a subsequent access
* succeed.
*
- * This should be called with the mm_sem held for read.
+ * This has the same semantics wrt the @mm->mmap_sem as does filemap_fault().
*/
int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
unsigned long address, unsigned int fault_flags)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 33514d88fef9..02559efd9827 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -715,13 +715,20 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
unsigned long haddr, pmd_t *pmd,
struct page *page)
{
+ struct mem_cgroup *memcg;
pgtable_t pgtable;
spinlock_t *ptl;
VM_BUG_ON_PAGE(!PageCompound(page), page);
+
+ if (mem_cgroup_try_charge(page, mm, GFP_TRANSHUGE, &memcg))
+ return VM_FAULT_OOM;
+
pgtable = pte_alloc_one(mm, haddr);
- if (unlikely(!pgtable))
+ if (unlikely(!pgtable)) {
+ mem_cgroup_cancel_charge(page, memcg);
return VM_FAULT_OOM;
+ }
clear_huge_page(page, haddr, HPAGE_PMD_NR);
/*
@@ -734,7 +741,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
ptl = pmd_lock(mm, pmd);
if (unlikely(!pmd_none(*pmd))) {
spin_unlock(ptl);
- mem_cgroup_uncharge_page(page);
+ mem_cgroup_cancel_charge(page, memcg);
put_page(page);
pte_free(mm, pgtable);
} else {
@@ -742,6 +749,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
entry = mk_huge_pmd(page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
page_add_new_anon_rmap(page, vma, haddr);
+ mem_cgroup_commit_charge(page, memcg, false);
+ lru_cache_add_active_or_unevictable(page, vma);
pgtable_trans_huge_deposit(mm, pmd, pgtable);
set_pmd_at(mm, haddr, pmd, entry);
add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
@@ -827,13 +836,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
}
- if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_KERNEL))) {
- put_page(page);
- count_vm_event(THP_FAULT_FALLBACK);
- return VM_FAULT_FALLBACK;
- }
if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
- mem_cgroup_uncharge_page(page);
put_page(page);
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
@@ -979,6 +982,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
struct page *page,
unsigned long haddr)
{
+ struct mem_cgroup *memcg;
spinlock_t *ptl;
pgtable_t pgtable;
pmd_t _pmd;
@@ -999,20 +1003,21 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
__GFP_OTHER_NODE,
vma, address, page_to_nid(page));
if (unlikely(!pages[i] ||
- mem_cgroup_charge_anon(pages[i], mm,
- GFP_KERNEL))) {
+ mem_cgroup_try_charge(pages[i], mm, GFP_KERNEL,
+ &memcg))) {
if (pages[i])
put_page(pages[i]);
- mem_cgroup_uncharge_start();
while (--i >= 0) {
- mem_cgroup_uncharge_page(pages[i]);
+ memcg = (void *)page_private(pages[i]);
+ set_page_private(pages[i], 0);
+ mem_cgroup_cancel_charge(pages[i], memcg);
put_page(pages[i]);
}
- mem_cgroup_uncharge_end();
kfree(pages);
ret |= VM_FAULT_OOM;
goto out;
}
+ set_page_private(pages[i], (unsigned long)memcg);
}
for (i = 0; i < HPAGE_PMD_NR; i++) {
@@ -1041,7 +1046,11 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
pte_t *pte, entry;
entry = mk_pte(pages[i], vma->vm_page_prot);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+ memcg = (void *)page_private(pages[i]);
+ set_page_private(pages[i], 0);
page_add_new_anon_rmap(pages[i], vma, haddr);
+ mem_cgroup_commit_charge(pages[i], memcg, false);
+ lru_cache_add_active_or_unevictable(pages[i], vma);
pte = pte_offset_map(&_pmd, haddr);
VM_BUG_ON(!pte_none(*pte));
set_pte_at(mm, haddr, pte, entry);
@@ -1065,12 +1074,12 @@ out:
out_free_pages:
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
- mem_cgroup_uncharge_start();
for (i = 0; i < HPAGE_PMD_NR; i++) {
- mem_cgroup_uncharge_page(pages[i]);
+ memcg = (void *)page_private(pages[i]);
+ set_page_private(pages[i], 0);
+ mem_cgroup_cancel_charge(pages[i], memcg);
put_page(pages[i]);
}
- mem_cgroup_uncharge_end();
kfree(pages);
goto out;
}
@@ -1081,6 +1090,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
spinlock_t *ptl;
int ret = 0;
struct page *page = NULL, *new_page;
+ struct mem_cgroup *memcg;
unsigned long haddr;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
@@ -1132,7 +1142,8 @@ alloc:
goto out;
}
- if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) {
+ if (unlikely(mem_cgroup_try_charge(new_page, mm,
+ GFP_TRANSHUGE, &memcg))) {
put_page(new_page);
if (page) {
split_huge_page(page);
@@ -1161,7 +1172,7 @@ alloc:
put_user_huge_page(page);
if (unlikely(!pmd_same(*pmd, orig_pmd))) {
spin_unlock(ptl);
- mem_cgroup_uncharge_page(new_page);
+ mem_cgroup_cancel_charge(new_page, memcg);
put_page(new_page);
goto out_mn;
} else {
@@ -1170,6 +1181,8 @@ alloc:
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
pmdp_clear_flush(vma, haddr, pmd);
page_add_new_anon_rmap(new_page, vma, haddr);
+ mem_cgroup_commit_charge(new_page, memcg, false);
+ lru_cache_add_active_or_unevictable(new_page, vma);
set_pmd_at(mm, haddr, pmd, entry);
update_mmu_cache_pmd(vma, address, pmd);
if (!page) {
@@ -1681,7 +1694,7 @@ static void __split_huge_page_refcount(struct page *page,
&page_tail->_count);
/* after clearing PageTail the gup refcount can be released */
- smp_mb();
+ smp_mb__after_atomic();
/*
* retain hwpoison flag of the poisoned tail page:
@@ -1775,6 +1788,8 @@ static int __split_huge_page_map(struct page *page,
if (pmd) {
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
pmd_populate(mm, &_pmd, pgtable);
+ if (pmd_write(*pmd))
+ BUG_ON(page_mapcount(page) != 1);
haddr = address;
for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
@@ -1784,8 +1799,6 @@ static int __split_huge_page_map(struct page *page,
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
if (!pmd_write(*pmd))
entry = pte_wrprotect(entry);
- else
- BUG_ON(page_mapcount(page) != 1);
if (!pmd_young(*pmd))
entry = pte_mkold(entry);
if (pmd_numa(*pmd))
@@ -2389,6 +2402,7 @@ static void collapse_huge_page(struct mm_struct *mm,
spinlock_t *pmd_ptl, *pte_ptl;
int isolated;
unsigned long hstart, hend;
+ struct mem_cgroup *memcg;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
@@ -2399,7 +2413,8 @@ static void collapse_huge_page(struct mm_struct *mm,
if (!new_page)
return;
- if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)))
+ if (unlikely(mem_cgroup_try_charge(new_page, mm,
+ GFP_TRANSHUGE, &memcg)))
return;
/*
@@ -2486,6 +2501,8 @@ static void collapse_huge_page(struct mm_struct *mm,
spin_lock(pmd_ptl);
BUG_ON(!pmd_none(*pmd));
page_add_new_anon_rmap(new_page, vma, address);
+ mem_cgroup_commit_charge(new_page, memcg, false);
+ lru_cache_add_active_or_unevictable(new_page, vma);
pgtable_trans_huge_deposit(mm, pmd, pgtable);
set_pmd_at(mm, address, pmd, _pmd);
update_mmu_cache_pmd(vma, address, pmd);
@@ -2499,7 +2516,7 @@ out_up_write:
return;
out:
- mem_cgroup_uncharge_page(new_page);
+ mem_cgroup_cancel_charge(new_page, memcg);
goto out_up_write;
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2024bbd573d2..a8d4155eb019 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -35,7 +35,6 @@
#include <linux/node.h>
#include "internal.h"
-const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
unsigned long hugepages_treat_as_movable;
int hugetlb_max_hstate __read_mostly;
@@ -1734,21 +1733,13 @@ static ssize_t nr_hugepages_show_common(struct kobject *kobj,
return sprintf(buf, "%lu\n", nr_huge_pages);
}
-static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
- struct kobject *kobj, struct kobj_attribute *attr,
- const char *buf, size_t len)
+static ssize_t __nr_hugepages_store_common(bool obey_mempolicy,
+ struct hstate *h, int nid,
+ unsigned long count, size_t len)
{
int err;
- int nid;
- unsigned long count;
- struct hstate *h;
NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
- err = kstrtoul(buf, 10, &count);
- if (err)
- goto out;
-
- h = kobj_to_hstate(kobj, &nid);
if (hstate_is_gigantic(h) && !gigantic_page_supported()) {
err = -EINVAL;
goto out;
@@ -1784,6 +1775,23 @@ out:
return err;
}
+static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
+ struct kobject *kobj, const char *buf,
+ size_t len)
+{
+ struct hstate *h;
+ unsigned long count;
+ int nid;
+ int err;
+
+ err = kstrtoul(buf, 10, &count);
+ if (err)
+ return err;
+
+ h = kobj_to_hstate(kobj, &nid);
+ return __nr_hugepages_store_common(obey_mempolicy, h, nid, count, len);
+}
+
static ssize_t nr_hugepages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -1793,7 +1801,7 @@ static ssize_t nr_hugepages_show(struct kobject *kobj,
static ssize_t nr_hugepages_store(struct kobject *kobj,
struct kobj_attribute *attr, const char *buf, size_t len)
{
- return nr_hugepages_store_common(false, kobj, attr, buf, len);
+ return nr_hugepages_store_common(false, kobj, buf, len);
}
HSTATE_ATTR(nr_hugepages);
@@ -1812,7 +1820,7 @@ static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj,
static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj,
struct kobj_attribute *attr, const char *buf, size_t len)
{
- return nr_hugepages_store_common(true, kobj, attr, buf, len);
+ return nr_hugepages_store_common(true, kobj, buf, len);
}
HSTATE_ATTR(nr_hugepages_mempolicy);
#endif
@@ -2248,36 +2256,21 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
void __user *buffer, size_t *length, loff_t *ppos)
{
struct hstate *h = &default_hstate;
- unsigned long tmp;
+ unsigned long tmp = h->max_huge_pages;
int ret;
if (!hugepages_supported())
return -ENOTSUPP;
- tmp = h->max_huge_pages;
-
- if (write && hstate_is_gigantic(h) && !gigantic_page_supported())
- return -EINVAL;
-
table->data = &tmp;
table->maxlen = sizeof(unsigned long);
ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
if (ret)
goto out;
- if (write) {
- NODEMASK_ALLOC(nodemask_t, nodes_allowed,
- GFP_KERNEL | __GFP_NORETRY);
- if (!(obey_mempolicy &&
- init_nodemask_of_mempolicy(nodes_allowed))) {
- NODEMASK_FREE(nodes_allowed);
- nodes_allowed = &node_states[N_MEMORY];
- }
- h->max_huge_pages = set_max_huge_pages(h, tmp, nodes_allowed);
-
- if (nodes_allowed != &node_states[N_MEMORY])
- NODEMASK_FREE(nodes_allowed);
- }
+ if (write)
+ ret = __nr_hugepages_store_common(obey_mempolicy, h,
+ NUMA_NO_NODE, tmp, *length);
out:
return ret;
}
@@ -2753,8 +2746,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
* from other VMAs and let the children be SIGKILLed if they are faulting the
* same region.
*/
-static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
- struct page *page, unsigned long address)
+static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
+ struct page *page, unsigned long address)
{
struct hstate *h = hstate_vma(vma);
struct vm_area_struct *iter_vma;
@@ -2793,8 +2786,6 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
address + huge_page_size(h), page);
}
mutex_unlock(&mapping->i_mmap_mutex);
-
- return 1;
}
/*
@@ -2809,7 +2800,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
{
struct hstate *h = hstate_vma(vma);
struct page *old_page, *new_page;
- int outside_reserve = 0;
+ int ret = 0, outside_reserve = 0;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
@@ -2839,14 +2830,14 @@ retry_avoidcopy:
page_cache_get(old_page);
- /* Drop page table lock as buddy allocator may be called */
+ /*
+ * Drop page table lock as buddy allocator may be called. It will
+ * be acquired again before returning to the caller, as expected.
+ */
spin_unlock(ptl);
new_page = alloc_huge_page(vma, address, outside_reserve);
if (IS_ERR(new_page)) {
- long err = PTR_ERR(new_page);
- page_cache_release(old_page);
-
/*
* If a process owning a MAP_PRIVATE mapping fails to COW,
* it is due to references held by a child and an insufficient
@@ -2855,29 +2846,25 @@ retry_avoidcopy:
* may get SIGKILLed if it later faults.
*/
if (outside_reserve) {
+ page_cache_release(old_page);
BUG_ON(huge_pte_none(pte));
- if (unmap_ref_private(mm, vma, old_page, address)) {
- BUG_ON(huge_pte_none(pte));
- spin_lock(ptl);
- ptep = huge_pte_offset(mm, address & huge_page_mask(h));
- if (likely(ptep &&
- pte_same(huge_ptep_get(ptep), pte)))
- goto retry_avoidcopy;
- /*
- * race occurs while re-acquiring page table
- * lock, and our job is done.
- */
- return 0;
- }
- WARN_ON_ONCE(1);
+ unmap_ref_private(mm, vma, old_page, address);
+ BUG_ON(huge_pte_none(pte));
+ spin_lock(ptl);
+ ptep = huge_pte_offset(mm, address & huge_page_mask(h));
+ if (likely(ptep &&
+ pte_same(huge_ptep_get(ptep), pte)))
+ goto retry_avoidcopy;
+ /*
+ * race occurs while re-acquiring page table
+ * lock, and our job is done.
+ */
+ return 0;
}
- /* Caller expects lock to be held */
- spin_lock(ptl);
- if (err == -ENOMEM)
- return VM_FAULT_OOM;
- else
- return VM_FAULT_SIGBUS;
+ ret = (PTR_ERR(new_page) == -ENOMEM) ?
+ VM_FAULT_OOM : VM_FAULT_SIGBUS;
+ goto out_release_old;
}
/*
@@ -2885,11 +2872,8 @@ retry_avoidcopy:
* anon_vma prepared.
*/
if (unlikely(anon_vma_prepare(vma))) {
- page_cache_release(new_page);
- page_cache_release(old_page);
- /* Caller expects lock to be held */
- spin_lock(ptl);
- return VM_FAULT_OOM;
+ ret = VM_FAULT_OOM;
+ goto out_release_all;
}
copy_user_huge_page(new_page, old_page, address, vma,
@@ -2899,6 +2883,7 @@ retry_avoidcopy:
mmun_start = address & huge_page_mask(h);
mmun_end = mmun_start + huge_page_size(h);
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
/*
* Retake the page table lock to check for racing updates
* before the page tables are altered
@@ -2919,12 +2904,13 @@ retry_avoidcopy:
}
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+out_release_all:
page_cache_release(new_page);
+out_release_old:
page_cache_release(old_page);
- /* Caller expects lock to be held */
- spin_lock(ptl);
- return 0;
+ spin_lock(ptl); /* Caller expects lock to be held */
+ return ret;
}
/* Return the pagecache page at a given address within a VMA */
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index 95487c71cad5..329caf56df22 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -72,8 +72,7 @@ DEFINE_SIMPLE_ATTRIBUTE(unpoison_fops, NULL, hwpoison_unpoison, "%lli\n");
static void pfn_inject_exit(void)
{
- if (hwpoison_dir)
- debugfs_remove_recursive(hwpoison_dir);
+ debugfs_remove_recursive(hwpoison_dir);
}
static int pfn_inject_init(void)
diff --git a/mm/internal.h b/mm/internal.h
index 7f22a11fcc66..a1b651b11c5f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -247,7 +247,7 @@ static inline void mlock_migrate_page(struct page *new, struct page *old) { }
static inline struct page *mem_map_offset(struct page *base, int offset)
{
if (unlikely(offset >= MAX_ORDER_NR_PAGES))
- return pfn_to_page(page_to_pfn(base) + offset);
+ return nth_page(base, offset);
return base + offset;
}
diff --git a/mm/madvise.c b/mm/madvise.c
index a402f8fdc68e..0938b30da4ab 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -292,9 +292,6 @@ static long madvise_dontneed(struct vm_area_struct *vma,
/*
* Application wants to free up the pages and associated backing store.
* This is effectively punching a hole into the middle of a file.
- *
- * NOTE: Currently, only shmfs/tmpfs is supported for this operation.
- * Other filesystems return -ENOSYS.
*/
static long madvise_remove(struct vm_area_struct *vma,
struct vm_area_struct **prev,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 45c10c6fc3ce..85dd94f2ecce 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -754,9 +754,11 @@ static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
struct mem_cgroup_tree_per_zone *mctz)
{
- spin_lock(&mctz->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&mctz->lock, flags);
__mem_cgroup_remove_exceeded(mz, mctz);
- spin_unlock(&mctz->lock);
+ spin_unlock_irqrestore(&mctz->lock, flags);
}
@@ -779,7 +781,9 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
* mem is over its softlimit.
*/
if (excess || mz->on_tree) {
- spin_lock(&mctz->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&mctz->lock, flags);
/* if on-tree, remove it */
if (mz->on_tree)
__mem_cgroup_remove_exceeded(mz, mctz);
@@ -788,7 +792,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
* If excess is 0, no tree ops.
*/
__mem_cgroup_insert_exceeded(mz, mctz, excess);
- spin_unlock(&mctz->lock);
+ spin_unlock_irqrestore(&mctz->lock, flags);
}
}
}
@@ -839,9 +843,9 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
{
struct mem_cgroup_per_zone *mz;
- spin_lock(&mctz->lock);
+ spin_lock_irq(&mctz->lock);
mz = __mem_cgroup_largest_soft_limit_node(mctz);
- spin_unlock(&mctz->lock);
+ spin_unlock_irq(&mctz->lock);
return mz;
}
@@ -882,13 +886,6 @@ static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
return val;
}
-static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
- bool charge)
-{
- int val = (charge) ? 1 : -1;
- this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
-}
-
static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
enum mem_cgroup_events_index idx)
{
@@ -909,13 +906,13 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
struct page *page,
- bool anon, int nr_pages)
+ int nr_pages)
{
/*
* Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is
* counted as CACHE even if it's on ANON LRU.
*/
- if (anon)
+ if (PageAnon(page))
__this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS],
nr_pages);
else
@@ -1013,7 +1010,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
*/
static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
{
- preempt_disable();
/* threshold event is triggered in finer grain than soft limit */
if (unlikely(mem_cgroup_event_ratelimit(memcg,
MEM_CGROUP_TARGET_THRESH))) {
@@ -1026,8 +1022,6 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
do_numainfo = mem_cgroup_event_ratelimit(memcg,
MEM_CGROUP_TARGET_NUMAINFO);
#endif
- preempt_enable();
-
mem_cgroup_threshold(memcg);
if (unlikely(do_softlimit))
mem_cgroup_update_tree(memcg, page);
@@ -1035,8 +1029,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
if (unlikely(do_numainfo))
atomic_inc(&memcg->numainfo_events);
#endif
- } else
- preempt_enable();
+ }
}
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
@@ -1347,20 +1340,6 @@ out:
return lruvec;
}
-/*
- * Following LRU functions are allowed to be used without PCG_LOCK.
- * Operations are called by routine of global LRU independently from memcg.
- * What we have to take care of here is validness of pc->mem_cgroup.
- *
- * Changes to pc->mem_cgroup happens when
- * 1. charge
- * 2. moving account
- * In typical case, "charge" is done before add-to-lru. Exception is SwapCache.
- * It is added to LRU before charge.
- * If PCG_USED bit is not set, page_cgroup is not added to this private LRU.
- * When moving account, the page is not on LRU. It's isolated.
- */
-
/**
* mem_cgroup_page_lruvec - return lruvec for adding an lru page
* @page: the page
@@ -2261,22 +2240,14 @@ cleanup:
*
* Notes: Race condition
*
- * We usually use lock_page_cgroup() for accessing page_cgroup member but
- * it tends to be costly. But considering some conditions, we doesn't need
- * to do so _always_.
+ * Charging occurs during page instantiation, while the page is
+ * unmapped and locked in page migration, or while the page table is
+ * locked in THP migration. No race is possible.
*
- * Considering "charge", lock_page_cgroup() is not required because all
- * file-stat operations happen after a page is attached to radix-tree. There
- * are no race with "charge".
+ * Uncharge happens to pages with zero references, no race possible.
*
- * Considering "uncharge", we know that memcg doesn't clear pc->mem_cgroup
- * at "uncharge" intentionally. So, we always see valid pc->mem_cgroup even
- * if there are race with "uncharge". Statistics itself is properly handled
- * by flags.
- *
- * Considering "move", this is an only case we see a race. To make the race
- * small, we check memcg->moving_account and detect there are possibility
- * of race or not. If there is, we take a lock.
+ * Charge moving between groups is protected by checking mm->moving
+ * account and taking the move_lock in the slowpath.
*/
void __mem_cgroup_begin_update_page_stat(struct page *page,
@@ -2551,55 +2522,63 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
return NOTIFY_OK;
}
-
-/* See mem_cgroup_try_charge() for details */
-enum {
- CHARGE_OK, /* success */
- CHARGE_RETRY, /* need to retry but retry is not bad */
- CHARGE_NOMEM, /* we can't do more. return -ENOMEM */
- CHARGE_WOULDBLOCK, /* GFP_WAIT wasn't set and no enough res. */
-};
-
-static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
- unsigned int nr_pages, unsigned int min_pages,
- bool invoke_oom)
+static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
+ unsigned int nr_pages)
{
- unsigned long csize = nr_pages * PAGE_SIZE;
+ unsigned int batch = max(CHARGE_BATCH, nr_pages);
+ int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
struct mem_cgroup *mem_over_limit;
struct res_counter *fail_res;
+ unsigned long nr_reclaimed;
unsigned long flags = 0;
- int ret;
+ unsigned long long size;
+ int ret = 0;
- ret = res_counter_charge(&memcg->res, csize, &fail_res);
+retry:
+ if (consume_stock(memcg, nr_pages))
+ goto done;
- if (likely(!ret)) {
+ size = batch * PAGE_SIZE;
+ if (!res_counter_charge(&memcg->res, size, &fail_res)) {
if (!do_swap_account)
- return CHARGE_OK;
- ret = res_counter_charge(&memcg->memsw, csize, &fail_res);
- if (likely(!ret))
- return CHARGE_OK;
-
- res_counter_uncharge(&memcg->res, csize);
+ goto done_restock;
+ if (!res_counter_charge(&memcg->memsw, size, &fail_res))
+ goto done_restock;
+ res_counter_uncharge(&memcg->res, size);
mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
flags |= MEM_CGROUP_RECLAIM_NOSWAP;
} else
mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
+
+ if (batch > nr_pages) {
+ batch = nr_pages;
+ goto retry;
+ }
+
/*
- * Never reclaim on behalf of optional batching, retry with a
- * single page instead.
+ * Unlike in global OOM situations, memcg is not in a physical
+ * memory shortage. Allow dying and OOM-killed tasks to
+ * bypass the last charges so that they can exit quickly and
+ * free their memory.
*/
- if (nr_pages > min_pages)
- return CHARGE_RETRY;
+ if (unlikely(test_thread_flag(TIF_MEMDIE) ||
+ fatal_signal_pending(current) ||
+ current->flags & PF_EXITING))
+ goto bypass;
+
+ if (unlikely(task_in_memcg_oom(current)))
+ goto nomem;
if (!(gfp_mask & __GFP_WAIT))
- return CHARGE_WOULDBLOCK;
+ goto nomem;
- if (gfp_mask & __GFP_NORETRY)
- return CHARGE_NOMEM;
+ nr_reclaimed = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
+
+ if (mem_cgroup_margin(mem_over_limit) >= batch)
+ goto retry;
- ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
- if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
- return CHARGE_RETRY;
+ if (gfp_mask & __GFP_NORETRY)
+ goto nomem;
/*
* Even though the limit is exceeded at this point, reclaim
* may have been able to free some pages. Retry the charge
@@ -2609,142 +2588,47 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
* unlikely to succeed so close to the limit, and we fall back
* to regular pages anyway in case of failure.
*/
- if (nr_pages <= (1 << PAGE_ALLOC_COSTLY_ORDER) && ret)
- return CHARGE_RETRY;
-
+ if (nr_reclaimed && batch <= (1 << PAGE_ALLOC_COSTLY_ORDER))
+ goto retry;
/*
* At task move, charge accounts can be doubly counted. So, it's
* better to wait until the end of task_move if something is going on.
*/
if (mem_cgroup_wait_acct_move(mem_over_limit))
- return CHARGE_RETRY;
-
- if (invoke_oom)
- mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(csize));
-
- return CHARGE_NOMEM;
-}
-
-/**
- * mem_cgroup_try_charge - try charging a memcg
- * @memcg: memcg to charge
- * @nr_pages: number of pages to charge
- * @oom: trigger OOM if reclaim fails
- *
- * Returns 0 if @memcg was charged successfully, -EINTR if the charge
- * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
- */
-static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
- gfp_t gfp_mask,
- unsigned int nr_pages,
- bool oom)
-{
- unsigned int batch = max(CHARGE_BATCH, nr_pages);
- int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
- int ret;
-
- if (mem_cgroup_is_root(memcg))
- goto done;
- /*
- * Unlike in global OOM situations, memcg is not in a physical
- * memory shortage. Allow dying and OOM-killed tasks to
- * bypass the last charges so that they can exit quickly and
- * free their memory.
- */
- if (unlikely(test_thread_flag(TIF_MEMDIE) ||
- fatal_signal_pending(current) ||
- current->flags & PF_EXITING))
- goto bypass;
+ goto retry;
- if (unlikely(task_in_memcg_oom(current)))
- goto nomem;
+ if (nr_retries--)
+ goto retry;
if (gfp_mask & __GFP_NOFAIL)
- oom = false;
-again:
- if (consume_stock(memcg, nr_pages))
- goto done;
-
- do {
- bool invoke_oom = oom && !nr_oom_retries;
-
- /* If killed, bypass charge */
- if (fatal_signal_pending(current))
- goto bypass;
+ goto bypass;
- ret = mem_cgroup_do_charge(memcg, gfp_mask, batch,
- nr_pages, invoke_oom);
- switch (ret) {
- case CHARGE_OK:
- break;
- case CHARGE_RETRY: /* not in OOM situation but retry */
- batch = nr_pages;
- goto again;
- case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
- goto nomem;
- case CHARGE_NOMEM: /* OOM routine works */
- if (!oom || invoke_oom)
- goto nomem;
- nr_oom_retries--;
- break;
- }
- } while (ret != CHARGE_OK);
+ if (fatal_signal_pending(current))
+ goto bypass;
- if (batch > nr_pages)
- refill_stock(memcg, batch - nr_pages);
-done:
- return 0;
+ mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(batch));
nomem:
if (!(gfp_mask & __GFP_NOFAIL))
return -ENOMEM;
bypass:
- return -EINTR;
-}
-
-/**
- * mem_cgroup_try_charge_mm - try charging a mm
- * @mm: mm_struct to charge
- * @nr_pages: number of pages to charge
- * @oom: trigger OOM if reclaim fails
- *
- * Returns the charged mem_cgroup associated with the given mm_struct or
- * NULL the charge failed.
- */
-static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
- gfp_t gfp_mask,
- unsigned int nr_pages,
- bool oom)
+ memcg = root_mem_cgroup;
+ ret = -EINTR;
+ goto retry;
-{
- struct mem_cgroup *memcg;
- int ret;
-
- memcg = get_mem_cgroup_from_mm(mm);
- ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom);
- css_put(&memcg->css);
- if (ret == -EINTR)
- memcg = root_mem_cgroup;
- else if (ret)
- memcg = NULL;
-
- return memcg;
+done_restock:
+ if (batch > nr_pages)
+ refill_stock(memcg, batch - nr_pages);
+done:
+ return ret;
}
-/*
- * Somemtimes we have to undo a charge we got by try_charge().
- * This function is for that and do uncharge, put css's refcnt.
- * gotten by try_charge().
- */
-static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
- unsigned int nr_pages)
+static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
{
- if (!mem_cgroup_is_root(memcg)) {
- unsigned long bytes = nr_pages * PAGE_SIZE;
+ unsigned long bytes = nr_pages * PAGE_SIZE;
- res_counter_uncharge(&memcg->res, bytes);
- if (do_swap_account)
- res_counter_uncharge(&memcg->memsw, bytes);
- }
+ res_counter_uncharge(&memcg->res, bytes);
+ if (do_swap_account)
+ res_counter_uncharge(&memcg->memsw, bytes);
}
/*
@@ -2756,9 +2640,6 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
{
unsigned long bytes = nr_pages * PAGE_SIZE;
- if (mem_cgroup_is_root(memcg))
- return;
-
res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes);
if (do_swap_account)
res_counter_uncharge_until(&memcg->memsw,
@@ -2779,6 +2660,16 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
return mem_cgroup_from_id(id);
}
+/*
+ * try_get_mem_cgroup_from_page - look up page's memcg association
+ * @page: the page
+ *
+ * Look up, get a css reference, and return the memcg that owns @page.
+ *
+ * The page must be locked to prevent racing with swap-in and page
+ * cache charges. If coming from an unlocked page table, the caller
+ * must ensure the page is on the LRU or this can race with charging.
+ */
struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
{
struct mem_cgroup *memcg = NULL;
@@ -2789,7 +2680,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
VM_BUG_ON_PAGE(!PageLocked(page), page);
pc = lookup_page_cgroup(page);
- lock_page_cgroup(pc);
if (PageCgroupUsed(pc)) {
memcg = pc->mem_cgroup;
if (memcg && !css_tryget_online(&memcg->css))
@@ -2803,23 +2693,17 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
memcg = NULL;
rcu_read_unlock();
}
- unlock_page_cgroup(pc);
return memcg;
}
-static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
- struct page *page,
- unsigned int nr_pages,
- enum charge_type ctype,
- bool lrucare)
+static void commit_charge(struct page *page, struct mem_cgroup *memcg,
+ unsigned int nr_pages, bool lrucare)
{
struct page_cgroup *pc = lookup_page_cgroup(page);
struct zone *uninitialized_var(zone);
- struct lruvec *lruvec;
bool was_on_lru = false;
- bool anon;
+ struct lruvec *lruvec;
- lock_page_cgroup(pc);
VM_BUG_ON_PAGE(PageCgroupUsed(pc), page);
/*
* we don't need page_cgroup_lock about tail pages, becase they are not
@@ -2841,16 +2725,22 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
}
}
- pc->mem_cgroup = memcg;
/*
- * We access a page_cgroup asynchronously without lock_page_cgroup().
- * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
- * is accessed after testing USED bit. To make pc->mem_cgroup visible
- * before USED bit, we need memory barrier here.
- * See mem_cgroup_add_lru_list(), etc.
+ * Nobody should be changing or seriously looking at
+ * pc->mem_cgroup and pc->flags at this point:
+ *
+ * - the page is uncharged
+ *
+ * - the page is off-LRU
+ *
+ * - an anonymous fault has exclusive page access, except for
+ * a locked page table
+ *
+ * - a page cache insertion, a swapin fault, or a migration
+ * have the page locked
*/
- smp_wmb();
- SetPageCgroupUsed(pc);
+ pc->mem_cgroup = memcg;
+ pc->flags = PCG_USED | PCG_MEM | (do_swap_account ? PCG_MEMSW : 0);
if (lrucare) {
if (was_on_lru) {
@@ -2862,20 +2752,15 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
spin_unlock_irq(&zone->lru_lock);
}
- if (ctype == MEM_CGROUP_CHARGE_TYPE_ANON)
- anon = true;
- else
- anon = false;
-
- mem_cgroup_charge_statistics(memcg, page, anon, nr_pages);
- unlock_page_cgroup(pc);
-
+ local_irq_disable();
+ mem_cgroup_charge_statistics(memcg, page, nr_pages);
/*
* "charge_statistics" updated event counter. Then, check it.
* Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
* if they exceeds softlimit.
*/
memcg_check_events(memcg, page);
+ local_irq_enable();
}
static DEFINE_MUTEX(set_limit_mutex);
@@ -2937,22 +2822,21 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
if (ret)
return ret;
- ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT,
- oom_gfp_allowed(gfp));
+ ret = try_charge(memcg, gfp, size >> PAGE_SHIFT);
if (ret == -EINTR) {
/*
- * mem_cgroup_try_charge() chosed to bypass to root due to
- * OOM kill or fatal signal. Since our only options are to
- * either fail the allocation or charge it to this cgroup, do
- * it as a temporary condition. But we can't fail. From a
- * kmem/slab perspective, the cache has already been selected,
- * by mem_cgroup_kmem_get_cache(), so it is too late to change
+ * try_charge() chose to bypass to root due to OOM kill or
+ * fatal signal. Since our only options are to either fail
+ * the allocation or charge it to this cgroup, do it as a
+ * temporary condition. But we can't fail. From a kmem/slab
+ * perspective, the cache has already been selected, by
+ * mem_cgroup_kmem_get_cache(), so it is too late to change
* our minds.
*
* This condition will only trigger if the task entered
- * memcg_charge_kmem in a sane state, but was OOM-killed during
- * mem_cgroup_try_charge() above. Tasks that were already
- * dying when the allocation triggers should have been already
+ * memcg_charge_kmem in a sane state, but was OOM-killed
+ * during try_charge() above. Tasks that were already dying
+ * when the allocation triggers should have been already
* directed to the root cgroup in memcontrol.h
*/
res_counter_charge_nofail(&memcg->res, size, &fail_res);
@@ -3463,12 +3347,13 @@ void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
return;
}
-
+ /*
+ * The page is freshly allocated and not visible to any
+ * outside callers yet. Set up pc non-atomically.
+ */
pc = lookup_page_cgroup(page);
- lock_page_cgroup(pc);
pc->mem_cgroup = memcg;
- SetPageCgroupUsed(pc);
- unlock_page_cgroup(pc);
+ pc->flags = PCG_USED;
}
void __memcg_kmem_uncharge_pages(struct page *page, int order)
@@ -3478,19 +3363,11 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
pc = lookup_page_cgroup(page);
- /*
- * Fast unlocked return. Theoretically might have changed, have to
- * check again after locking.
- */
if (!PageCgroupUsed(pc))
return;
- lock_page_cgroup(pc);
- if (PageCgroupUsed(pc)) {
- memcg = pc->mem_cgroup;
- ClearPageCgroupUsed(pc);
- }
- unlock_page_cgroup(pc);
+ memcg = pc->mem_cgroup;
+ pc->flags = 0;
/*
* We trust that only if there is a memcg associated with the page, it
@@ -3510,7 +3387,6 @@ static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION)
/*
* Because tail pages are not marked as "used", set it. We're under
* zone->lru_lock, 'splitting on pmd' and compound_lock.
@@ -3531,8 +3407,7 @@ void mem_cgroup_split_huge_fixup(struct page *head)
for (i = 1; i < HPAGE_PMD_NR; i++) {
pc = head_pc + i;
pc->mem_cgroup = memcg;
- smp_wmb();/* see __commit_charge() */
- pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+ pc->flags = head_pc->flags;
}
__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
HPAGE_PMD_NR);
@@ -3562,7 +3437,6 @@ static int mem_cgroup_move_account(struct page *page,
{
unsigned long flags;
int ret;
- bool anon = PageAnon(page);
VM_BUG_ON(from == to);
VM_BUG_ON_PAGE(PageLRU(page), page);
@@ -3576,15 +3450,13 @@ static int mem_cgroup_move_account(struct page *page,
if (nr_pages > 1 && !PageTransHuge(page))
goto out;
- lock_page_cgroup(pc);
-
ret = -EINVAL;
if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
- goto unlock;
+ goto out;
move_lock_mem_cgroup(from, &flags);
- if (!anon && page_mapped(page)) {
+ if (!PageAnon(page) && page_mapped(page)) {
__this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
nr_pages);
__this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
@@ -3598,20 +3470,23 @@ static int mem_cgroup_move_account(struct page *page,
nr_pages);
}
- mem_cgroup_charge_statistics(from, page, anon, -nr_pages);
+ /*
+ * It is safe to change pc->mem_cgroup here because the page
+ * is referenced, charged, and isolated - we can't race with
+ * uncharging, charging, migration, or LRU putback.
+ */
/* caller should have done css_get */
pc->mem_cgroup = to;
- mem_cgroup_charge_statistics(to, page, anon, nr_pages);
move_unlock_mem_cgroup(from, &flags);
ret = 0;
-unlock:
- unlock_page_cgroup(pc);
- /*
- * check events
- */
+
+ local_irq_disable();
+ mem_cgroup_charge_statistics(to, page, nr_pages);
memcg_check_events(to, page);
+ mem_cgroup_charge_statistics(from, page, -nr_pages);
memcg_check_events(from, page);
+ local_irq_enable();
out:
return ret;
}
@@ -3682,456 +3557,12 @@ out:
return ret;
}
-int mem_cgroup_charge_anon(struct page *page,
- struct mm_struct *mm, gfp_t gfp_mask)
-{
- unsigned int nr_pages = 1;
- struct mem_cgroup *memcg;
- bool oom = true;
-
- if (mem_cgroup_disabled())
- return 0;
-
- VM_BUG_ON_PAGE(page_mapped(page), page);
- VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
- VM_BUG_ON(!mm);
-
- if (PageTransHuge(page)) {
- nr_pages <<= compound_order(page);
- VM_BUG_ON_PAGE(!PageTransHuge(page), page);
- /*
- * Never OOM-kill a process for a huge page. The
- * fault handler will fall back to regular pages.
- */
- oom = false;
- }
-
- memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom);
- if (!memcg)
- return -ENOMEM;
- __mem_cgroup_commit_charge(memcg, page, nr_pages,
- MEM_CGROUP_CHARGE_TYPE_ANON, false);
- return 0;
-}
-
-/*
- * While swap-in, try_charge -> commit or cancel, the page is locked.
- * And when try_charge() successfully returns, one refcnt to memcg without
- * struct page_cgroup is acquired. This refcnt will be consumed by
- * "commit()" or removed by "cancel()"
- */
-static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
- struct page *page,
- gfp_t mask,
- struct mem_cgroup **memcgp)
-{
- struct mem_cgroup *memcg = NULL;
- struct page_cgroup *pc;
- int ret;
-
- pc = lookup_page_cgroup(page);
- /*
- * Every swap fault against a single page tries to charge the
- * page, bail as early as possible. shmem_unuse() encounters
- * already charged pages, too. The USED bit is protected by
- * the page lock, which serializes swap cache removal, which
- * in turn serializes uncharging.
- */
- if (PageCgroupUsed(pc))
- goto out;
- if (do_swap_account)
- memcg = try_get_mem_cgroup_from_page(page);
- if (!memcg)
- memcg = get_mem_cgroup_from_mm(mm);
- ret = mem_cgroup_try_charge(memcg, mask, 1, true);
- css_put(&memcg->css);
- if (ret == -EINTR)
- memcg = root_mem_cgroup;
- else if (ret)
- return ret;
-out:
- *memcgp = memcg;
- return 0;
-}
-
-int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
- gfp_t gfp_mask, struct mem_cgroup **memcgp)
-{
- if (mem_cgroup_disabled()) {
- *memcgp = NULL;
- return 0;
- }
- /*
- * A racing thread's fault, or swapoff, may have already
- * updated the pte, and even removed page from swap cache: in
- * those cases unuse_pte()'s pte_same() test will fail; but
- * there's also a KSM case which does need to charge the page.
- */
- if (!PageSwapCache(page)) {
- struct mem_cgroup *memcg;
-
- memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
- if (!memcg)
- return -ENOMEM;
- *memcgp = memcg;
- return 0;
- }
- return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp);
-}
-
-void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
-{
- if (mem_cgroup_disabled())
- return;
- if (!memcg)
- return;
- __mem_cgroup_cancel_charge(memcg, 1);
-}
-
-static void
-__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
- enum charge_type ctype)
-{
- if (mem_cgroup_disabled())
- return;
- if (!memcg)
- return;
-
- __mem_cgroup_commit_charge(memcg, page, 1, ctype, true);
- /*
- * Now swap is on-memory. This means this page may be
- * counted both as mem and swap....double count.
- * Fix it by uncharging from memsw. Basically, this SwapCache is stable
- * under lock_page(). But in do_swap_page()::memory.c, reuse_swap_page()
- * may call delete_from_swap_cache() before reach here.
- */
- if (do_swap_account && PageSwapCache(page)) {
- swp_entry_t ent = {.val = page_private(page)};
- mem_cgroup_uncharge_swap(ent);
- }
-}
-
-void mem_cgroup_commit_charge_swapin(struct page *page,
- struct mem_cgroup *memcg)
-{
- __mem_cgroup_commit_charge_swapin(page, memcg,
- MEM_CGROUP_CHARGE_TYPE_ANON);
-}
-
-int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask)
-{
- enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
- struct mem_cgroup *memcg;
- int ret;
-
- if (mem_cgroup_disabled())
- return 0;
- if (PageCompound(page))
- return 0;
-
- if (PageSwapCache(page)) { /* shmem */
- ret = __mem_cgroup_try_charge_swapin(mm, page,
- gfp_mask, &memcg);
- if (ret)
- return ret;
- __mem_cgroup_commit_charge_swapin(page, memcg, type);
- return 0;
- }
-
- memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
- if (!memcg)
- return -ENOMEM;
- __mem_cgroup_commit_charge(memcg, page, 1, type, false);
- return 0;
-}
-
-static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
- unsigned int nr_pages,
- const enum charge_type ctype)
-{
- struct memcg_batch_info *batch = NULL;
- bool uncharge_memsw = true;
-
- /* If swapout, usage of swap doesn't decrease */
- if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
- uncharge_memsw = false;
-
- batch = &current->memcg_batch;
- /*
- * In usual, we do css_get() when we remember memcg pointer.
- * But in this case, we keep res->usage until end of a series of
- * uncharges. Then, it's ok to ignore memcg's refcnt.
- */
- if (!batch->memcg)
- batch->memcg = memcg;
- /*
- * do_batch > 0 when unmapping pages or inode invalidate/truncate.
- * In those cases, all pages freed continuously can be expected to be in
- * the same cgroup and we have chance to coalesce uncharges.
- * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE)
- * because we want to do uncharge as soon as possible.
- */
-
- if (!batch->do_batch || test_thread_flag(TIF_MEMDIE))
- goto direct_uncharge;
-
- if (nr_pages > 1)
- goto direct_uncharge;
-
- /*
- * In typical case, batch->memcg == mem. This means we can
- * merge a series of uncharges to an uncharge of res_counter.
- * If not, we uncharge res_counter ony by one.
- */
- if (batch->memcg != memcg)
- goto direct_uncharge;
- /* remember freed charge and uncharge it later */
- batch->nr_pages++;
- if (uncharge_memsw)
- batch->memsw_nr_pages++;
- return;
-direct_uncharge:
- res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
- if (uncharge_memsw)
- res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
- if (unlikely(batch->memcg != memcg))
- memcg_oom_recover(memcg);
-}
-
-/*
- * uncharge if !page_mapped(page)
- */
-static struct mem_cgroup *
-__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
- bool end_migration)
-{
- struct mem_cgroup *memcg = NULL;
- unsigned int nr_pages = 1;
- struct page_cgroup *pc;
- bool anon;
-
- if (mem_cgroup_disabled())
- return NULL;
-
- if (PageTransHuge(page)) {
- nr_pages <<= compound_order(page);
- VM_BUG_ON_PAGE(!PageTransHuge(page), page);
- }
- /*
- * Check if our page_cgroup is valid
- */
- pc = lookup_page_cgroup(page);
- if (unlikely(!PageCgroupUsed(pc)))
- return NULL;
-
- lock_page_cgroup(pc);
-
- memcg = pc->mem_cgroup;
-
- if (!PageCgroupUsed(pc))
- goto unlock_out;
-
- anon = PageAnon(page);
-
- switch (ctype) {
- case MEM_CGROUP_CHARGE_TYPE_ANON:
- /*
- * Generally PageAnon tells if it's the anon statistics to be
- * updated; but sometimes e.g. mem_cgroup_uncharge_page() is
- * used before page reached the stage of being marked PageAnon.
- */
- anon = true;
- /* fallthrough */
- case MEM_CGROUP_CHARGE_TYPE_DROP:
- /* See mem_cgroup_prepare_migration() */
- if (page_mapped(page))
- goto unlock_out;
- /*
- * Pages under migration may not be uncharged. But
- * end_migration() /must/ be the one uncharging the
- * unused post-migration page and so it has to call
- * here with the migration bit still set. See the
- * res_counter handling below.
- */
- if (!end_migration && PageCgroupMigration(pc))
- goto unlock_out;
- break;
- case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
- if (!PageAnon(page)) { /* Shared memory */
- if (page->mapping && !page_is_file_cache(page))
- goto unlock_out;
- } else if (page_mapped(page)) /* Anon */
- goto unlock_out;
- break;
- default:
- break;
- }
-
- mem_cgroup_charge_statistics(memcg, page, anon, -nr_pages);
-
- ClearPageCgroupUsed(pc);
- /*
- * pc->mem_cgroup is not cleared here. It will be accessed when it's
- * freed from LRU. This is safe because uncharged page is expected not
- * to be reused (freed soon). Exception is SwapCache, it's handled by
- * special functions.
- */
-
- unlock_page_cgroup(pc);
- /*
- * even after unlock, we have memcg->res.usage here and this memcg
- * will never be freed, so it's safe to call css_get().
- */
- memcg_check_events(memcg, page);
- if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) {
- mem_cgroup_swap_statistics(memcg, true);
- css_get(&memcg->css);
- }
- /*
- * Migration does not charge the res_counter for the
- * replacement page, so leave it alone when phasing out the
- * page that is unused after the migration.
- */
- if (!end_migration && !mem_cgroup_is_root(memcg))
- mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
-
- return memcg;
-
-unlock_out:
- unlock_page_cgroup(pc);
- return NULL;
-}
-
-void mem_cgroup_uncharge_page(struct page *page)
-{
- /* early check. */
- if (page_mapped(page))
- return;
- VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
- /*
- * If the page is in swap cache, uncharge should be deferred
- * to the swap path, which also properly accounts swap usage
- * and handles memcg lifetime.
- *
- * Note that this check is not stable and reclaim may add the
- * page to swap cache at any time after this. However, if the
- * page is not in swap cache by the time page->mapcount hits
- * 0, there won't be any page table references to the swap
- * slot, and reclaim will free it and not actually write the
- * page to disk.
- */
- if (PageSwapCache(page))
- return;
- __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
-}
-
-void mem_cgroup_uncharge_cache_page(struct page *page)
-{
- VM_BUG_ON_PAGE(page_mapped(page), page);
- VM_BUG_ON_PAGE(page->mapping, page);
- __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false);
-}
-
-/*
- * Batch_start/batch_end is called in unmap_page_range/invlidate/trucate.
- * In that cases, pages are freed continuously and we can expect pages
- * are in the same memcg. All these calls itself limits the number of
- * pages freed at once, then uncharge_start/end() is called properly.
- * This may be called prural(2) times in a context,
- */
-
-void mem_cgroup_uncharge_start(void)
-{
- current->memcg_batch.do_batch++;
- /* We can do nest. */
- if (current->memcg_batch.do_batch == 1) {
- current->memcg_batch.memcg = NULL;
- current->memcg_batch.nr_pages = 0;
- current->memcg_batch.memsw_nr_pages = 0;
- }
-}
-
-void mem_cgroup_uncharge_end(void)
-{
- struct memcg_batch_info *batch = &current->memcg_batch;
-
- if (!batch->do_batch)
- return;
-
- batch->do_batch--;
- if (batch->do_batch) /* If stacked, do nothing. */
- return;
-
- if (!batch->memcg)
- return;
- /*
- * This "batch->memcg" is valid without any css_get/put etc...
- * bacause we hide charges behind us.
- */
- if (batch->nr_pages)
- res_counter_uncharge(&batch->memcg->res,
- batch->nr_pages * PAGE_SIZE);
- if (batch->memsw_nr_pages)
- res_counter_uncharge(&batch->memcg->memsw,
- batch->memsw_nr_pages * PAGE_SIZE);
- memcg_oom_recover(batch->memcg);
- /* forget this pointer (for sanity check) */
- batch->memcg = NULL;
-}
-
-#ifdef CONFIG_SWAP
-/*
- * called after __delete_from_swap_cache() and drop "page" account.
- * memcg information is recorded to swap_cgroup of "ent"
- */
-void
-mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
-{
- struct mem_cgroup *memcg;
- int ctype = MEM_CGROUP_CHARGE_TYPE_SWAPOUT;
-
- if (!swapout) /* this was a swap cache but the swap is unused ! */
- ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
-
- memcg = __mem_cgroup_uncharge_common(page, ctype, false);
-
- /*
- * record memcg information, if swapout && memcg != NULL,
- * css_get() was called in uncharge().
- */
- if (do_swap_account && swapout && memcg)
- swap_cgroup_record(ent, mem_cgroup_id(memcg));
-}
-#endif
-
#ifdef CONFIG_MEMCG_SWAP
-/*
- * called from swap_entry_free(). remove record in swap_cgroup and
- * uncharge "memsw" account.
- */
-void mem_cgroup_uncharge_swap(swp_entry_t ent)
+static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
+ bool charge)
{
- struct mem_cgroup *memcg;
- unsigned short id;
-
- if (!do_swap_account)
- return;
-
- id = swap_cgroup_record(ent, 0);
- rcu_read_lock();
- memcg = mem_cgroup_lookup(id);
- if (memcg) {
- /*
- * We uncharge this because swap is freed. This memcg can
- * be obsolete one. We avoid calling css_tryget_online().
- */
- if (!mem_cgroup_is_root(memcg))
- res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
- mem_cgroup_swap_statistics(memcg, false);
- css_put(&memcg->css);
- }
- rcu_read_unlock();
+ int val = (charge) ? 1 : -1;
+ this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
}
/**
@@ -4183,175 +3614,6 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
}
#endif
-/*
- * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
- * page belongs to.
- */
-void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
- struct mem_cgroup **memcgp)
-{
- struct mem_cgroup *memcg = NULL;
- unsigned int nr_pages = 1;
- struct page_cgroup *pc;
- enum charge_type ctype;
-
- *memcgp = NULL;
-
- if (mem_cgroup_disabled())
- return;
-
- if (PageTransHuge(page))
- nr_pages <<= compound_order(page);
-
- pc = lookup_page_cgroup(page);
- lock_page_cgroup(pc);
- if (PageCgroupUsed(pc)) {
- memcg = pc->mem_cgroup;
- css_get(&memcg->css);
- /*
- * At migrating an anonymous page, its mapcount goes down
- * to 0 and uncharge() will be called. But, even if it's fully
- * unmapped, migration may fail and this page has to be
- * charged again. We set MIGRATION flag here and delay uncharge
- * until end_migration() is called
- *
- * Corner Case Thinking
- * A)
- * When the old page was mapped as Anon and it's unmap-and-freed
- * while migration was ongoing.
- * If unmap finds the old page, uncharge() of it will be delayed
- * until end_migration(). If unmap finds a new page, it's
- * uncharged when it make mapcount to be 1->0. If unmap code
- * finds swap_migration_entry, the new page will not be mapped
- * and end_migration() will find it(mapcount==0).
- *
- * B)
- * When the old page was mapped but migraion fails, the kernel
- * remaps it. A charge for it is kept by MIGRATION flag even
- * if mapcount goes down to 0. We can do remap successfully
- * without charging it again.
- *
- * C)
- * The "old" page is under lock_page() until the end of
- * migration, so, the old page itself will not be swapped-out.
- * If the new page is swapped out before end_migraton, our
- * hook to usual swap-out path will catch the event.
- */
- if (PageAnon(page))
- SetPageCgroupMigration(pc);
- }
- unlock_page_cgroup(pc);
- /*
- * If the page is not charged at this point,
- * we return here.
- */
- if (!memcg)
- return;
-
- *memcgp = memcg;
- /*
- * We charge new page before it's used/mapped. So, even if unlock_page()
- * is called before end_migration, we can catch all events on this new
- * page. In the case new page is migrated but not remapped, new page's
- * mapcount will be finally 0 and we call uncharge in end_migration().
- */
- if (PageAnon(page))
- ctype = MEM_CGROUP_CHARGE_TYPE_ANON;
- else
- ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
- /*
- * The page is committed to the memcg, but it's not actually
- * charged to the res_counter since we plan on replacing the
- * old one and only one page is going to be left afterwards.
- */
- __mem_cgroup_commit_charge(memcg, newpage, nr_pages, ctype, false);
-}
-
-/* remove redundant charge if migration failed*/
-void mem_cgroup_end_migration(struct mem_cgroup *memcg,
- struct page *oldpage, struct page *newpage, bool migration_ok)
-{
- struct page *used, *unused;
- struct page_cgroup *pc;
- bool anon;
-
- if (!memcg)
- return;
-
- if (!migration_ok) {
- used = oldpage;
- unused = newpage;
- } else {
- used = newpage;
- unused = oldpage;
- }
- anon = PageAnon(used);
- __mem_cgroup_uncharge_common(unused,
- anon ? MEM_CGROUP_CHARGE_TYPE_ANON
- : MEM_CGROUP_CHARGE_TYPE_CACHE,
- true);
- css_put(&memcg->css);
- /*
- * We disallowed uncharge of pages under migration because mapcount
- * of the page goes down to zero, temporarly.
- * Clear the flag and check the page should be charged.
- */
- pc = lookup_page_cgroup(oldpage);
- lock_page_cgroup(pc);
- ClearPageCgroupMigration(pc);
- unlock_page_cgroup(pc);
-
- /*
- * If a page is a file cache, radix-tree replacement is very atomic
- * and we can skip this check. When it was an Anon page, its mapcount
- * goes down to 0. But because we added MIGRATION flage, it's not
- * uncharged yet. There are several case but page->mapcount check
- * and USED bit check in mem_cgroup_uncharge_page() will do enough
- * check. (see prepare_charge() also)
- */
- if (anon)
- mem_cgroup_uncharge_page(used);
-}
-
-/*
- * At replace page cache, newpage is not under any memcg but it's on
- * LRU. So, this function doesn't touch res_counter but handles LRU
- * in correct way. Both pages are locked so we cannot race with uncharge.
- */
-void mem_cgroup_replace_page_cache(struct page *oldpage,
- struct page *newpage)
-{
- struct mem_cgroup *memcg = NULL;
- struct page_cgroup *pc;
- enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
-
- if (mem_cgroup_disabled())
- return;
-
- pc = lookup_page_cgroup(oldpage);
- /* fix accounting on old pages */
- lock_page_cgroup(pc);
- if (PageCgroupUsed(pc)) {
- memcg = pc->mem_cgroup;
- mem_cgroup_charge_statistics(memcg, oldpage, false, -1);
- ClearPageCgroupUsed(pc);
- }
- unlock_page_cgroup(pc);
-
- /*
- * When called from shmem_replace_page(), in some cases the
- * oldpage has already been charged, and in some cases not.
- */
- if (!memcg)
- return;
- /*
- * Even if newpage->mapping was NULL before starting replacement,
- * the newpage may be on LRU(or pagevec for LRU) already. We lock
- * LRU while we overwrite pc->mem_cgroup.
- */
- __mem_cgroup_commit_charge(memcg, newpage, 1, type, true);
-}
-
#ifdef CONFIG_DEBUG_VM
static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
{
@@ -4550,7 +3812,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
gfp_mask, &nr_scanned);
nr_reclaimed += reclaimed;
*total_scanned += nr_scanned;
- spin_lock(&mctz->lock);
+ spin_lock_irq(&mctz->lock);
/*
* If we failed to reclaim anything from this memory cgroup
@@ -4590,7 +3852,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
*/
/* If excess == 0, no tree ops */
__mem_cgroup_insert_exceeded(mz, mctz, excess);
- spin_unlock(&mctz->lock);
+ spin_unlock_irq(&mctz->lock);
css_put(&mz->memcg->css);
loop++;
/*
@@ -4817,78 +4079,24 @@ out:
return retval;
}
-
-static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
- enum mem_cgroup_stat_index idx)
-{
- struct mem_cgroup *iter;
- long val = 0;
-
- /* Per-cpu values can be negative, use a signed accumulator */
- for_each_mem_cgroup_tree(iter, memcg)
- val += mem_cgroup_read_stat(iter, idx);
-
- if (val < 0) /* race ? */
- val = 0;
- return val;
-}
-
-static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
-{
- u64 val;
-
- if (!mem_cgroup_is_root(memcg)) {
- if (!swap)
- return res_counter_read_u64(&memcg->res, RES_USAGE);
- else
- return res_counter_read_u64(&memcg->memsw, RES_USAGE);
- }
-
- /*
- * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
- * as well as in MEM_CGROUP_STAT_RSS_HUGE.
- */
- val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
- val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
-
- if (swap)
- val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
-
- return val << PAGE_SHIFT;
-}
-
static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
- struct cftype *cft)
+ struct cftype *cft)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
- u64 val;
- int name;
- enum res_type type;
-
- type = MEMFILE_TYPE(cft->private);
- name = MEMFILE_ATTR(cft->private);
+ enum res_type type = MEMFILE_TYPE(cft->private);
+ int name = MEMFILE_ATTR(cft->private);
switch (type) {
case _MEM:
- if (name == RES_USAGE)
- val = mem_cgroup_usage(memcg, false);
- else
- val = res_counter_read_u64(&memcg->res, name);
- break;
+ return res_counter_read_u64(&memcg->res, name);
case _MEMSWAP:
- if (name == RES_USAGE)
- val = mem_cgroup_usage(memcg, true);
- else
- val = res_counter_read_u64(&memcg->memsw, name);
- break;
+ return res_counter_read_u64(&memcg->memsw, name);
case _KMEM:
- val = res_counter_read_u64(&memcg->kmem, name);
+ return res_counter_read_u64(&memcg->kmem, name);
break;
default:
BUG();
}
-
- return val;
}
#ifdef CONFIG_MEMCG_KMEM
@@ -5350,7 +4558,10 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
if (!t)
goto unlock;
- usage = mem_cgroup_usage(memcg, swap);
+ if (!swap)
+ usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+ else
+ usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
/*
* current_threshold points to threshold just below or equal to usage.
@@ -5442,15 +4653,15 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
mutex_lock(&memcg->thresholds_lock);
- if (type == _MEM)
+ if (type == _MEM) {
thresholds = &memcg->thresholds;
- else if (type == _MEMSWAP)
+ usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+ } else if (type == _MEMSWAP) {
thresholds = &memcg->memsw_thresholds;
- else
+ usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+ } else
BUG();
- usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
-
/* Check if a threshold crossed before adding a new one */
if (thresholds->primary)
__mem_cgroup_threshold(memcg, type == _MEMSWAP);
@@ -5530,18 +4741,19 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
int i, j, size;
mutex_lock(&memcg->thresholds_lock);
- if (type == _MEM)
+
+ if (type == _MEM) {
thresholds = &memcg->thresholds;
- else if (type == _MEMSWAP)
+ usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+ } else if (type == _MEMSWAP) {
thresholds = &memcg->memsw_thresholds;
- else
+ usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+ } else
BUG();
if (!thresholds->primary)
goto unlock;
- usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
-
/* Check if a threshold crossed before removing */
__mem_cgroup_threshold(memcg, type == _MEMSWAP);
@@ -6295,9 +5507,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
* core guarantees its existence.
*/
} else {
- res_counter_init(&memcg->res, NULL);
- res_counter_init(&memcg->memsw, NULL);
- res_counter_init(&memcg->kmem, NULL);
+ res_counter_init(&memcg->res, &root_mem_cgroup->res);
+ res_counter_init(&memcg->memsw, &root_mem_cgroup->memsw);
+ res_counter_init(&memcg->kmem, &root_mem_cgroup->kmem);
/*
* Deeper hierachy with use_hierarchy == false doesn't make
* much sense so let cgroup subsystem know about this
@@ -6431,55 +5643,38 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
#ifdef CONFIG_MMU
/* Handlers for move charge at task migration. */
-#define PRECHARGE_COUNT_AT_ONCE 256
static int mem_cgroup_do_precharge(unsigned long count)
{
- int ret = 0;
- int batch_count = PRECHARGE_COUNT_AT_ONCE;
- struct mem_cgroup *memcg = mc.to;
+ int ret;
- if (mem_cgroup_is_root(memcg)) {
+ /* Try a single bulk charge without reclaim first */
+ ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count);
+ if (!ret) {
mc.precharge += count;
- /* we don't need css_get for root */
return ret;
}
- /* try to charge at once */
- if (count > 1) {
- struct res_counter *dummy;
- /*
- * "memcg" cannot be under rmdir() because we've already checked
- * by cgroup_lock_live_cgroup() that it is not removed and we
- * are still under the same cgroup_mutex. So we can postpone
- * css_get().
- */
- if (res_counter_charge(&memcg->res, PAGE_SIZE * count, &dummy))
- goto one_by_one;
- if (do_swap_account && res_counter_charge(&memcg->memsw,
- PAGE_SIZE * count, &dummy)) {
- res_counter_uncharge(&memcg->res, PAGE_SIZE * count);
- goto one_by_one;
- }
- mc.precharge += count;
+ if (ret == -EINTR) {
+ cancel_charge(root_mem_cgroup, count);
return ret;
}
-one_by_one:
- /* fall back to one by one charge */
+
+ /* Try charges one by one with reclaim */
while (count--) {
- if (signal_pending(current)) {
- ret = -EINTR;
- break;
- }
- if (!batch_count--) {
- batch_count = PRECHARGE_COUNT_AT_ONCE;
- cond_resched();
- }
- ret = mem_cgroup_try_charge(memcg, GFP_KERNEL, 1, false);
+ ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1);
+ /*
+ * In case of failure, any residual charges against
+ * mc.to will be dropped by mem_cgroup_clear_mc()
+ * later on. However, cancel any charges that are
+ * bypassed to root right away or they'll be lost.
+ */
+ if (ret == -EINTR)
+ cancel_charge(root_mem_cgroup, 1);
if (ret)
- /* mem_cgroup_clear_mc() will do uncharge later */
return ret;
mc.precharge++;
+ cond_resched();
}
- return ret;
+ return 0;
}
/**
@@ -6615,9 +5810,9 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
if (page) {
pc = lookup_page_cgroup(page);
/*
- * Do only loose check w/o page_cgroup lock.
- * mem_cgroup_move_account() checks the pc is valid or not under
- * the lock.
+ * Do only loose check w/o serialization.
+ * mem_cgroup_move_account() checks the pc is valid or
+ * not under LRU exclusion.
*/
if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
ret = MC_TARGET_PAGE;
@@ -6742,7 +5937,7 @@ static void __mem_cgroup_clear_mc(void)
/* we must uncharge all the leftover precharges from mc.to */
if (mc.precharge) {
- __mem_cgroup_cancel_charge(mc.to, mc.precharge);
+ cancel_charge(mc.to, mc.precharge);
mc.precharge = 0;
}
/*
@@ -6750,27 +5945,24 @@ static void __mem_cgroup_clear_mc(void)
* we must uncharge here.
*/
if (mc.moved_charge) {
- __mem_cgroup_cancel_charge(mc.from, mc.moved_charge);
+ cancel_charge(mc.from, mc.moved_charge);
mc.moved_charge = 0;
}
/* we must fixup refcnts and charges */
if (mc.moved_swap) {
/* uncharge swap account from the old cgroup */
- if (!mem_cgroup_is_root(mc.from))
- res_counter_uncharge(&mc.from->memsw,
- PAGE_SIZE * mc.moved_swap);
+ res_counter_uncharge(&mc.from->memsw,
+ PAGE_SIZE * mc.moved_swap);
for (i = 0; i < mc.moved_swap; i++)
css_put(&mc.from->css);
- if (!mem_cgroup_is_root(mc.to)) {
- /*
- * we charged both to->res and to->memsw, so we should
- * uncharge to->res.
- */
- res_counter_uncharge(&mc.to->res,
- PAGE_SIZE * mc.moved_swap);
- }
+ /*
+ * we charged both to->res and to->memsw, so we should
+ * uncharge to->res.
+ */
+ res_counter_uncharge(&mc.to->res,
+ PAGE_SIZE * mc.moved_swap);
/* we've already done css_get(mc.to) */
mc.moved_swap = 0;
}
@@ -7082,6 +6274,380 @@ static void __init enable_swap_cgroup(void)
}
#endif
+#ifdef CONFIG_MEMCG_SWAP
+/**
+ * mem_cgroup_swapout - transfer a memsw charge to swap
+ * @page: page whose memsw charge to transfer
+ * @entry: swap entry to move the charge to
+ *
+ * Transfer the memsw charge of @page to @entry.
+ */
+void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+{
+ struct page_cgroup *pc;
+ unsigned short oldid;
+
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+ VM_BUG_ON_PAGE(page_count(page), page);
+
+ if (!do_swap_account)
+ return;
+
+ pc = lookup_page_cgroup(page);
+
+ /* Readahead page, never charged */
+ if (!PageCgroupUsed(pc))
+ return;
+
+ VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
+
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
+ VM_BUG_ON_PAGE(oldid, page);
+
+ pc->flags &= ~PCG_MEMSW;
+ css_get(&pc->mem_cgroup->css);
+ mem_cgroup_swap_statistics(pc->mem_cgroup, true);
+}
+
+/**
+ * mem_cgroup_uncharge_swap - uncharge a swap entry
+ * @entry: swap entry to uncharge
+ *
+ * Drop the memsw charge associated with @entry.
+ */
+void mem_cgroup_uncharge_swap(swp_entry_t entry)
+{
+ struct mem_cgroup *memcg;
+ unsigned short id;
+
+ if (!do_swap_account)
+ return;
+
+ id = swap_cgroup_record(entry, 0);
+ rcu_read_lock();
+ memcg = mem_cgroup_lookup(id);
+ if (memcg) {
+ res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+ mem_cgroup_swap_statistics(memcg, false);
+ css_put(&memcg->css);
+ }
+ rcu_read_unlock();
+}
+#endif
+
+/**
+ * mem_cgroup_try_charge - try charging a page
+ * @page: page to charge
+ * @mm: mm context of the victim
+ * @gfp_mask: reclaim mode
+ * @memcgp: charged memcg return
+ *
+ * Try to charge @page to the memcg that @mm belongs to, reclaiming
+ * pages according to @gfp_mask if necessary.
+ *
+ * Returns 0 on success, with *@memcgp pointing to the charged memcg.
+ * Otherwise, an error code is returned.
+ *
+ * After page->mapping has been set up, the caller must finalize the
+ * charge with mem_cgroup_commit_charge(). Or abort the transaction
+ * with mem_cgroup_cancel_charge() in case page instantiation fails.
+ */
+int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+ gfp_t gfp_mask, struct mem_cgroup **memcgp)
+{
+ struct mem_cgroup *memcg = NULL;
+ unsigned int nr_pages = 1;
+ int ret = 0;
+
+ if (mem_cgroup_disabled())
+ goto out;
+
+ if (PageSwapCache(page)) {
+ struct page_cgroup *pc = lookup_page_cgroup(page);
+ /*
+ * Every swap fault against a single page tries to charge the
+ * page, bail as early as possible. shmem_unuse() encounters
+ * already charged pages, too. The USED bit is protected by
+ * the page lock, which serializes swap cache removal, which
+ * in turn serializes uncharging.
+ */
+ if (PageCgroupUsed(pc))
+ goto out;
+ }
+
+ if (PageTransHuge(page)) {
+ nr_pages <<= compound_order(page);
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+ }
+
+ if (do_swap_account && PageSwapCache(page))
+ memcg = try_get_mem_cgroup_from_page(page);
+ if (!memcg)
+ memcg = get_mem_cgroup_from_mm(mm);
+
+ ret = try_charge(memcg, gfp_mask, nr_pages);
+
+ css_put(&memcg->css);
+
+ if (ret == -EINTR) {
+ memcg = root_mem_cgroup;
+ ret = 0;
+ }
+out:
+ *memcgp = memcg;
+ return ret;
+}
+
+/**
+ * mem_cgroup_commit_charge - commit a page charge
+ * @page: page to charge
+ * @memcg: memcg to charge the page to
+ * @lrucare: page might be on LRU already
+ *
+ * Finalize a charge transaction started by mem_cgroup_try_charge(),
+ * after page->mapping has been set up. This must happen atomically
+ * as part of the page instantiation, i.e. under the page table lock
+ * for anonymous pages, under the page lock for page and swap cache.
+ *
+ * In addition, the page must not be on the LRU during the commit, to
+ * prevent racing with task migration. If it might be, use @lrucare.
+ *
+ * Use mem_cgroup_cancel_charge() to cancel the transaction instead.
+ */
+void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
+ bool lrucare)
+{
+ unsigned int nr_pages = 1;
+
+ VM_BUG_ON_PAGE(!page->mapping, page);
+ VM_BUG_ON_PAGE(PageLRU(page) && !lrucare, page);
+
+ if (mem_cgroup_disabled())
+ return;
+ /*
+ * Swap faults will attempt to charge the same page multiple
+ * times. But reuse_swap_page() might have removed the page
+ * from swapcache already, so we can't check PageSwapCache().
+ */
+ if (!memcg)
+ return;
+
+ if (PageTransHuge(page)) {
+ nr_pages <<= compound_order(page);
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+ }
+
+ commit_charge(page, memcg, nr_pages, lrucare);
+
+ if (do_swap_account && PageSwapCache(page)) {
+ swp_entry_t entry = { .val = page_private(page) };
+ /*
+ * The swap entry might not get freed for a long time,
+ * let's not wait for it. The page already received a
+ * memory+swap charge, drop the swap entry duplicate.
+ */
+ mem_cgroup_uncharge_swap(entry);
+ }
+}
+
+/**
+ * mem_cgroup_cancel_charge - cancel a page charge
+ * @page: page to charge
+ * @memcg: memcg to charge the page to
+ *
+ * Cancel a charge transaction started by mem_cgroup_try_charge().
+ */
+void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
+{
+ unsigned int nr_pages = 1;
+
+ if (mem_cgroup_disabled())
+ return;
+ /*
+ * Swap faults will attempt to charge the same page multiple
+ * times. But reuse_swap_page() might have removed the page
+ * from swapcache already, so we can't check PageSwapCache().
+ */
+ if (!memcg)
+ return;
+
+ if (PageTransHuge(page)) {
+ nr_pages <<= compound_order(page);
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+ }
+
+ cancel_charge(memcg, nr_pages);
+}
+
+static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
+ unsigned long nr_mem, unsigned long nr_memsw,
+ unsigned long nr_anon, unsigned long nr_file,
+ unsigned long nr_huge, struct page *dummy_page)
+{
+ unsigned long flags;
+
+ if (nr_mem)
+ res_counter_uncharge(&memcg->res, nr_mem * PAGE_SIZE);
+ if (nr_memsw)
+ res_counter_uncharge(&memcg->memsw, nr_memsw * PAGE_SIZE);
+
+ memcg_oom_recover(memcg);
+
+ local_irq_save(flags);
+ __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
+ __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
+ __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
+ __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
+ __this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
+ memcg_check_events(memcg, dummy_page);
+ local_irq_restore(flags);
+}
+
+static void uncharge_list(struct list_head *page_list)
+{
+ struct mem_cgroup *memcg = NULL;
+ unsigned long nr_memsw = 0;
+ unsigned long nr_anon = 0;
+ unsigned long nr_file = 0;
+ unsigned long nr_huge = 0;
+ unsigned long pgpgout = 0;
+ unsigned long nr_mem = 0;
+ struct list_head *next;
+ struct page *page;
+
+ next = page_list->next;
+ do {
+ unsigned int nr_pages = 1;
+ struct page_cgroup *pc;
+
+ page = list_entry(next, struct page, lru);
+ next = page->lru.next;
+
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+ VM_BUG_ON_PAGE(page_count(page), page);
+
+ pc = lookup_page_cgroup(page);
+ if (!PageCgroupUsed(pc))
+ continue;
+
+ /*
+ * Nobody should be changing or seriously looking at
+ * pc->mem_cgroup and pc->flags at this point, we have
+ * fully exclusive access to the page.
+ */
+
+ if (memcg != pc->mem_cgroup) {
+ if (memcg) {
+ uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
+ nr_anon, nr_file, nr_huge, page);
+ pgpgout = nr_mem = nr_memsw = 0;
+ nr_anon = nr_file = nr_huge = 0;
+ }
+ memcg = pc->mem_cgroup;
+ }
+
+ if (PageTransHuge(page)) {
+ nr_pages <<= compound_order(page);
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+ nr_huge += nr_pages;
+ }
+
+ if (PageAnon(page))
+ nr_anon += nr_pages;
+ else
+ nr_file += nr_pages;
+
+ if (pc->flags & PCG_MEM)
+ nr_mem += nr_pages;
+ if (pc->flags & PCG_MEMSW)
+ nr_memsw += nr_pages;
+ pc->flags = 0;
+
+ pgpgout++;
+ } while (next != page_list);
+
+ if (memcg)
+ uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
+ nr_anon, nr_file, nr_huge, page);
+}
+
+/**
+ * mem_cgroup_uncharge - uncharge a page
+ * @page: page to uncharge
+ *
+ * Uncharge a page previously charged with mem_cgroup_try_charge() and
+ * mem_cgroup_commit_charge().
+ */
+void mem_cgroup_uncharge(struct page *page)
+{
+ if (mem_cgroup_disabled())
+ return;
+
+ INIT_LIST_HEAD(&page->lru);
+ uncharge_list(&page->lru);
+}
+
+/**
+ * mem_cgroup_uncharge_list - uncharge a list of page
+ * @page_list: list of pages to uncharge
+ *
+ * Uncharge a list of pages previously charged with
+ * mem_cgroup_try_charge() and mem_cgroup_commit_charge().
+ */
+void mem_cgroup_uncharge_list(struct list_head *page_list)
+{
+ if (mem_cgroup_disabled())
+ return;
+
+ if (!list_empty(page_list))
+ uncharge_list(page_list);
+}
+
+/**
+ * mem_cgroup_migrate - migrate a charge to another page
+ * @oldpage: currently charged page
+ * @newpage: page to transfer the charge to
+ * @lrucare: page might be on LRU already
+ *
+ * Migrate the charge from @oldpage to @newpage.
+ *
+ * Both pages must be locked, @newpage->mapping must be set up.
+ */
+void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
+ bool lrucare)
+{
+ unsigned int nr_pages = 1;
+ struct page_cgroup *pc;
+
+ VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
+ VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
+ VM_BUG_ON_PAGE(PageLRU(oldpage), oldpage);
+ VM_BUG_ON_PAGE(PageLRU(newpage), newpage);
+ VM_BUG_ON_PAGE(PageAnon(oldpage) != PageAnon(newpage), newpage);
+
+ if (mem_cgroup_disabled())
+ return;
+
+ pc = lookup_page_cgroup(oldpage);
+ if (!PageCgroupUsed(pc))
+ return;
+
+ /* Already migrated */
+ if (!(pc->flags & PCG_MEM))
+ return;
+
+ VM_BUG_ON_PAGE(do_swap_account && !(pc->flags & PCG_MEMSW), oldpage);
+ pc->flags &= ~(PCG_MEM | PCG_MEMSW);
+
+ if (PageTransHuge(oldpage)) {
+ nr_pages <<= compound_order(oldpage);
+ VM_BUG_ON_PAGE(!PageTransHuge(oldpage), oldpage);
+ VM_BUG_ON_PAGE(!PageTransHuge(newpage), newpage);
+ }
+
+ commit_charge(newpage, pc->mem_cgroup, nr_pages, lrucare);
+}
+
/*
* subsys_initcall() for memory controller.
*
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index c6399e328931..e3e2f007946e 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -435,7 +435,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
if (av == NULL) /* Not actually mapped anymore */
return;
- pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ pgoff = page_to_pgoff(page);
read_lock(&tasklist_lock);
for_each_process (tsk) {
struct anon_vma_chain *vmac;
@@ -469,7 +469,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
mutex_lock(&mapping->i_mmap_mutex);
read_lock(&tasklist_lock);
for_each_process(tsk) {
- pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ pgoff_t pgoff = page_to_pgoff(page);
struct task_struct *t = task_early_kill(tsk, force_early);
if (!t)
@@ -1165,6 +1165,16 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
lock_page(hpage);
/*
+ * The page could have changed compound pages during the locking.
+ * If this happens just bail out.
+ */
+ if (compound_head(p) != hpage) {
+ action_result(pfn, "different compound page after locking", IGNORED);
+ res = -EBUSY;
+ goto out;
+ }
+
+ /*
* We use page flags to determine what action should be taken, but
* the flags can be modified by the error containment action. One
* example is an mlocked page, where PG_mlocked is cleared by
diff --git a/mm/memory.c b/mm/memory.c
index d67fd9fcf1f2..eb37dfb8f631 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -884,7 +884,7 @@ out_set_pte:
return 0;
}
-int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
@@ -1292,7 +1292,6 @@ static void unmap_page_range(struct mmu_gather *tlb,
details = NULL;
BUG_ON(addr >= end);
- mem_cgroup_uncharge_start();
tlb_start_vma(tlb, vma);
pgd = pgd_offset(vma->vm_mm, addr);
do {
@@ -1302,7 +1301,6 @@ static void unmap_page_range(struct mmu_gather *tlb,
next = zap_pud_range(tlb, vma, pgd, addr, next, details);
} while (pgd++, addr = next, addr != end);
tlb_end_vma(tlb, vma);
- mem_cgroup_uncharge_end();
}
@@ -2049,6 +2047,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page *dirty_page = NULL;
unsigned long mmun_start = 0; /* For mmu_notifiers */
unsigned long mmun_end = 0; /* For mmu_notifiers */
+ struct mem_cgroup *memcg;
old_page = vm_normal_page(vma, address, orig_pte);
if (!old_page) {
@@ -2204,7 +2203,7 @@ gotten:
}
__SetPageUptodate(new_page);
- if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
+ if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg))
goto oom_free_new;
mmun_start = address & PAGE_MASK;
@@ -2234,6 +2233,8 @@ gotten:
*/
ptep_clear_flush(vma, address, page_table);
page_add_new_anon_rmap(new_page, vma, address);
+ mem_cgroup_commit_charge(new_page, memcg, false);
+ lru_cache_add_active_or_unevictable(new_page, vma);
/*
* We call the notify macro here because, when using secondary
* mmu page tables (such as kvm shadow page tables), we want the
@@ -2271,7 +2272,7 @@ gotten:
new_page = old_page;
ret |= VM_FAULT_WRITE;
} else
- mem_cgroup_uncharge_page(new_page);
+ mem_cgroup_cancel_charge(new_page, memcg);
if (new_page)
page_cache_release(new_page);
@@ -2399,7 +2400,10 @@ EXPORT_SYMBOL(unmap_mapping_range);
/*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ *
+ * We return with the mmap_sem locked or unlocked in the same cases
+ * as does filemap_fault().
*/
static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -2407,10 +2411,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
{
spinlock_t *ptl;
struct page *page, *swapcache;
+ struct mem_cgroup *memcg;
swp_entry_t entry;
pte_t pte;
int locked;
- struct mem_cgroup *ptr;
int exclusive = 0;
int ret = 0;
@@ -2486,7 +2490,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto out_page;
}
- if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
+ if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) {
ret = VM_FAULT_OOM;
goto out_page;
}
@@ -2511,10 +2515,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
* while the page is counted on swap but not yet in mapcount i.e.
* before page_add_anon_rmap() and swap_free(); try_to_free_swap()
* must be called after the swap_free(), or it will never succeed.
- * Because delete_from_swap_page() may be called by reuse_swap_page(),
- * mem_cgroup_commit_charge_swapin() may not be able to find swp_entry
- * in page->private. In this case, a record in swap_cgroup is silently
- * discarded at swap_free().
*/
inc_mm_counter_fast(mm, MM_ANONPAGES);
@@ -2530,12 +2530,14 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (pte_swp_soft_dirty(orig_pte))
pte = pte_mksoft_dirty(pte);
set_pte_at(mm, address, page_table, pte);
- if (page == swapcache)
+ if (page == swapcache) {
do_page_add_anon_rmap(page, vma, address, exclusive);
- else /* ksm created a completely new copy */
+ mem_cgroup_commit_charge(page, memcg, true);
+ } else { /* ksm created a completely new copy */
page_add_new_anon_rmap(page, vma, address);
- /* It's better to call commit-charge after rmap is established */
- mem_cgroup_commit_charge_swapin(page, ptr);
+ mem_cgroup_commit_charge(page, memcg, false);
+ lru_cache_add_active_or_unevictable(page, vma);
+ }
swap_free(entry);
if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
@@ -2568,7 +2570,7 @@ unlock:
out:
return ret;
out_nomap:
- mem_cgroup_cancel_charge_swapin(ptr);
+ mem_cgroup_cancel_charge(page, memcg);
pte_unmap_unlock(page_table, ptl);
out_page:
unlock_page(page);
@@ -2624,6 +2626,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
unsigned int flags)
{
+ struct mem_cgroup *memcg;
struct page *page;
spinlock_t *ptl;
pte_t entry;
@@ -2657,7 +2660,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
*/
__SetPageUptodate(page);
- if (mem_cgroup_charge_anon(page, mm, GFP_KERNEL))
+ if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
goto oom_free_page;
entry = mk_pte(page, vma->vm_page_prot);
@@ -2670,6 +2673,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
inc_mm_counter_fast(mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address);
+ mem_cgroup_commit_charge(page, memcg, false);
+ lru_cache_add_active_or_unevictable(page, vma);
setpte:
set_pte_at(mm, address, page_table, entry);
@@ -2679,7 +2684,7 @@ unlock:
pte_unmap_unlock(page_table, ptl);
return 0;
release:
- mem_cgroup_uncharge_page(page);
+ mem_cgroup_cancel_charge(page, memcg);
page_cache_release(page);
goto unlock;
oom_free_page:
@@ -2688,6 +2693,11 @@ oom:
return VM_FAULT_OOM;
}
+/*
+ * The mmap_sem must have been held on entry, and may have been
+ * released depending on flags and vma->vm_ops->fault() return value.
+ * See filemap_fault() and __lock_page_retry().
+ */
static int __do_fault(struct vm_area_struct *vma, unsigned long address,
pgoff_t pgoff, unsigned int flags, struct page **page)
{
@@ -2744,7 +2754,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
else if (pte_file(*pte) && pte_file_soft_dirty(*pte))
- pte_mksoft_dirty(entry);
+ entry = pte_mksoft_dirty(entry);
if (anon) {
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address);
@@ -2882,7 +2892,8 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* if page by the offset is not ready to be mapped (cold cache or
* something).
*/
- if (vma->vm_ops->map_pages && fault_around_pages() > 1) {
+ if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) &&
+ fault_around_pages() > 1) {
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
do_fault_around(vma, address, pte, pgoff, flags);
if (!pte_same(*pte, orig_pte))
@@ -2913,6 +2924,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
{
struct page *fault_page, *new_page;
+ struct mem_cgroup *memcg;
spinlock_t *ptl;
pte_t *pte;
int ret;
@@ -2924,7 +2936,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (!new_page)
return VM_FAULT_OOM;
- if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) {
+ if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) {
page_cache_release(new_page);
return VM_FAULT_OOM;
}
@@ -2944,12 +2956,14 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
goto uncharge_out;
}
do_set_pte(vma, address, new_page, pte, true, true);
+ mem_cgroup_commit_charge(new_page, memcg, false);
+ lru_cache_add_active_or_unevictable(new_page, vma);
pte_unmap_unlock(pte, ptl);
unlock_page(fault_page);
page_cache_release(fault_page);
return ret;
uncharge_out:
- mem_cgroup_uncharge_page(new_page);
+ mem_cgroup_cancel_charge(new_page, memcg);
page_cache_release(new_page);
return ret;
}
@@ -2965,6 +2979,8 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
int dirtied = 0;
int ret, tmp;
+ WARN_ON_ONCE(!rwsem_is_locked(&mm->mmap_sem));
+
ret = __do_fault(vma, address, pgoff, flags, &fault_page);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;
@@ -2995,6 +3011,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (set_page_dirty(fault_page))
dirtied = 1;
+ /*
+ * Take a local copy of the address_space - page.mapping may be zeroed
+ * by truncate after unlock_page(). The address_space itself remains
+ * pinned by vma->vm_file's reference. We rely on unlock_page()'s
+ * release semantics to prevent the compiler from undoing this copying.
+ */
mapping = fault_page->mapping;
unlock_page(fault_page);
if ((dirtied || vma->vm_ops->page_mkwrite) && mapping) {
@@ -3012,6 +3034,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return ret;
}
+/*
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults).
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
+ */
static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
unsigned int flags, pte_t orig_pte)
@@ -3036,7 +3064,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
*/
static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -3168,7 +3198,10 @@ out:
*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
*/
static int handle_pte_fault(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
@@ -3177,7 +3210,7 @@ static int handle_pte_fault(struct mm_struct *mm,
pte_t entry;
spinlock_t *ptl;
- entry = *pte;
+ entry = ACCESS_ONCE(*pte);
if (!pte_present(entry)) {
if (pte_none(entry)) {
if (vma->vm_ops) {
@@ -3228,6 +3261,9 @@ unlock:
/*
* By the time we get here, we already hold the mm semaphore
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
*/
static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
@@ -3309,6 +3345,12 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return handle_pte_fault(mm, vma, address, pte, pmd, flags);
}
+/*
+ * By the time we get here, we already hold the mm semaphore
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
+ */
int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
{
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 469bbf505f85..a3797d3fd8a4 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -284,8 +284,8 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
}
#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
-static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
- unsigned long end_pfn)
+static void __meminit grow_zone_span(struct zone *zone, unsigned long start_pfn,
+ unsigned long end_pfn)
{
unsigned long old_zone_end_pfn;
@@ -427,8 +427,8 @@ out_fail:
return -1;
}
-static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
- unsigned long end_pfn)
+static void __meminit grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
+ unsigned long end_pfn)
{
unsigned long old_pgdat_end_pfn = pgdat_end_pfn(pgdat);
@@ -977,15 +977,18 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
zone = page_zone(pfn_to_page(pfn));
ret = -EINVAL;
- if ((zone_idx(zone) > ZONE_NORMAL || online_type == ONLINE_MOVABLE) &&
+ if ((zone_idx(zone) > ZONE_NORMAL ||
+ online_type == MMOP_ONLINE_MOVABLE) &&
!can_online_high_movable(zone))
goto out;
- if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) {
+ if (online_type == MMOP_ONLINE_KERNEL &&
+ zone_idx(zone) == ZONE_MOVABLE) {
if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages))
goto out;
}
- if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) {
+ if (online_type == MMOP_ONLINE_MOVABLE &&
+ zone_idx(zone) == ZONE_MOVABLE - 1) {
if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages))
goto out;
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 9e0beaa91845..7f5a42403fae 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -778,11 +778,14 @@ static int move_to_new_page(struct page *newpage, struct page *page,
rc = fallback_migrate_page(mapping, newpage, page, mode);
if (rc != MIGRATEPAGE_SUCCESS) {
- newpage->mapping = NULL;
+ if (!PageAnon(newpage))
+ newpage->mapping = NULL;
} else {
+ mem_cgroup_migrate(page, newpage, false);
if (remap_swapcache)
remove_migration_ptes(page, newpage);
- page->mapping = NULL;
+ if (!PageAnon(page))
+ page->mapping = NULL;
}
unlock_page(newpage);
@@ -795,7 +798,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
{
int rc = -EAGAIN;
int remap_swapcache = 1;
- struct mem_cgroup *mem;
struct anon_vma *anon_vma = NULL;
if (!trylock_page(page)) {
@@ -821,9 +823,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
lock_page(page);
}
- /* charge against new page */
- mem_cgroup_prepare_migration(page, newpage, &mem);
-
if (PageWriteback(page)) {
/*
* Only in the case of a full synchronous migration is it
@@ -833,10 +832,10 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
*/
if (mode != MIGRATE_SYNC) {
rc = -EBUSY;
- goto uncharge;
+ goto out_unlock;
}
if (!force)
- goto uncharge;
+ goto out_unlock;
wait_on_page_writeback(page);
}
/*
@@ -872,7 +871,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
*/
remap_swapcache = 0;
} else {
- goto uncharge;
+ goto out_unlock;
}
}
@@ -885,7 +884,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
* the page migration right away (proteced by page lock).
*/
rc = balloon_page_migrate(newpage, page, mode);
- goto uncharge;
+ goto out_unlock;
}
/*
@@ -904,7 +903,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
VM_BUG_ON_PAGE(PageAnon(page), page);
if (page_has_private(page)) {
try_to_free_buffers(page);
- goto uncharge;
+ goto out_unlock;
}
goto skip_unmap;
}
@@ -923,10 +922,7 @@ skip_unmap:
if (anon_vma)
put_anon_vma(anon_vma);
-uncharge:
- mem_cgroup_end_migration(mem, page, newpage,
- (rc == MIGRATEPAGE_SUCCESS ||
- rc == MIGRATEPAGE_BALLOON_SUCCESS));
+out_unlock:
unlock_page(page);
out:
return rc;
@@ -1785,7 +1781,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
pg_data_t *pgdat = NODE_DATA(node);
int isolated = 0;
struct page *new_page = NULL;
- struct mem_cgroup *memcg = NULL;
int page_lru = page_is_file_cache(page);
unsigned long mmun_start = address & HPAGE_PMD_MASK;
unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
@@ -1851,15 +1846,6 @@ fail_putback:
goto out_unlock;
}
- /*
- * Traditional migration needs to prepare the memcg charge
- * transaction early to prevent the old page from being
- * uncharged when installing migration entries. Here we can
- * save the potential rollback and start the charge transfer
- * only when migration is already known to end successfully.
- */
- mem_cgroup_prepare_migration(page, new_page, &memcg);
-
orig_entry = *pmd;
entry = mk_pmd(new_page, vma->vm_page_prot);
entry = pmd_mkhuge(entry);
@@ -1887,14 +1873,10 @@ fail_putback:
goto fail_putback;
}
+ mem_cgroup_migrate(page, new_page, false);
+
page_remove_rmap(page);
- /*
- * Finish the charge transaction under the page table lock to
- * prevent split_huge_page() from dividing up the charge
- * before it's fully transferred to the new page.
- */
- mem_cgroup_end_migration(memcg, page, new_page, true);
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
diff --git a/mm/mlock.c b/mm/mlock.c
index b1eb53634005..ce84cb0b83ef 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -210,12 +210,19 @@ out:
* @vma: target vma
* @start: start address
* @end: end address
+ * @nonblocking:
*
* This takes care of making the pages present too.
*
* return 0 on success, negative error code on error.
*
- * vma->vm_mm->mmap_sem must be held for at least read.
+ * vma->vm_mm->mmap_sem must be held.
+ *
+ * If @nonblocking is NULL, it may be held for read or write and will
+ * be unperturbed.
+ *
+ * If @nonblocking is non-NULL, it must held for read only and may be
+ * released. If it's released, *@nonblocking will be set to 0.
*/
long __mlock_vma_pages_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end, int *nonblocking)
diff --git a/mm/mmap.c b/mm/mmap.c
index 129b847d30cc..64c9d736155c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -31,6 +31,7 @@
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
+#include <linux/mmdebug.h>
#include <linux/perf_event.h>
#include <linux/audit.h>
#include <linux/khugepaged.h>
@@ -134,6 +135,10 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
unsigned long free, allowed, reserve;
+ VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) <
+ -(s64)vm_committed_as_batch * num_online_cpus(),
+ "memory commitment underflow");
+
vm_acct_memory(pages);
/*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 518e2c3f4c75..e0c943014eb7 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1306,9 +1306,9 @@ static inline void bdi_dirty_limits(struct backing_dev_info *bdi,
*bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
if (bdi_bg_thresh)
- *bdi_bg_thresh = div_u64((u64)*bdi_thresh *
- background_thresh,
- dirty_thresh);
+ *bdi_bg_thresh = dirty_thresh ? div_u64((u64)*bdi_thresh *
+ background_thresh,
+ dirty_thresh) : 0;
/*
* In order to avoid the stacked BDI deadlock we need
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0ea758b898fd..b99643d42a93 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -680,9 +680,12 @@ static void free_pcppages_bulk(struct zone *zone, int count,
int migratetype = 0;
int batch_free = 0;
int to_free = count;
+ unsigned long nr_scanned;
spin_lock(&zone->lock);
- zone->pages_scanned = 0;
+ nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
+ if (nr_scanned)
+ __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
while (to_free) {
struct page *page;
@@ -731,8 +734,11 @@ static void free_one_page(struct zone *zone,
unsigned int order,
int migratetype)
{
+ unsigned long nr_scanned;
spin_lock(&zone->lock);
- zone->pages_scanned = 0;
+ nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
+ if (nr_scanned)
+ __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
__free_one_page(page, pfn, zone, order, migratetype);
if (unlikely(!is_migrate_isolate(migratetype)))
@@ -1257,15 +1263,11 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
{
unsigned long flags;
- int to_drain;
- unsigned long batch;
+ int to_drain, batch;
local_irq_save(flags);
batch = ACCESS_ONCE(pcp->batch);
- if (pcp->count >= batch)
- to_drain = batch;
- else
- to_drain = pcp->count;
+ to_drain = min(pcp->count, batch);
if (to_drain > 0) {
free_pcppages_bulk(zone, to_drain, pcp);
pcp->count -= to_drain;
@@ -1610,6 +1612,9 @@ again:
}
__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+ if (zone_page_state(zone, NR_ALLOC_BATCH) == 0 &&
+ !zone_is_fair_depleted(zone))
+ zone_set_flag(zone, ZONE_FAIR_DEPLETED);
__count_zone_vm_events(PGALLOC, zone, 1 << order);
zone_statistics(preferred_zone, zone, gfp_flags);
@@ -1712,7 +1717,6 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
{
/* free_pages my go negative - that's OK */
long min = mark;
- long lowmem_reserve = z->lowmem_reserve[classzone_idx];
int o;
long free_cma = 0;
@@ -1727,7 +1731,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
free_cma = zone_page_state(z, NR_FREE_CMA_PAGES);
#endif
- if (free_pages - free_cma <= min + lowmem_reserve)
+ if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx])
return false;
for (o = 0; o < order; o++) {
/* At the next order, this order's pages become unavailable */
@@ -1922,6 +1926,18 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
#endif /* CONFIG_NUMA */
+static void reset_alloc_batches(struct zone *preferred_zone)
+{
+ struct zone *zone = preferred_zone->zone_pgdat->node_zones;
+
+ do {
+ mod_zone_page_state(zone, NR_ALLOC_BATCH,
+ high_wmark_pages(zone) - low_wmark_pages(zone) -
+ atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
+ zone_clear_flag(zone, ZONE_FAIR_DEPLETED);
+ } while (zone++ != preferred_zone);
+}
+
/*
* get_page_from_freelist goes through the zonelist trying to allocate
* a page.
@@ -1939,8 +1955,12 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
int did_zlc_setup = 0; /* just call zlc_setup() one time */
bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) &&
(gfp_mask & __GFP_WRITE);
+ int nr_fair_skipped = 0;
+ bool zonelist_rescan;
zonelist_scan:
+ zonelist_rescan = false;
+
/*
* Scan zonelist, looking for a zone with enough free.
* See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c.
@@ -1964,9 +1984,11 @@ zonelist_scan:
*/
if (alloc_flags & ALLOC_FAIR) {
if (!zone_local(preferred_zone, zone))
+ break;
+ if (zone_is_fair_depleted(zone)) {
+ nr_fair_skipped++;
continue;
- if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
- continue;
+ }
}
/*
* When allocating a page cache page for writing, we
@@ -2072,13 +2094,7 @@ this_zone_full:
zlc_mark_zone_full(zonelist, z);
}
- if (unlikely(IS_ENABLED(CONFIG_NUMA) && page == NULL && zlc_active)) {
- /* Disable zlc cache for second zonelist scan */
- zlc_active = 0;
- goto zonelist_scan;
- }
-
- if (page)
+ if (page) {
/*
* page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
* necessary to allocate the page. The expectation is
@@ -2087,8 +2103,37 @@ this_zone_full:
* for !PFMEMALLOC purposes.
*/
page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
+ return page;
+ }
- return page;
+ /*
+ * The first pass makes sure allocations are spread fairly within the
+ * local node. However, the local node might have free pages left
+ * after the fairness batches are exhausted, and remote zones haven't
+ * even been considered yet. Try once more without fairness, and
+ * include remote zones now, before entering the slowpath and waking
+ * kswapd: prefer spilling to a remote zone over swapping locally.
+ */
+ if (alloc_flags & ALLOC_FAIR) {
+ alloc_flags &= ~ALLOC_FAIR;
+ if (nr_fair_skipped) {
+ zonelist_rescan = true;
+ reset_alloc_batches(preferred_zone);
+ }
+ if (nr_online_nodes > 1)
+ zonelist_rescan = true;
+ }
+
+ if (unlikely(IS_ENABLED(CONFIG_NUMA) && zlc_active)) {
+ /* Disable zlc cache for second zonelist scan */
+ zlc_active = 0;
+ zonelist_rescan = true;
+ }
+
+ if (zonelist_rescan)
+ goto zonelist_scan;
+
+ return NULL;
}
/*
@@ -2409,28 +2454,6 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
return page;
}
-static void reset_alloc_batches(struct zonelist *zonelist,
- enum zone_type high_zoneidx,
- struct zone *preferred_zone)
-{
- struct zoneref *z;
- struct zone *zone;
-
- for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
- /*
- * Only reset the batches of zones that were actually
- * considered in the fairness pass, we don't want to
- * trash fairness information for zones that are not
- * actually part of this zonelist's round-robin cycle.
- */
- if (!zone_local(preferred_zone, zone))
- continue;
- mod_zone_page_state(zone, NR_ALLOC_BATCH,
- high_wmark_pages(zone) - low_wmark_pages(zone) -
- atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
- }
-}
-
static void wake_all_kswapds(unsigned int order,
struct zonelist *zonelist,
enum zone_type high_zoneidx,
@@ -2766,29 +2789,12 @@ retry_cpuset:
if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
alloc_flags |= ALLOC_CMA;
#endif
-retry:
/* First allocation attempt */
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
zonelist, high_zoneidx, alloc_flags,
preferred_zone, classzone_idx, migratetype);
if (unlikely(!page)) {
/*
- * The first pass makes sure allocations are spread
- * fairly within the local node. However, the local
- * node might have free pages left after the fairness
- * batches are exhausted, and remote zones haven't
- * even been considered yet. Try once more without
- * fairness, and include remote zones now, before
- * entering the slowpath and waking kswapd: prefer
- * spilling to a remote zone over swapping locally.
- */
- if (alloc_flags & ALLOC_FAIR) {
- reset_alloc_batches(zonelist, high_zoneidx,
- preferred_zone);
- alloc_flags &= ~ALLOC_FAIR;
- goto retry;
- }
- /*
* Runtime PM, block IO and its error handling path
* can deadlock because I/O on the device might not
* complete.
@@ -2962,7 +2968,7 @@ EXPORT_SYMBOL(alloc_pages_exact);
* Note this is not alloc_pages_exact_node() which allocates on a specific node,
* but is not exact.
*/
-void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
+void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
{
unsigned order = get_order(size);
struct page *p = alloc_pages_node(nid, gfp_mask, order);
@@ -2970,7 +2976,6 @@ void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
return NULL;
return make_alloc_exact((unsigned long)page_address(p), order, size);
}
-EXPORT_SYMBOL(alloc_pages_exact_nid);
/**
* free_pages_exact - release memory allocated via alloc_pages_exact()
@@ -3052,7 +3057,7 @@ static inline void show_node(struct zone *zone)
void si_meminfo(struct sysinfo *val)
{
val->totalram = totalram_pages;
- val->sharedram = 0;
+ val->sharedram = global_page_state(NR_SHMEM);
val->freeram = global_page_state(NR_FREE_PAGES);
val->bufferram = nr_blockdev_pages();
val->totalhigh = totalhigh_pages;
@@ -3072,6 +3077,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
managed_pages += pgdat->node_zones[zone_type].managed_pages;
val->totalram = managed_pages;
+ val->sharedram = node_page_state(nid, NR_SHMEM);
val->freeram = node_page_state(nid, NR_FREE_PAGES);
#ifdef CONFIG_HIGHMEM
val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages;
@@ -3253,12 +3259,12 @@ void show_free_areas(unsigned int filter)
K(zone_page_state(zone, NR_BOUNCE)),
K(zone_page_state(zone, NR_FREE_CMA_PAGES)),
K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
- zone->pages_scanned,
+ K(zone_page_state(zone, NR_PAGES_SCANNED)),
(!zone_reclaimable(zone) ? "yes" : "no")
);
printk("lowmem_reserve[]:");
for (i = 0; i < MAX_NR_ZONES; i++)
- printk(" %lu", zone->lowmem_reserve[i]);
+ printk(" %ld", zone->lowmem_reserve[i]);
printk("\n");
}
@@ -4969,6 +4975,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
pgdat->node_start_pfn = node_start_pfn;
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+ printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid,
+ (u64) start_pfn << PAGE_SHIFT, (u64) (end_pfn << PAGE_SHIFT) - 1);
#endif
calculate_node_totalpages(pgdat, start_pfn, end_pfn,
zones_size, zholes_size);
@@ -5579,7 +5587,7 @@ static void calculate_totalreserve_pages(void)
for_each_online_pgdat(pgdat) {
for (i = 0; i < MAX_NR_ZONES; i++) {
struct zone *zone = pgdat->node_zones + i;
- unsigned long max = 0;
+ long max = 0;
/* Find valid and maximum lowmem_reserve in the zone */
for (j = i; j < MAX_NR_ZONES; j++) {
@@ -6062,10 +6070,11 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
}
/**
- * get_pageblock_flags_group - Return the requested group of flags for the pageblock_nr_pages block of pages
+ * get_pfnblock_flags_mask - Return the requested group of flags for the pageblock_nr_pages block of pages
* @page: The page within the block of interest
- * @start_bitidx: The first bit of interest to retrieve
+ * @pfn: Page Number of the page
* @end_bitidx: The last bit of interest
+ * @mask: The mask for flags
* returns pageblock_bits flags
*/
unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
@@ -6091,9 +6100,10 @@ unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
/**
* set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages
* @page: The page within the block of interest
- * @start_bitidx: The first bit of interest
- * @end_bitidx: The last bit of interest
* @flags: The flags to set
+ * @pfn: Page Number of the page
+ * @end_bitidx: The last bit of interest
+ * @mask: The mask for flags
*/
void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
unsigned long pfn,
diff --git a/mm/readahead.c b/mm/readahead.c
index 0ca36a7770b1..17b9172ec37f 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -326,7 +326,6 @@ static unsigned long get_next_ra_size(struct file_ra_state *ra,
* - thrashing threshold in memory tight systems
*/
static pgoff_t count_history_pages(struct address_space *mapping,
- struct file_ra_state *ra,
pgoff_t offset, unsigned long max)
{
pgoff_t head;
@@ -349,7 +348,7 @@ static int try_context_readahead(struct address_space *mapping,
{
pgoff_t size;
- size = count_history_pages(mapping, ra, offset, max);
+ size = count_history_pages(mapping, offset, max);
/*
* not enough history pages:
diff --git a/mm/rmap.c b/mm/rmap.c
index b7e94ebbd09e..3e8491c504f8 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -517,11 +517,7 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
static inline unsigned long
__vma_address(struct page *page, struct vm_area_struct *vma)
{
- pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-
- if (unlikely(is_vm_hugetlb_page(vma)))
- pgoff = page->index << huge_page_order(page_hstate(page));
-
+ pgoff_t pgoff = page_to_pgoff(page);
return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
}
@@ -1036,25 +1032,6 @@ void page_add_new_anon_rmap(struct page *page,
__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
hpage_nr_pages(page));
__page_set_anon_rmap(page, vma, address, 1);
-
- VM_BUG_ON_PAGE(PageLRU(page), page);
- if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
- SetPageActive(page);
- lru_cache_add(page);
- return;
- }
-
- if (!TestSetPageMlocked(page)) {
- /*
- * We use the irq-unsafe __mod_zone_page_stat because this
- * counter is not modified from interrupt context, and the pte
- * lock is held(spinlock), which implies preemption disabled.
- */
- __mod_zone_page_state(page_zone(page), NR_MLOCK,
- hpage_nr_pages(page));
- count_vm_event(UNEVICTABLE_PGMLOCKED);
- }
- add_page_to_unevictable_list(page);
}
/**
@@ -1112,7 +1089,6 @@ void page_remove_rmap(struct page *page)
if (unlikely(PageHuge(page)))
goto out;
if (anon) {
- mem_cgroup_uncharge_page(page);
if (PageTransHuge(page))
__dec_zone_page_state(page,
NR_ANON_TRANSPARENT_HUGEPAGES);
@@ -1639,7 +1615,7 @@ static struct anon_vma *rmap_walk_anon_lock(struct page *page,
static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc)
{
struct anon_vma *anon_vma;
- pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ pgoff_t pgoff = page_to_pgoff(page);
struct anon_vma_chain *avc;
int ret = SWAP_AGAIN;
@@ -1680,7 +1656,7 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc)
static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
{
struct address_space *mapping = page->mapping;
- pgoff_t pgoff = page->index << compound_order(page);
+ pgoff_t pgoff = page_to_pgoff(page);
struct vm_area_struct *vma;
int ret = SWAP_AGAIN;
diff --git a/mm/shmem.c b/mm/shmem.c
index 1140f49b6ded..b16d3e7e8d9a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -85,7 +85,7 @@ static struct vfsmount *shm_mnt;
* a time): we would prefer not to enlarge the shmem inode just for that.
*/
struct shmem_falloc {
- int mode; /* FALLOC_FL mode currently operating */
+ wait_queue_head_t *waitq; /* faults into hole wait for punch to end */
pgoff_t start; /* start of range currently being fallocated */
pgoff_t next; /* the next page offset to be fallocated */
pgoff_t nr_falloced; /* how many new pages have been fallocated */
@@ -149,6 +149,19 @@ static inline void shmem_unacct_size(unsigned long flags, loff_t size)
vm_unacct_memory(VM_ACCT(size));
}
+static inline int shmem_reacct_size(unsigned long flags,
+ loff_t oldsize, loff_t newsize)
+{
+ if (!(flags & VM_NORESERVE)) {
+ if (VM_ACCT(newsize) > VM_ACCT(oldsize))
+ return security_vm_enough_memory_mm(current->mm,
+ VM_ACCT(newsize) - VM_ACCT(oldsize));
+ else if (VM_ACCT(newsize) < VM_ACCT(oldsize))
+ vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize));
+ }
+ return 0;
+}
+
/*
* ... whereas tmpfs objects are accounted incrementally as
* pages are allocated, in order to allow huge sparse files.
@@ -280,7 +293,7 @@ static bool shmem_confirm_swap(struct address_space *mapping,
*/
static int shmem_add_to_page_cache(struct page *page,
struct address_space *mapping,
- pgoff_t index, gfp_t gfp, void *expected)
+ pgoff_t index, void *expected)
{
int error;
@@ -406,7 +419,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
pvec.pages, indices);
if (!pvec.nr)
break;
- mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
@@ -434,7 +446,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
}
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
- mem_cgroup_uncharge_end();
cond_resched();
index++;
}
@@ -468,24 +479,20 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
return;
index = start;
- for ( ; ; ) {
+ while (index < end) {
cond_resched();
pvec.nr = find_get_entries(mapping, index,
min(end - index, (pgoff_t)PAGEVEC_SIZE),
pvec.pages, indices);
if (!pvec.nr) {
- if (index == start || unfalloc)
+ /* If all gone or hole-punch or unfalloc, we're done */
+ if (index == start || end != -1)
break;
+ /* But if truncating, restart to make sure all gone */
index = start;
continue;
}
- if ((index == start || unfalloc) && indices[0] >= end) {
- pagevec_remove_exceptionals(&pvec);
- pagevec_release(&pvec);
- break;
- }
- mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
@@ -496,8 +503,12 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
if (radix_tree_exceptional_entry(page)) {
if (unfalloc)
continue;
- nr_swaps_freed += !shmem_free_swap(mapping,
- index, page);
+ if (shmem_free_swap(mapping, index, page)) {
+ /* Swap was replaced by page: retry */
+ index--;
+ break;
+ }
+ nr_swaps_freed++;
continue;
}
@@ -506,13 +517,17 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
if (page->mapping == mapping) {
VM_BUG_ON_PAGE(PageWriteback(page), page);
truncate_inode_page(mapping, page);
+ } else {
+ /* Page was replaced by swap: retry */
+ unlock_page(page);
+ index--;
+ break;
}
}
unlock_page(page);
}
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
- mem_cgroup_uncharge_end();
index++;
}
@@ -543,6 +558,10 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
loff_t newsize = attr->ia_size;
if (newsize != oldsize) {
+ error = shmem_reacct_size(SHMEM_I(inode)->flags,
+ oldsize, newsize);
+ if (error)
+ return error;
i_size_write(inode, newsize);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
}
@@ -598,7 +617,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
radswap = swp_to_radix_entry(swap);
index = radix_tree_locate_item(&mapping->page_tree, radswap);
if (index == -1)
- return 0;
+ return -EAGAIN; /* tell shmem_unuse we found nothing */
/*
* Move _head_ to start search for next from here.
@@ -643,7 +662,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
*/
if (!error)
error = shmem_add_to_page_cache(*pagep, mapping, index,
- GFP_NOWAIT, radswap);
+ radswap);
if (error != -ENOMEM) {
/*
* Truncation and eviction use free_swap_and_cache(), which
@@ -657,7 +676,6 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
spin_unlock(&info->lock);
swap_free(swap);
}
- error = 1; /* not an error, but entry was found */
}
return error;
}
@@ -669,7 +687,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
{
struct list_head *this, *next;
struct shmem_inode_info *info;
- int found = 0;
+ struct mem_cgroup *memcg;
int error = 0;
/*
@@ -684,26 +702,32 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
* the shmem_swaplist_mutex which might hold up shmem_writepage().
* Charged back to the user (not to caller) when swap account is used.
*/
- error = mem_cgroup_charge_file(page, current->mm, GFP_KERNEL);
+ error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg);
if (error)
goto out;
/* No radix_tree_preload: swap entry keeps a place for page in tree */
+ error = -EAGAIN;
mutex_lock(&shmem_swaplist_mutex);
list_for_each_safe(this, next, &shmem_swaplist) {
info = list_entry(this, struct shmem_inode_info, swaplist);
if (info->swapped)
- found = shmem_unuse_inode(info, swap, &page);
+ error = shmem_unuse_inode(info, swap, &page);
else
list_del_init(&info->swaplist);
cond_resched();
- if (found)
+ if (error != -EAGAIN)
break;
+ /* found nothing in this: move on to search the next */
}
mutex_unlock(&shmem_swaplist_mutex);
- if (found < 0)
- error = found;
+ if (error) {
+ if (error != -ENOMEM)
+ error = 0;
+ mem_cgroup_cancel_charge(page, memcg);
+ } else
+ mem_cgroup_commit_charge(page, memcg, true);
out:
unlock_page(page);
page_cache_release(page);
@@ -760,7 +784,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
spin_lock(&inode->i_lock);
shmem_falloc = inode->i_private;
if (shmem_falloc &&
- !shmem_falloc->mode &&
+ !shmem_falloc->waitq &&
index >= shmem_falloc->start &&
index < shmem_falloc->next)
shmem_falloc->nr_unswapped++;
@@ -807,7 +831,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
}
mutex_unlock(&shmem_swaplist_mutex);
- swapcache_free(swap, NULL);
+ swapcache_free(swap);
redirty:
set_page_dirty(page);
if (wbc->for_reclaim)
@@ -980,7 +1004,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
*/
oldpage = newpage;
} else {
- mem_cgroup_replace_page_cache(oldpage, newpage);
+ mem_cgroup_migrate(oldpage, newpage, false);
lru_cache_add_anon(newpage);
*pagep = newpage;
}
@@ -1007,6 +1031,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info;
struct shmem_sb_info *sbinfo;
+ struct mem_cgroup *memcg;
struct page *page;
swp_entry_t swap;
int error;
@@ -1085,11 +1110,10 @@ repeat:
goto failed;
}
- error = mem_cgroup_charge_file(page, current->mm,
- gfp & GFP_RECLAIM_MASK);
+ error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
if (!error) {
error = shmem_add_to_page_cache(page, mapping, index,
- gfp, swp_to_radix_entry(swap));
+ swp_to_radix_entry(swap));
/*
* We already confirmed swap under page lock, and make
* no memory allocation here, so usually no possibility
@@ -1102,12 +1126,16 @@ repeat:
* Reset swap.val? No, leave it so "failed" goes back to
* "repeat": reading a hole and writing should succeed.
*/
- if (error)
+ if (error) {
+ mem_cgroup_cancel_charge(page, memcg);
delete_from_swap_cache(page);
+ }
}
if (error)
goto failed;
+ mem_cgroup_commit_charge(page, memcg, true);
+
spin_lock(&info->lock);
info->swapped--;
shmem_recalc_inode(inode);
@@ -1143,22 +1171,22 @@ repeat:
__SetPageSwapBacked(page);
__set_page_locked(page);
if (sgp == SGP_WRITE)
- init_page_accessed(page);
+ __SetPageReferenced(page);
- error = mem_cgroup_charge_file(page, current->mm,
- gfp & GFP_RECLAIM_MASK);
+ error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg);
if (error)
goto decused;
error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
if (!error) {
error = shmem_add_to_page_cache(page, mapping, index,
- gfp, NULL);
+ NULL);
radix_tree_preload_end();
}
if (error) {
- mem_cgroup_uncharge_cache_page(page);
+ mem_cgroup_cancel_charge(page, memcg);
goto decused;
}
+ mem_cgroup_commit_charge(page, memcg, false);
lru_cache_add_anon(page);
spin_lock(&info->lock);
@@ -1248,38 +1276,58 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
* Trinity finds that probing a hole which tmpfs is punching can
* prevent the hole-punch from ever completing: which in turn
* locks writers out with its hold on i_mutex. So refrain from
- * faulting pages into the hole while it's being punched, and
- * wait on i_mutex to be released if vmf->flags permits.
+ * faulting pages into the hole while it's being punched. Although
+ * shmem_undo_range() does remove the additions, it may be unable to
+ * keep up, as each new page needs its own unmap_mapping_range() call,
+ * and the i_mmap tree grows ever slower to scan if new vmas are added.
+ *
+ * It does not matter if we sometimes reach this check just before the
+ * hole-punch begins, so that one fault then races with the punch:
+ * we just need to make racing faults a rare case.
+ *
+ * The implementation below would be much simpler if we just used a
+ * standard mutex or completion: but we cannot take i_mutex in fault,
+ * and bloating every shmem inode for this unlikely case would be sad.
*/
if (unlikely(inode->i_private)) {
struct shmem_falloc *shmem_falloc;
spin_lock(&inode->i_lock);
shmem_falloc = inode->i_private;
- if (!shmem_falloc ||
- shmem_falloc->mode != FALLOC_FL_PUNCH_HOLE ||
- vmf->pgoff < shmem_falloc->start ||
- vmf->pgoff >= shmem_falloc->next)
- shmem_falloc = NULL;
- spin_unlock(&inode->i_lock);
- /*
- * i_lock has protected us from taking shmem_falloc seriously
- * once return from shmem_fallocate() went back up that stack.
- * i_lock does not serialize with i_mutex at all, but it does
- * not matter if sometimes we wait unnecessarily, or sometimes
- * miss out on waiting: we just need to make those cases rare.
- */
- if (shmem_falloc) {
+ if (shmem_falloc &&
+ shmem_falloc->waitq &&
+ vmf->pgoff >= shmem_falloc->start &&
+ vmf->pgoff < shmem_falloc->next) {
+ wait_queue_head_t *shmem_falloc_waitq;
+ DEFINE_WAIT(shmem_fault_wait);
+
+ ret = VM_FAULT_NOPAGE;
if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
+ /* It's polite to up mmap_sem if we can */
up_read(&vma->vm_mm->mmap_sem);
- mutex_lock(&inode->i_mutex);
- mutex_unlock(&inode->i_mutex);
- return VM_FAULT_RETRY;
+ ret = VM_FAULT_RETRY;
}
- /* cond_resched? Leave that to GUP or return to user */
- return VM_FAULT_NOPAGE;
+
+ shmem_falloc_waitq = shmem_falloc->waitq;
+ prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
+ TASK_KILLABLE);
+ spin_unlock(&inode->i_lock);
+ schedule();
+
+ /*
+ * shmem_falloc_waitq points into the shmem_fallocate()
+ * stack of the hole-punching task: shmem_falloc_waitq
+ * is usually invalid by the time we reach here, but
+ * finish_wait() does not dereference it in that case;
+ * though i_lock needed lest racing with wake_up_all().
+ */
+ spin_lock(&inode->i_lock);
+ finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
+ spin_unlock(&inode->i_lock);
+ return ret;
}
+ spin_unlock(&inode->i_lock);
}
error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
@@ -1774,13 +1822,13 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
mutex_lock(&inode->i_mutex);
- shmem_falloc.mode = mode & ~FALLOC_FL_KEEP_SIZE;
-
if (mode & FALLOC_FL_PUNCH_HOLE) {
struct address_space *mapping = file->f_mapping;
loff_t unmap_start = round_up(offset, PAGE_SIZE);
loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
+ shmem_falloc.waitq = &shmem_falloc_waitq;
shmem_falloc.start = unmap_start >> PAGE_SHIFT;
shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
spin_lock(&inode->i_lock);
@@ -1792,8 +1840,13 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
1 + unmap_end - unmap_start, 0);
shmem_truncate_range(inode, offset, offset + len - 1);
/* No need to unmap again: hole-punching leaves COWed pages */
+
+ spin_lock(&inode->i_lock);
+ inode->i_private = NULL;
+ wake_up_all(&shmem_falloc_waitq);
+ spin_unlock(&inode->i_lock);
error = 0;
- goto undone;
+ goto out;
}
/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
@@ -1809,6 +1862,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
goto out;
}
+ shmem_falloc.waitq = NULL;
shmem_falloc.start = start;
shmem_falloc.next = start;
shmem_falloc.nr_falloced = 0;
@@ -2900,16 +2954,16 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
this.len = strlen(name);
this.hash = 0; /* will go */
sb = shm_mnt->mnt_sb;
+ path.mnt = mntget(shm_mnt);
path.dentry = d_alloc_pseudo(sb, &this);
if (!path.dentry)
goto put_memory;
d_set_d_op(path.dentry, &anon_ops);
- path.mnt = mntget(shm_mnt);
res = ERR_PTR(-ENOSPC);
inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags);
if (!inode)
- goto put_dentry;
+ goto put_memory;
inode->i_flags |= i_flags;
d_instantiate(path.dentry, inode);
@@ -2917,19 +2971,19 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
clear_nlink(inode); /* It is unlinked */
res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
if (IS_ERR(res))
- goto put_dentry;
+ goto put_path;
res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
&shmem_file_operations);
if (IS_ERR(res))
- goto put_dentry;
+ goto put_path;
return res;
-put_dentry:
- path_put(&path);
put_memory:
shmem_unacct_size(flags, size);
+put_path:
+ path_put(&path);
return res;
}
diff --git a/mm/slab.c b/mm/slab.c
index 3070b929a1bf..1351725f7936 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -191,7 +191,6 @@ struct array_cache {
unsigned int limit;
unsigned int batchcount;
unsigned int touched;
- spinlock_t lock;
void *entry[]; /*
* Must have this definition in here for the proper
* alignment of array_cache. Also simplifies accessing
@@ -203,6 +202,11 @@ struct array_cache {
*/
};
+struct alien_cache {
+ spinlock_t lock;
+ struct array_cache ac;
+};
+
#define SLAB_OBJ_PFMEMALLOC 1
static inline bool is_obj_pfmemalloc(void *objp)
{
@@ -242,7 +246,8 @@ static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
static int drain_freelist(struct kmem_cache *cache,
struct kmem_cache_node *n, int tofree);
static void free_block(struct kmem_cache *cachep, void **objpp, int len,
- int node);
+ int node, struct list_head *list);
+static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list);
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
static void cache_reap(struct work_struct *unused);
@@ -267,7 +272,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
#define MAKE_LIST(cachep, listp, slab, nodeid) \
do { \
INIT_LIST_HEAD(listp); \
- list_splice(&(cachep->node[nodeid]->slab), listp); \
+ list_splice(&get_node(cachep, nodeid)->slab, listp); \
} while (0)
#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \
@@ -465,143 +470,6 @@ static struct kmem_cache kmem_cache_boot = {
.name = "kmem_cache",
};
-#define BAD_ALIEN_MAGIC 0x01020304ul
-
-#ifdef CONFIG_LOCKDEP
-
-/*
- * Slab sometimes uses the kmalloc slabs to store the slab headers
- * for other slabs "off slab".
- * The locking for this is tricky in that it nests within the locks
- * of all other slabs in a few places; to deal with this special
- * locking we put on-slab caches into a separate lock-class.
- *
- * We set lock class for alien array caches which are up during init.
- * The lock annotation will be lost if all cpus of a node goes down and
- * then comes back up during hotplug
- */
-static struct lock_class_key on_slab_l3_key;
-static struct lock_class_key on_slab_alc_key;
-
-static struct lock_class_key debugobj_l3_key;
-static struct lock_class_key debugobj_alc_key;
-
-static void slab_set_lock_classes(struct kmem_cache *cachep,
- struct lock_class_key *l3_key, struct lock_class_key *alc_key,
- int q)
-{
- struct array_cache **alc;
- struct kmem_cache_node *n;
- int r;
-
- n = cachep->node[q];
- if (!n)
- return;
-
- lockdep_set_class(&n->list_lock, l3_key);
- alc = n->alien;
- /*
- * FIXME: This check for BAD_ALIEN_MAGIC
- * should go away when common slab code is taught to
- * work even without alien caches.
- * Currently, non NUMA code returns BAD_ALIEN_MAGIC
- * for alloc_alien_cache,
- */
- if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
- return;
- for_each_node(r) {
- if (alc[r])
- lockdep_set_class(&alc[r]->lock, alc_key);
- }
-}
-
-static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
-{
- slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node);
-}
-
-static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
-{
- int node;
-
- for_each_online_node(node)
- slab_set_debugobj_lock_classes_node(cachep, node);
-}
-
-static void init_node_lock_keys(int q)
-{
- int i;
-
- if (slab_state < UP)
- return;
-
- for (i = 1; i <= KMALLOC_SHIFT_HIGH; i++) {
- struct kmem_cache_node *n;
- struct kmem_cache *cache = kmalloc_caches[i];
-
- if (!cache)
- continue;
-
- n = cache->node[q];
- if (!n || OFF_SLAB(cache))
- continue;
-
- slab_set_lock_classes(cache, &on_slab_l3_key,
- &on_slab_alc_key, q);
- }
-}
-
-static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q)
-{
- if (!cachep->node[q])
- return;
-
- slab_set_lock_classes(cachep, &on_slab_l3_key,
- &on_slab_alc_key, q);
-}
-
-static inline void on_slab_lock_classes(struct kmem_cache *cachep)
-{
- int node;
-
- VM_BUG_ON(OFF_SLAB(cachep));
- for_each_node(node)
- on_slab_lock_classes_node(cachep, node);
-}
-
-static inline void init_lock_keys(void)
-{
- int node;
-
- for_each_node(node)
- init_node_lock_keys(node);
-}
-#else
-static void init_node_lock_keys(int q)
-{
-}
-
-static inline void init_lock_keys(void)
-{
-}
-
-static inline void on_slab_lock_classes(struct kmem_cache *cachep)
-{
-}
-
-static inline void on_slab_lock_classes_node(struct kmem_cache *cachep, int node)
-{
-}
-
-static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
-{
-}
-
-static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
-{
-}
-#endif
-
static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -792,13 +660,8 @@ static void start_cpu_timer(int cpu)
}
}
-static struct array_cache *alloc_arraycache(int node, int entries,
- int batchcount, gfp_t gfp)
+static void init_arraycache(struct array_cache *ac, int limit, int batch)
{
- int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
- struct array_cache *nc = NULL;
-
- nc = kmalloc_node(memsize, gfp, node);
/*
* The array_cache structures contain pointers to free object.
* However, when such objects are allocated or transferred to another
@@ -806,15 +669,24 @@ static struct array_cache *alloc_arraycache(int node, int entries,
* valid references during a kmemleak scan. Therefore, kmemleak must
* not scan such objects.
*/
- kmemleak_no_scan(nc);
- if (nc) {
- nc->avail = 0;
- nc->limit = entries;
- nc->batchcount = batchcount;
- nc->touched = 0;
- spin_lock_init(&nc->lock);
+ kmemleak_no_scan(ac);
+ if (ac) {
+ ac->avail = 0;
+ ac->limit = limit;
+ ac->batchcount = batch;
+ ac->touched = 0;
}
- return nc;
+}
+
+static struct array_cache *alloc_arraycache(int node, int entries,
+ int batchcount, gfp_t gfp)
+{
+ size_t memsize = sizeof(void *) * entries + sizeof(struct array_cache);
+ struct array_cache *ac = NULL;
+
+ ac = kmalloc_node(memsize, gfp, node);
+ init_arraycache(ac, entries, batchcount);
+ return ac;
}
static inline bool is_slab_pfmemalloc(struct page *page)
@@ -826,7 +698,7 @@ static inline bool is_slab_pfmemalloc(struct page *page)
static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
struct array_cache *ac)
{
- struct kmem_cache_node *n = cachep->node[numa_mem_id()];
+ struct kmem_cache_node *n = get_node(cachep, numa_mem_id());
struct page *page;
unsigned long flags;
@@ -881,7 +753,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
* If there are empty slabs on the slabs_free list and we are
* being forced to refill the cache, mark this one !pfmemalloc.
*/
- n = cachep->node[numa_mem_id()];
+ n = get_node(cachep, numa_mem_id());
if (!list_empty(&n->slabs_free) && force_refill) {
struct page *page = virt_to_head_page(objp);
ClearPageSlabPfmemalloc(page);
@@ -961,12 +833,13 @@ static int transfer_objects(struct array_cache *to,
#define drain_alien_cache(cachep, alien) do { } while (0)
#define reap_alien(cachep, n) do { } while (0)
-static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
+static inline struct alien_cache **alloc_alien_cache(int node,
+ int limit, gfp_t gfp)
{
- return (struct array_cache **)BAD_ALIEN_MAGIC;
+ return NULL;
}
-static inline void free_alien_cache(struct array_cache **ac_ptr)
+static inline void free_alien_cache(struct alien_cache **ac_ptr)
{
}
@@ -992,46 +865,60 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep,
static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
-static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
+static struct alien_cache *__alloc_alien_cache(int node, int entries,
+ int batch, gfp_t gfp)
+{
+ size_t memsize = sizeof(void *) * entries + sizeof(struct alien_cache);
+ struct alien_cache *alc = NULL;
+
+ alc = kmalloc_node(memsize, gfp, node);
+ init_arraycache(&alc->ac, entries, batch);
+ spin_lock_init(&alc->lock);
+ return alc;
+}
+
+static struct alien_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
{
- struct array_cache **ac_ptr;
- int memsize = sizeof(void *) * nr_node_ids;
+ struct alien_cache **alc_ptr;
+ size_t memsize = sizeof(void *) * nr_node_ids;
int i;
if (limit > 1)
limit = 12;
- ac_ptr = kzalloc_node(memsize, gfp, node);
- if (ac_ptr) {
- for_each_node(i) {
- if (i == node || !node_online(i))
- continue;
- ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
- if (!ac_ptr[i]) {
- for (i--; i >= 0; i--)
- kfree(ac_ptr[i]);
- kfree(ac_ptr);
- return NULL;
- }
+ alc_ptr = kzalloc_node(memsize, gfp, node);
+ if (!alc_ptr)
+ return NULL;
+
+ for_each_node(i) {
+ if (i == node || !node_online(i))
+ continue;
+ alc_ptr[i] = __alloc_alien_cache(node, limit, 0xbaadf00d, gfp);
+ if (!alc_ptr[i]) {
+ for (i--; i >= 0; i--)
+ kfree(alc_ptr[i]);
+ kfree(alc_ptr);
+ return NULL;
}
}
- return ac_ptr;
+ return alc_ptr;
}
-static void free_alien_cache(struct array_cache **ac_ptr)
+static void free_alien_cache(struct alien_cache **alc_ptr)
{
int i;
- if (!ac_ptr)
+ if (!alc_ptr)
return;
for_each_node(i)
- kfree(ac_ptr[i]);
- kfree(ac_ptr);
+ kfree(alc_ptr[i]);
+ kfree(alc_ptr);
}
static void __drain_alien_cache(struct kmem_cache *cachep,
- struct array_cache *ac, int node)
+ struct array_cache *ac, int node,
+ struct list_head *list)
{
- struct kmem_cache_node *n = cachep->node[node];
+ struct kmem_cache_node *n = get_node(cachep, node);
if (ac->avail) {
spin_lock(&n->list_lock);
@@ -1043,7 +930,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
if (n->shared)
transfer_objects(n->shared, ac, ac->limit);
- free_block(cachep, ac->entry, ac->avail, node);
+ free_block(cachep, ac->entry, ac->avail, node, list);
ac->avail = 0;
spin_unlock(&n->list_lock);
}
@@ -1057,28 +944,40 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
int node = __this_cpu_read(slab_reap_node);
if (n->alien) {
- struct array_cache *ac = n->alien[node];
+ struct alien_cache *alc = n->alien[node];
+ struct array_cache *ac;
+
+ if (alc) {
+ ac = &alc->ac;
+ if (ac->avail && spin_trylock_irq(&alc->lock)) {
+ LIST_HEAD(list);
- if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
- __drain_alien_cache(cachep, ac, node);
- spin_unlock_irq(&ac->lock);
+ __drain_alien_cache(cachep, ac, node, &list);
+ spin_unlock_irq(&alc->lock);
+ slabs_destroy(cachep, &list);
+ }
}
}
}
static void drain_alien_cache(struct kmem_cache *cachep,
- struct array_cache **alien)
+ struct alien_cache **alien)
{
int i = 0;
+ struct alien_cache *alc;
struct array_cache *ac;
unsigned long flags;
for_each_online_node(i) {
- ac = alien[i];
- if (ac) {
- spin_lock_irqsave(&ac->lock, flags);
- __drain_alien_cache(cachep, ac, i);
- spin_unlock_irqrestore(&ac->lock, flags);
+ alc = alien[i];
+ if (alc) {
+ LIST_HEAD(list);
+
+ ac = &alc->ac;
+ spin_lock_irqsave(&alc->lock, flags);
+ __drain_alien_cache(cachep, ac, i, &list);
+ spin_unlock_irqrestore(&alc->lock, flags);
+ slabs_destroy(cachep, &list);
}
}
}
@@ -1087,8 +986,10 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
int nodeid = page_to_nid(virt_to_page(objp));
struct kmem_cache_node *n;
- struct array_cache *alien = NULL;
+ struct alien_cache *alien = NULL;
+ struct array_cache *ac;
int node;
+ LIST_HEAD(list);
node = numa_mem_id();
@@ -1099,21 +1000,25 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
if (likely(nodeid == node))
return 0;
- n = cachep->node[node];
+ n = get_node(cachep, node);
STATS_INC_NODEFREES(cachep);
if (n->alien && n->alien[nodeid]) {
alien = n->alien[nodeid];
+ ac = &alien->ac;
spin_lock(&alien->lock);
- if (unlikely(alien->avail == alien->limit)) {
+ if (unlikely(ac->avail == ac->limit)) {
STATS_INC_ACOVERFLOW(cachep);
- __drain_alien_cache(cachep, alien, nodeid);
+ __drain_alien_cache(cachep, ac, nodeid, &list);
}
- ac_put_obj(cachep, alien, objp);
+ ac_put_obj(cachep, ac, objp);
spin_unlock(&alien->lock);
+ slabs_destroy(cachep, &list);
} else {
- spin_lock(&(cachep->node[nodeid])->list_lock);
- free_block(cachep, &objp, 1, nodeid);
- spin_unlock(&(cachep->node[nodeid])->list_lock);
+ n = get_node(cachep, nodeid);
+ spin_lock(&n->list_lock);
+ free_block(cachep, &objp, 1, nodeid, &list);
+ spin_unlock(&n->list_lock);
+ slabs_destroy(cachep, &list);
}
return 1;
}
@@ -1132,7 +1037,7 @@ static int init_cache_node_node(int node)
{
struct kmem_cache *cachep;
struct kmem_cache_node *n;
- const int memsize = sizeof(struct kmem_cache_node);
+ const size_t memsize = sizeof(struct kmem_cache_node);
list_for_each_entry(cachep, &slab_caches, list) {
/*
@@ -1140,7 +1045,8 @@ static int init_cache_node_node(int node)
* begin anything. Make sure some other cpu on this
* node has not already allocated this
*/
- if (!cachep->node[node]) {
+ n = get_node(cachep, node);
+ if (!n) {
n = kmalloc_node(memsize, GFP_KERNEL, node);
if (!n)
return -ENOMEM;
@@ -1156,11 +1062,11 @@ static int init_cache_node_node(int node)
cachep->node[node] = n;
}
- spin_lock_irq(&cachep->node[node]->list_lock);
- cachep->node[node]->free_limit =
+ spin_lock_irq(&n->list_lock);
+ n->free_limit =
(1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
- spin_unlock_irq(&cachep->node[node]->list_lock);
+ spin_unlock_irq(&n->list_lock);
}
return 0;
}
@@ -1181,12 +1087,13 @@ static void cpuup_canceled(long cpu)
list_for_each_entry(cachep, &slab_caches, list) {
struct array_cache *nc;
struct array_cache *shared;
- struct array_cache **alien;
+ struct alien_cache **alien;
+ LIST_HEAD(list);
/* cpu is dead; no one can alloc from it. */
nc = cachep->array[cpu];
cachep->array[cpu] = NULL;
- n = cachep->node[node];
+ n = get_node(cachep, node);
if (!n)
goto free_array_cache;
@@ -1196,7 +1103,7 @@ static void cpuup_canceled(long cpu)
/* Free limit for this kmem_cache_node */
n->free_limit -= cachep->batchcount;
if (nc)
- free_block(cachep, nc->entry, nc->avail, node);
+ free_block(cachep, nc->entry, nc->avail, node, &list);
if (!cpumask_empty(mask)) {
spin_unlock_irq(&n->list_lock);
@@ -1206,7 +1113,7 @@ static void cpuup_canceled(long cpu)
shared = n->shared;
if (shared) {
free_block(cachep, shared->entry,
- shared->avail, node);
+ shared->avail, node, &list);
n->shared = NULL;
}
@@ -1221,6 +1128,7 @@ static void cpuup_canceled(long cpu)
free_alien_cache(alien);
}
free_array_cache:
+ slabs_destroy(cachep, &list);
kfree(nc);
}
/*
@@ -1229,7 +1137,7 @@ free_array_cache:
* shrink each nodelist to its limit.
*/
list_for_each_entry(cachep, &slab_caches, list) {
- n = cachep->node[node];
+ n = get_node(cachep, node);
if (!n)
continue;
drain_freelist(cachep, n, slabs_tofree(cachep, n));
@@ -1260,7 +1168,7 @@ static int cpuup_prepare(long cpu)
list_for_each_entry(cachep, &slab_caches, list) {
struct array_cache *nc;
struct array_cache *shared = NULL;
- struct array_cache **alien = NULL;
+ struct alien_cache **alien = NULL;
nc = alloc_arraycache(node, cachep->limit,
cachep->batchcount, GFP_KERNEL);
@@ -1284,7 +1192,7 @@ static int cpuup_prepare(long cpu)
}
}
cachep->array[cpu] = nc;
- n = cachep->node[node];
+ n = get_node(cachep, node);
BUG_ON(!n);
spin_lock_irq(&n->list_lock);
@@ -1305,13 +1213,7 @@ static int cpuup_prepare(long cpu)
spin_unlock_irq(&n->list_lock);
kfree(shared);
free_alien_cache(alien);
- if (cachep->flags & SLAB_DEBUG_OBJECTS)
- slab_set_debugobj_lock_classes_node(cachep, node);
- else if (!OFF_SLAB(cachep) &&
- !(cachep->flags & SLAB_DESTROY_BY_RCU))
- on_slab_lock_classes_node(cachep, node);
}
- init_node_lock_keys(node);
return 0;
bad:
@@ -1395,7 +1297,7 @@ static int __meminit drain_cache_node_node(int node)
list_for_each_entry(cachep, &slab_caches, list) {
struct kmem_cache_node *n;
- n = cachep->node[node];
+ n = get_node(cachep, node);
if (!n)
continue;
@@ -1575,10 +1477,6 @@ void __init kmem_cache_init(void)
memcpy(ptr, cpu_cache_get(kmem_cache),
sizeof(struct arraycache_init));
- /*
- * Do not assume that spinlocks can be initialized via memcpy:
- */
- spin_lock_init(&ptr->lock);
kmem_cache->array[smp_processor_id()] = ptr;
@@ -1588,10 +1486,6 @@ void __init kmem_cache_init(void)
!= &initarray_generic.cache);
memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]),
sizeof(struct arraycache_init));
- /*
- * Do not assume that spinlocks can be initialized via memcpy:
- */
- spin_lock_init(&ptr->lock);
kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr;
}
@@ -1628,9 +1522,6 @@ void __init kmem_cache_init_late(void)
BUG();
mutex_unlock(&slab_mutex);
- /* Annotate slab for lockdep -- annotate the malloc caches */
- init_lock_keys();
-
/* Done! */
slab_state = FULL;
@@ -1690,14 +1581,10 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n",
cachep->name, cachep->size, cachep->gfporder);
- for_each_online_node(node) {
+ for_each_kmem_cache_node(cachep, node, n) {
unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
unsigned long active_slabs = 0, num_slabs = 0;
- n = cachep->node[node];
- if (!n)
- continue;
-
spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->slabs_full, lru) {
active_objs += cachep->num;
@@ -2060,6 +1947,16 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page)
kmem_cache_free(cachep->freelist_cache, freelist);
}
+static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list)
+{
+ struct page *page, *n;
+
+ list_for_each_entry_safe(page, n, list, lru) {
+ list_del(&page->lru);
+ slab_destroy(cachep, page);
+ }
+}
+
/**
* calculate_slab_order - calculate size (page order) of slabs
* @cachep: pointer to the cache that is being created
@@ -2405,17 +2302,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
return err;
}
- if (flags & SLAB_DEBUG_OBJECTS) {
- /*
- * Would deadlock through slab_destroy()->call_rcu()->
- * debug_object_activate()->kmem_cache_alloc().
- */
- WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);
-
- slab_set_debugobj_lock_classes(cachep);
- } else if (!OFF_SLAB(cachep) && !(flags & SLAB_DESTROY_BY_RCU))
- on_slab_lock_classes(cachep);
-
return 0;
}
@@ -2434,7 +2320,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep)
{
#ifdef CONFIG_SMP
check_irq_off();
- assert_spin_locked(&cachep->node[numa_mem_id()]->list_lock);
+ assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
#endif
}
@@ -2442,7 +2328,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
{
#ifdef CONFIG_SMP
check_irq_off();
- assert_spin_locked(&cachep->node[node]->list_lock);
+ assert_spin_locked(&get_node(cachep, node)->list_lock);
#endif
}
@@ -2462,12 +2348,16 @@ static void do_drain(void *arg)
struct kmem_cache *cachep = arg;
struct array_cache *ac;
int node = numa_mem_id();
+ struct kmem_cache_node *n;
+ LIST_HEAD(list);
check_irq_off();
ac = cpu_cache_get(cachep);
- spin_lock(&cachep->node[node]->list_lock);
- free_block(cachep, ac->entry, ac->avail, node);
- spin_unlock(&cachep->node[node]->list_lock);
+ n = get_node(cachep, node);
+ spin_lock(&n->list_lock);
+ free_block(cachep, ac->entry, ac->avail, node, &list);
+ spin_unlock(&n->list_lock);
+ slabs_destroy(cachep, &list);
ac->avail = 0;
}
@@ -2478,17 +2368,12 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
on_each_cpu(do_drain, cachep, 1);
check_irq_on();
- for_each_online_node(node) {
- n = cachep->node[node];
- if (n && n->alien)
+ for_each_kmem_cache_node(cachep, node, n)
+ if (n->alien)
drain_alien_cache(cachep, n->alien);
- }
- for_each_online_node(node) {
- n = cachep->node[node];
- if (n)
- drain_array(cachep, n, n->shared, 1, node);
- }
+ for_each_kmem_cache_node(cachep, node, n)
+ drain_array(cachep, n, n->shared, 1, node);
}
/*
@@ -2534,17 +2419,14 @@ out:
int __kmem_cache_shrink(struct kmem_cache *cachep)
{
- int ret = 0, i = 0;
+ int ret = 0;
+ int node;
struct kmem_cache_node *n;
drain_cpu_caches(cachep);
check_irq_on();
- for_each_online_node(i) {
- n = cachep->node[i];
- if (!n)
- continue;
-
+ for_each_kmem_cache_node(cachep, node, n) {
drain_freelist(cachep, n, slabs_tofree(cachep, n));
ret += !list_empty(&n->slabs_full) ||
@@ -2566,13 +2448,11 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
kfree(cachep->array[i]);
/* NUMA: free the node structures */
- for_each_online_node(i) {
- n = cachep->node[i];
- if (n) {
- kfree(n->shared);
- free_alien_cache(n->alien);
- kfree(n);
- }
+ for_each_kmem_cache_node(cachep, i, n) {
+ kfree(n->shared);
+ free_alien_cache(n->alien);
+ kfree(n);
+ cachep->node[i] = NULL;
}
return 0;
}
@@ -2751,7 +2631,7 @@ static int cache_grow(struct kmem_cache *cachep,
/* Take the node list lock to change the colour_next on this node */
check_irq_off();
- n = cachep->node[nodeid];
+ n = get_node(cachep, nodeid);
spin_lock(&n->list_lock);
/* Get colour for the slab, and cal the next value. */
@@ -2920,7 +2800,7 @@ retry:
*/
batchcount = BATCHREFILL_LIMIT;
}
- n = cachep->node[node];
+ n = get_node(cachep, node);
BUG_ON(ac->avail > 0 || !n);
spin_lock(&n->list_lock);
@@ -3060,7 +2940,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
{
- if (cachep == kmem_cache)
+ if (unlikely(cachep == kmem_cache))
return false;
return should_failslab(cachep->object_size, flags, cachep->flags);
@@ -3169,8 +3049,8 @@ retry:
nid = zone_to_nid(zone);
if (cpuset_zone_allowed_hardwall(zone, flags) &&
- cache->node[nid] &&
- cache->node[nid]->free_objects) {
+ get_node(cache, nid) &&
+ get_node(cache, nid)->free_objects) {
obj = ____cache_alloc_node(cache,
flags | GFP_THISNODE, nid);
if (obj)
@@ -3233,7 +3113,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
int x;
VM_BUG_ON(nodeid > num_online_nodes());
- n = cachep->node[nodeid];
+ n = get_node(cachep, nodeid);
BUG_ON(!n);
retry:
@@ -3304,7 +3184,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
if (nodeid == NUMA_NO_NODE)
nodeid = slab_node;
- if (unlikely(!cachep->node[nodeid])) {
+ if (unlikely(!get_node(cachep, nodeid))) {
/* Node not bootstrapped yet */
ptr = fallback_alloc(cachep, flags);
goto out;
@@ -3405,12 +3285,13 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
/*
* Caller needs to acquire correct kmem_cache_node's list_lock
+ * @list: List of detached free slabs should be freed by caller
*/
-static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
- int node)
+static void free_block(struct kmem_cache *cachep, void **objpp,
+ int nr_objects, int node, struct list_head *list)
{
int i;
- struct kmem_cache_node *n;
+ struct kmem_cache_node *n = get_node(cachep, node);
for (i = 0; i < nr_objects; i++) {
void *objp;
@@ -3420,7 +3301,6 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
objp = objpp[i];
page = virt_to_head_page(objp);
- n = cachep->node[node];
list_del(&page->lru);
check_spinlock_acquired_node(cachep, node);
slab_put_obj(cachep, page, objp, node);
@@ -3431,13 +3311,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
if (page->active == 0) {
if (n->free_objects > n->free_limit) {
n->free_objects -= cachep->num;
- /* No need to drop any previously held
- * lock here, even if we have a off-slab slab
- * descriptor it is guaranteed to come from
- * a different cache, refer to comments before
- * alloc_slabmgmt.
- */
- slab_destroy(cachep, page);
+ list_add_tail(&page->lru, list);
} else {
list_add(&page->lru, &n->slabs_free);
}
@@ -3456,13 +3330,14 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
int batchcount;
struct kmem_cache_node *n;
int node = numa_mem_id();
+ LIST_HEAD(list);
batchcount = ac->batchcount;
#if DEBUG
BUG_ON(!batchcount || batchcount > ac->avail);
#endif
check_irq_off();
- n = cachep->node[node];
+ n = get_node(cachep, node);
spin_lock(&n->list_lock);
if (n->shared) {
struct array_cache *shared_array = n->shared;
@@ -3477,7 +3352,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
}
}
- free_block(cachep, ac->entry, batchcount, node);
+ free_block(cachep, ac->entry, batchcount, node, &list);
free_done:
#if STATS
{
@@ -3498,6 +3373,7 @@ free_done:
}
#endif
spin_unlock(&n->list_lock);
+ slabs_destroy(cachep, &list);
ac->avail -= batchcount;
memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
}
@@ -3754,7 +3630,7 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
int node;
struct kmem_cache_node *n;
struct array_cache *new_shared;
- struct array_cache **new_alien = NULL;
+ struct alien_cache **new_alien = NULL;
for_each_online_node(node) {
@@ -3775,15 +3651,16 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
}
}
- n = cachep->node[node];
+ n = get_node(cachep, node);
if (n) {
struct array_cache *shared = n->shared;
+ LIST_HEAD(list);
spin_lock_irq(&n->list_lock);
if (shared)
free_block(cachep, shared->entry,
- shared->avail, node);
+ shared->avail, node, &list);
n->shared = new_shared;
if (!n->alien) {
@@ -3793,6 +3670,7 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
n->free_limit = (1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
spin_unlock_irq(&n->list_lock);
+ slabs_destroy(cachep, &list);
kfree(shared);
free_alien_cache(new_alien);
continue;
@@ -3820,9 +3698,8 @@ fail:
/* Cache is not active yet. Roll back what we did */
node--;
while (node >= 0) {
- if (cachep->node[node]) {
- n = cachep->node[node];
-
+ n = get_node(cachep, node);
+ if (n) {
kfree(n->shared);
free_alien_cache(n->alien);
kfree(n);
@@ -3883,12 +3760,20 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
cachep->shared = shared;
for_each_online_cpu(i) {
+ LIST_HEAD(list);
struct array_cache *ccold = new->new[i];
+ int node;
+ struct kmem_cache_node *n;
+
if (!ccold)
continue;
- spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
- free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
- spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
+
+ node = cpu_to_mem(i);
+ n = get_node(cachep, node);
+ spin_lock_irq(&n->list_lock);
+ free_block(cachep, ccold->entry, ccold->avail, node, &list);
+ spin_unlock_irq(&n->list_lock);
+ slabs_destroy(cachep, &list);
kfree(ccold);
}
kfree(new);
@@ -3996,6 +3881,7 @@ skip_setup:
static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
struct array_cache *ac, int force, int node)
{
+ LIST_HEAD(list);
int tofree;
if (!ac || !ac->avail)
@@ -4008,12 +3894,13 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
tofree = force ? ac->avail : (ac->limit + 4) / 5;
if (tofree > ac->avail)
tofree = (ac->avail + 1) / 2;
- free_block(cachep, ac->entry, tofree, node);
+ free_block(cachep, ac->entry, tofree, node, &list);
ac->avail -= tofree;
memmove(ac->entry, &(ac->entry[tofree]),
sizeof(void *) * ac->avail);
}
spin_unlock_irq(&n->list_lock);
+ slabs_destroy(cachep, &list);
}
}
@@ -4048,7 +3935,7 @@ static void cache_reap(struct work_struct *w)
* have established with reasonable certainty that
* we can do some work if the lock was obtained.
*/
- n = searchp->node[node];
+ n = get_node(searchp, node);
reap_alien(searchp, n);
@@ -4100,10 +3987,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
active_objs = 0;
num_slabs = 0;
- for_each_online_node(node) {
- n = cachep->node[node];
- if (!n)
- continue;
+ for_each_kmem_cache_node(cachep, node, n) {
check_irq_on();
spin_lock_irq(&n->list_lock);
@@ -4328,10 +4212,7 @@ static int leaks_show(struct seq_file *m, void *p)
x[1] = 0;
- for_each_online_node(node) {
- n = cachep->node[node];
- if (!n)
- continue;
+ for_each_kmem_cache_node(cachep, node, n) {
check_irq_on();
spin_lock_irq(&n->list_lock);
diff --git a/mm/slab.h b/mm/slab.h
index 961a3fb1f5a2..928823e17e58 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -260,9 +260,8 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
WARN_ON_ONCE(1);
return s;
}
-#endif
-
+#ifndef CONFIG_SLOB
/*
* The slab lists for all objects.
*/
@@ -277,7 +276,7 @@ struct kmem_cache_node {
unsigned int free_limit;
unsigned int colour_next; /* Per-node cache coloring */
struct array_cache *shared; /* shared per node */
- struct array_cache **alien; /* on other nodes */
+ struct alien_cache **alien; /* on other nodes */
unsigned long next_reap; /* updated without locking */
int free_touched; /* updated without locking */
#endif
@@ -294,5 +293,22 @@ struct kmem_cache_node {
};
+static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
+{
+ return s->node[node];
+}
+
+/*
+ * Iterator over all nodes. The body will be executed for each node that has
+ * a kmem_cache_node structure allocated (which is true for all online nodes)
+ */
+#define for_each_kmem_cache_node(__s, __node, __n) \
+ for (__node = 0; __n = get_node(__s, __node), __node < nr_node_ids; __node++) \
+ if (__n)
+
+#endif
+
void *slab_next(struct seq_file *m, void *p, loff_t *pos);
void slab_stop(struct seq_file *m, void *p);
+
+#endif /* MM_SLAB_H */
diff --git a/mm/slab_common.c b/mm/slab_common.c
index d31c4bacc6a2..d319502b2403 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -19,6 +19,8 @@
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <linux/memcontrol.h>
+
+#define CREATE_TRACE_POINTS
#include <trace/events/kmem.h>
#include "slab.h"
@@ -787,3 +789,102 @@ static int __init slab_proc_init(void)
}
module_init(slab_proc_init);
#endif /* CONFIG_SLABINFO */
+
+static __always_inline void *__do_krealloc(const void *p, size_t new_size,
+ gfp_t flags)
+{
+ void *ret;
+ size_t ks = 0;
+
+ if (p)
+ ks = ksize(p);
+
+ if (ks >= new_size)
+ return (void *)p;
+
+ ret = kmalloc_track_caller(new_size, flags);
+ if (ret && p)
+ memcpy(ret, p, ks);
+
+ return ret;
+}
+
+/**
+ * __krealloc - like krealloc() but don't free @p.
+ * @p: object to reallocate memory for.
+ * @new_size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * This function is like krealloc() except it never frees the originally
+ * allocated buffer. Use this if you don't want to free the buffer immediately
+ * like, for example, with RCU.
+ */
+void *__krealloc(const void *p, size_t new_size, gfp_t flags)
+{
+ if (unlikely(!new_size))
+ return ZERO_SIZE_PTR;
+
+ return __do_krealloc(p, new_size, flags);
+
+}
+EXPORT_SYMBOL(__krealloc);
+
+/**
+ * krealloc - reallocate memory. The contents will remain unchanged.
+ * @p: object to reallocate memory for.
+ * @new_size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * The contents of the object pointed to are preserved up to the
+ * lesser of the new and old sizes. If @p is %NULL, krealloc()
+ * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
+ * %NULL pointer, the object pointed to is freed.
+ */
+void *krealloc(const void *p, size_t new_size, gfp_t flags)
+{
+ void *ret;
+
+ if (unlikely(!new_size)) {
+ kfree(p);
+ return ZERO_SIZE_PTR;
+ }
+
+ ret = __do_krealloc(p, new_size, flags);
+ if (ret && p != ret)
+ kfree(p);
+
+ return ret;
+}
+EXPORT_SYMBOL(krealloc);
+
+/**
+ * kzfree - like kfree but zero memory
+ * @p: object to free memory of
+ *
+ * The memory of the object @p points to is zeroed before freed.
+ * If @p is %NULL, kzfree() does nothing.
+ *
+ * Note: this function zeroes the whole allocated buffer which can be a good
+ * deal bigger than the requested buffer size passed to kmalloc(). So be
+ * careful when using this function in performance sensitive code.
+ */
+void kzfree(const void *p)
+{
+ size_t ks;
+ void *mem = (void *)p;
+
+ if (unlikely(ZERO_OR_NULL_PTR(mem)))
+ return;
+ ks = ksize(mem);
+ memset(mem, 0, ks);
+ kfree(mem);
+}
+EXPORT_SYMBOL(kzfree);
+
+/* Tracepoints definitions. */
+EXPORT_TRACEPOINT_SYMBOL(kmalloc);
+EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
+EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
+EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
+EXPORT_TRACEPOINT_SYMBOL(kfree);
+EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
diff --git a/mm/slub.c b/mm/slub.c
index 8c24a23fdafa..2b068c3638aa 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -233,11 +233,6 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
* Core slab cache functions
*******************************************************************/
-static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
-{
- return s->node[node];
-}
-
/* Verify that a pointer has an address that is valid within a slab page */
static inline int check_valid_pointer(struct kmem_cache *s,
struct page *page, const void *object)
@@ -288,6 +283,10 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
__p += (__s)->size)
+#define for_each_object_idx(__p, __idx, __s, __addr, __objects) \
+ for (__p = (__addr), __idx = 1; __idx <= __objects;\
+ __p += (__s)->size, __idx++)
+
/* Determine object index from a given position */
static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
{
@@ -945,60 +944,6 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
}
/*
- * Hooks for other subsystems that check memory allocations. In a typical
- * production configuration these hooks all should produce no code at all.
- */
-static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
-{
- kmemleak_alloc(ptr, size, 1, flags);
-}
-
-static inline void kfree_hook(const void *x)
-{
- kmemleak_free(x);
-}
-
-static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
-{
- flags &= gfp_allowed_mask;
- lockdep_trace_alloc(flags);
- might_sleep_if(flags & __GFP_WAIT);
-
- return should_failslab(s->object_size, flags, s->flags);
-}
-
-static inline void slab_post_alloc_hook(struct kmem_cache *s,
- gfp_t flags, void *object)
-{
- flags &= gfp_allowed_mask;
- kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
- kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
-}
-
-static inline void slab_free_hook(struct kmem_cache *s, void *x)
-{
- kmemleak_free_recursive(x, s->flags);
-
- /*
- * Trouble is that we may no longer disable interrupts in the fast path
- * So in order to make the debug calls that expect irqs to be
- * disabled we need to disable interrupts temporarily.
- */
-#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
- {
- unsigned long flags;
-
- local_irq_save(flags);
- kmemcheck_slab_free(s, x, s->object_size);
- debug_check_no_locks_freed(x, s->object_size);
- local_irq_restore(flags);
- }
-#endif
- if (!(s->flags & SLAB_DEBUG_OBJECTS))
- debug_check_no_obj_freed(x, s->object_size);
-}
-
-/*
* Tracking of fully allocated slabs for debugging purposes.
*/
static void add_full(struct kmem_cache *s,
@@ -1282,6 +1227,12 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
static inline void dec_slabs_node(struct kmem_cache *s, int node,
int objects) {}
+#endif /* CONFIG_SLUB_DEBUG */
+
+/*
+ * Hooks for other subsystems that check memory allocations. In a typical
+ * production configuration these hooks all should produce no code at all.
+ */
static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
{
kmemleak_alloc(ptr, size, 1, flags);
@@ -1293,21 +1244,44 @@ static inline void kfree_hook(const void *x)
}
static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
- { return 0; }
+{
+ flags &= gfp_allowed_mask;
+ lockdep_trace_alloc(flags);
+ might_sleep_if(flags & __GFP_WAIT);
+
+ return should_failslab(s->object_size, flags, s->flags);
+}
-static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
- void *object)
+static inline void slab_post_alloc_hook(struct kmem_cache *s,
+ gfp_t flags, void *object)
{
- kmemleak_alloc_recursive(object, s->object_size, 1, s->flags,
- flags & gfp_allowed_mask);
+ flags &= gfp_allowed_mask;
+ kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
+ kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
}
static inline void slab_free_hook(struct kmem_cache *s, void *x)
{
kmemleak_free_recursive(x, s->flags);
-}
-#endif /* CONFIG_SLUB_DEBUG */
+ /*
+ * Trouble is that we may no longer disable interrupts in the fast path
+ * So in order to make the debug calls that expect irqs to be
+ * disabled we need to disable interrupts temporarily.
+ */
+#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
+ {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ kmemcheck_slab_free(s, x, s->object_size);
+ debug_check_no_locks_freed(x, s->object_size);
+ local_irq_restore(flags);
+ }
+#endif
+ if (!(s->flags & SLAB_DEBUG_OBJECTS))
+ debug_check_no_obj_freed(x, s->object_size);
+}
/*
* Slab allocation and freeing
@@ -1409,9 +1383,9 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
{
struct page *page;
void *start;
- void *last;
void *p;
int order;
+ int idx;
BUG_ON(flags & GFP_SLAB_BUG_MASK);
@@ -1432,14 +1406,13 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
if (unlikely(s->flags & SLAB_POISON))
memset(start, POISON_INUSE, PAGE_SIZE << order);
- last = start;
- for_each_object(p, s, start, page->objects) {
- setup_object(s, page, last);
- set_freepointer(s, last, p);
- last = p;
+ for_each_object_idx(p, idx, s, start, page->objects) {
+ setup_object(s, page, p);
+ if (likely(idx < page->objects))
+ set_freepointer(s, p, p + s->size);
+ else
+ set_freepointer(s, p, NULL);
}
- setup_object(s, page, last);
- set_freepointer(s, last, NULL);
page->freelist = start;
page->inuse = page->objects;
@@ -2162,6 +2135,7 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
int node;
+ struct kmem_cache_node *n;
if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
return;
@@ -2176,15 +2150,11 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
s->name);
- for_each_online_node(node) {
- struct kmem_cache_node *n = get_node(s, node);
+ for_each_kmem_cache_node(s, node, n) {
unsigned long nr_slabs;
unsigned long nr_objs;
unsigned long nr_free;
- if (!n)
- continue;
-
nr_free = count_partial(n, count_free);
nr_slabs = node_nr_slabs(n);
nr_objs = node_nr_objs(n);
@@ -2928,13 +2898,10 @@ static void early_kmem_cache_node_alloc(int node)
static void free_kmem_cache_nodes(struct kmem_cache *s)
{
int node;
+ struct kmem_cache_node *n;
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = s->node[node];
-
- if (n)
- kmem_cache_free(kmem_cache_node, n);
-
+ for_each_kmem_cache_node(s, node, n) {
+ kmem_cache_free(kmem_cache_node, n);
s->node[node] = NULL;
}
}
@@ -3224,12 +3191,11 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
static inline int kmem_cache_close(struct kmem_cache *s)
{
int node;
+ struct kmem_cache_node *n;
flush_all(s);
/* Attempt to free all objects */
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = get_node(s, node);
-
+ for_each_kmem_cache_node(s, node, n) {
free_partial(s, n);
if (n->nr_partial || slabs_node(s, node))
return 1;
@@ -3414,9 +3380,7 @@ int __kmem_cache_shrink(struct kmem_cache *s)
return -ENOMEM;
flush_all(s);
- for_each_node_state(node, N_NORMAL_MEMORY) {
- n = get_node(s, node);
-
+ for_each_kmem_cache_node(s, node, n) {
if (!n->nr_partial)
continue;
@@ -3588,6 +3552,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
{
int node;
struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+ struct kmem_cache_node *n;
memcpy(s, static_cache, kmem_cache->object_size);
@@ -3597,19 +3562,16 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
* IPIs around.
*/
__flush_cpu_slab(s, smp_processor_id());
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = get_node(s, node);
+ for_each_kmem_cache_node(s, node, n) {
struct page *p;
- if (n) {
- list_for_each_entry(p, &n->partial, lru)
- p->slab_cache = s;
+ list_for_each_entry(p, &n->partial, lru)
+ p->slab_cache = s;
#ifdef CONFIG_SLUB_DEBUG
- list_for_each_entry(p, &n->full, lru)
- p->slab_cache = s;
+ list_for_each_entry(p, &n->full, lru)
+ p->slab_cache = s;
#endif
- }
}
list_add(&s->list, &slab_caches);
return s;
@@ -3962,16 +3924,14 @@ static long validate_slab_cache(struct kmem_cache *s)
unsigned long count = 0;
unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
sizeof(unsigned long), GFP_KERNEL);
+ struct kmem_cache_node *n;
if (!map)
return -ENOMEM;
flush_all(s);
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = get_node(s, node);
-
+ for_each_kmem_cache_node(s, node, n)
count += validate_slab_node(s, n, map);
- }
kfree(map);
return count;
}
@@ -4125,6 +4085,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
int node;
unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
sizeof(unsigned long), GFP_KERNEL);
+ struct kmem_cache_node *n;
if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
GFP_TEMPORARY)) {
@@ -4134,8 +4095,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
/* Push back cpu slabs */
flush_all(s);
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = get_node(s, node);
+ for_each_kmem_cache_node(s, node, n) {
unsigned long flags;
struct page *page;
@@ -4207,7 +4167,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
#endif
#ifdef SLUB_RESILIENCY_TEST
-static void resiliency_test(void)
+static void __init resiliency_test(void)
{
u8 *p;
@@ -4334,8 +4294,9 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
get_online_mems();
#ifdef CONFIG_SLUB_DEBUG
if (flags & SO_ALL) {
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = get_node(s, node);
+ struct kmem_cache_node *n;
+
+ for_each_kmem_cache_node(s, node, n) {
if (flags & SO_TOTAL)
x = atomic_long_read(&n->total_objects);
@@ -4351,9 +4312,9 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
} else
#endif
if (flags & SO_PARTIAL) {
- for_each_node_state(node, N_NORMAL_MEMORY) {
- struct kmem_cache_node *n = get_node(s, node);
+ struct kmem_cache_node *n;
+ for_each_kmem_cache_node(s, node, n) {
if (flags & SO_TOTAL)
x = count_partial(n, count_total);
else if (flags & SO_OBJECTS)
@@ -4366,7 +4327,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
}
x = sprintf(buf, "%lu", total);
#ifdef CONFIG_NUMA
- for_each_node_state(node, N_NORMAL_MEMORY)
+ for (node = 0; node < nr_node_ids; node++)
if (nodes[node])
x += sprintf(buf + x, " N%d=%lu",
node, nodes[node]);
@@ -4380,16 +4341,12 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
static int any_slab_objects(struct kmem_cache *s)
{
int node;
+ struct kmem_cache_node *n;
- for_each_online_node(node) {
- struct kmem_cache_node *n = get_node(s, node);
-
- if (!n)
- continue;
-
+ for_each_kmem_cache_node(s, node, n)
if (atomic_long_read(&n->total_objects))
return 1;
- }
+
return 0;
}
#endif
@@ -5344,13 +5301,9 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
unsigned long nr_objs = 0;
unsigned long nr_free = 0;
int node;
+ struct kmem_cache_node *n;
- for_each_online_node(node) {
- struct kmem_cache_node *n = get_node(s, node);
-
- if (!n)
- continue;
-
+ for_each_kmem_cache_node(s, node, n) {
nr_slabs += node_nr_slabs(n);
nr_objs += node_nr_objs(n);
nr_free += count_partial(n, count_free);
diff --git a/mm/swap.c b/mm/swap.c
index 9e8e3472248b..6b2dc3897cd5 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -62,6 +62,7 @@ static void __page_cache_release(struct page *page)
del_page_from_lru_list(page, lruvec, page_off_lru(page));
spin_unlock_irqrestore(&zone->lru_lock, flags);
}
+ mem_cgroup_uncharge(page);
}
static void __put_single_page(struct page *page)
@@ -501,7 +502,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec,
SetPageActive(page);
lru += LRU_ACTIVE;
add_page_to_lru_list(page, lruvec, lru);
- trace_mm_lru_activate(page, page_to_pfn(page));
+ trace_mm_lru_activate(page);
__count_vm_event(PGACTIVATE);
update_page_reclaim_stat(lruvec, file, 1);
@@ -589,6 +590,9 @@ static void __lru_cache_activate_page(struct page *page)
* inactive,unreferenced -> inactive,referenced
* inactive,referenced -> active,unreferenced
* active,unreferenced -> active,referenced
+ *
+ * When a newly allocated page is not yet visible, so safe for non-atomic ops,
+ * __SetPageReferenced(page) may be substituted for mark_page_accessed(page).
*/
void mark_page_accessed(struct page *page)
{
@@ -614,17 +618,6 @@ void mark_page_accessed(struct page *page)
}
EXPORT_SYMBOL(mark_page_accessed);
-/*
- * Used to mark_page_accessed(page) that is not visible yet and when it is
- * still safe to use non-atomic ops
- */
-void init_page_accessed(struct page *page)
-{
- if (!PageReferenced(page))
- __SetPageReferenced(page);
-}
-EXPORT_SYMBOL(init_page_accessed);
-
static void __lru_cache_add(struct page *page)
{
struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
@@ -695,6 +688,40 @@ void add_page_to_unevictable_list(struct page *page)
spin_unlock_irq(&zone->lru_lock);
}
+/**
+ * lru_cache_add_active_or_unevictable
+ * @page: the page to be added to LRU
+ * @vma: vma in which page is mapped for determining reclaimability
+ *
+ * Place @page on the active or unevictable LRU list, depending on its
+ * evictability. Note that if the page is not evictable, it goes
+ * directly back onto it's zone's unevictable list, it does NOT use a
+ * per cpu pagevec.
+ */
+void lru_cache_add_active_or_unevictable(struct page *page,
+ struct vm_area_struct *vma)
+{
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+
+ if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
+ SetPageActive(page);
+ lru_cache_add(page);
+ return;
+ }
+
+ if (!TestSetPageMlocked(page)) {
+ /*
+ * We use the irq-unsafe __mod_zone_page_stat because this
+ * counter is not modified from interrupt context, and the pte
+ * lock is held(spinlock), which implies preemption disabled.
+ */
+ __mod_zone_page_state(page_zone(page), NR_MLOCK,
+ hpage_nr_pages(page));
+ count_vm_event(UNEVICTABLE_PGMLOCKED);
+ }
+ add_page_to_unevictable_list(page);
+}
+
/*
* If the page can not be invalidated, it is moved to the
* inactive list to speed up its reclaim. It is moved to the
@@ -921,6 +948,7 @@ void release_pages(struct page **pages, int nr, bool cold)
if (zone)
spin_unlock_irqrestore(&zone->lru_lock, flags);
+ mem_cgroup_uncharge_list(&pages_to_free);
free_hot_cold_page_list(&pages_to_free, cold);
}
EXPORT_SYMBOL(release_pages);
@@ -996,7 +1024,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
SetPageLRU(page);
add_page_to_lru_list(page, lruvec, lru);
update_page_reclaim_stat(lruvec, file, active);
- trace_mm_lru_insertion(page, page_to_pfn(page), lru, trace_pagemap_flags(page));
+ trace_mm_lru_insertion(page, lru);
}
/*
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 2972eee184a4..e160151da6b8 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -176,7 +176,7 @@ int add_to_swap(struct page *page, struct list_head *list)
if (unlikely(PageTransHuge(page)))
if (unlikely(split_huge_page_to_list(page, list))) {
- swapcache_free(entry, NULL);
+ swapcache_free(entry);
return 0;
}
@@ -202,7 +202,7 @@ int add_to_swap(struct page *page, struct list_head *list)
* add_to_swap_cache() doesn't return -EEXIST, so we can safely
* clear SWAP_HAS_CACHE flag.
*/
- swapcache_free(entry, NULL);
+ swapcache_free(entry);
return 0;
}
}
@@ -225,7 +225,7 @@ void delete_from_swap_cache(struct page *page)
__delete_from_swap_cache(page);
spin_unlock_irq(&address_space->tree_lock);
- swapcache_free(entry, page);
+ swapcache_free(entry);
page_cache_release(page);
}
@@ -386,7 +386,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
* add_to_swap_cache() doesn't return -EEXIST, so we can safely
* clear SWAP_HAS_CACHE flag.
*/
- swapcache_free(entry, NULL);
+ swapcache_free(entry);
} while (err != -ENOMEM);
if (new_page)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4c524f7bd0bf..8798b2e0ac59 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -843,16 +843,13 @@ void swap_free(swp_entry_t entry)
/*
* Called after dropping swapcache to decrease refcnt to swap entries.
*/
-void swapcache_free(swp_entry_t entry, struct page *page)
+void swapcache_free(swp_entry_t entry)
{
struct swap_info_struct *p;
- unsigned char count;
p = swap_info_get(entry);
if (p) {
- count = swap_entry_free(p, entry, SWAP_HAS_CACHE);
- if (page)
- mem_cgroup_uncharge_swapcache(page, entry, count != 0);
+ swap_entry_free(p, entry, SWAP_HAS_CACHE);
spin_unlock(&p->lock);
}
}
@@ -1106,15 +1103,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
if (unlikely(!page))
return -ENOMEM;
- if (mem_cgroup_try_charge_swapin(vma->vm_mm, page,
- GFP_KERNEL, &memcg)) {
+ if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg)) {
ret = -ENOMEM;
goto out_nolock;
}
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) {
- mem_cgroup_cancel_charge_swapin(memcg);
+ mem_cgroup_cancel_charge(page, memcg);
ret = 0;
goto out;
}
@@ -1124,11 +1120,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
get_page(page);
set_pte_at(vma->vm_mm, addr, pte,
pte_mkold(mk_pte(page, vma->vm_page_prot)));
- if (page == swapcache)
+ if (page == swapcache) {
page_add_anon_rmap(page, vma, addr);
- else /* ksm created a completely new copy */
+ mem_cgroup_commit_charge(page, memcg, true);
+ } else { /* ksm created a completely new copy */
page_add_new_anon_rmap(page, vma, addr);
- mem_cgroup_commit_charge_swapin(page, memcg);
+ mem_cgroup_commit_charge(page, memcg, false);
+ lru_cache_add_active_or_unevictable(page, vma);
+ }
swap_free(entry);
/*
* Move the page to the active list so it is not
diff --git a/mm/truncate.c b/mm/truncate.c
index 6a78c814bebf..96d167372d89 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -281,7 +281,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
min(end - index, (pgoff_t)PAGEVEC_SIZE),
indices)) {
- mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
@@ -307,7 +306,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
}
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
- mem_cgroup_uncharge_end();
cond_resched();
index++;
}
@@ -355,26 +353,30 @@ void truncate_inode_pages_range(struct address_space *mapping,
for ( ; ; ) {
cond_resched();
if (!pagevec_lookup_entries(&pvec, mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE),
- indices)) {
+ min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) {
+ /* If all gone from start onwards, we're done */
if (index == start)
break;
+ /* Otherwise restart to make sure all gone */
index = start;
continue;
}
if (index == start && indices[0] >= end) {
+ /* All gone out of hole to be punched, we're done */
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
break;
}
- mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
/* We rely upon deletion not changing page->index */
index = indices[i];
- if (index >= end)
+ if (index >= end) {
+ /* Restart punch to make sure all gone */
+ index = start - 1;
break;
+ }
if (radix_tree_exceptional_entry(page)) {
clear_exceptional_entry(mapping, index, page);
@@ -389,7 +391,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
}
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
- mem_cgroup_uncharge_end();
index++;
}
cleancache_invalidate_inode(mapping);
@@ -488,7 +489,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
indices)) {
- mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
@@ -517,7 +517,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
}
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
- mem_cgroup_uncharge_end();
cond_resched();
index++;
}
@@ -548,7 +547,6 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
BUG_ON(page_has_private(page));
__delete_from_page_cache(page, NULL);
spin_unlock_irq(&mapping->tree_lock);
- mem_cgroup_uncharge_cache_page(page);
if (mapping->a_ops->freepage)
mapping->a_ops->freepage(page);
@@ -597,7 +595,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
indices)) {
- mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
@@ -650,7 +647,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
}
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
- mem_cgroup_uncharge_end();
cond_resched();
index++;
}
diff --git a/mm/util.c b/mm/util.c
index d5ea733c5082..ff7a52e7386e 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -3,6 +3,7 @@
#include <linux/string.h>
#include <linux/compiler.h>
#include <linux/export.h>
+#include <linux/ctype.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/security.h>
@@ -16,9 +17,6 @@
#include "internal.h"
-#define CREATE_TRACE_POINTS
-#include <trace/events/kmem.h>
-
/**
* kstrdup - allocate space for and copy an existing string
* @s: the string to duplicate
@@ -65,6 +63,35 @@ char *kstrndup(const char *s, size_t max, gfp_t gfp)
EXPORT_SYMBOL(kstrndup);
/**
+ * kstrimdup - Trim and copy a %NUL terminated string.
+ * @s: the string to trim and duplicate
+ * @gfp: the GFP mask used in the kmalloc() call when allocating memory
+ *
+ * Returns an address, which the caller must kfree, containing
+ * a duplicate of the passed string with leading and/or trailing
+ * whitespace (as defined by isspace) removed.
+ */
+char *kstrimdup(const char *s, gfp_t gfp)
+{
+ char *buf;
+ char *begin = skip_spaces(s);
+ size_t len = strlen(begin);
+
+ while (len && isspace(begin[len - 1]))
+ len--;
+
+ buf = kmalloc_track_caller(len + 1, gfp);
+ if (!buf)
+ return NULL;
+
+ memcpy(buf, begin, len);
+ buf[len] = '\0';
+
+ return buf;
+}
+EXPORT_SYMBOL(kstrimdup);
+
+/**
* kmemdup - duplicate region of memory
*
* @src: memory region to duplicate
@@ -112,97 +139,6 @@ void *memdup_user(const void __user *src, size_t len)
}
EXPORT_SYMBOL(memdup_user);
-static __always_inline void *__do_krealloc(const void *p, size_t new_size,
- gfp_t flags)
-{
- void *ret;
- size_t ks = 0;
-
- if (p)
- ks = ksize(p);
-
- if (ks >= new_size)
- return (void *)p;
-
- ret = kmalloc_track_caller(new_size, flags);
- if (ret && p)
- memcpy(ret, p, ks);
-
- return ret;
-}
-
-/**
- * __krealloc - like krealloc() but don't free @p.
- * @p: object to reallocate memory for.
- * @new_size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
- *
- * This function is like krealloc() except it never frees the originally
- * allocated buffer. Use this if you don't want to free the buffer immediately
- * like, for example, with RCU.
- */
-void *__krealloc(const void *p, size_t new_size, gfp_t flags)
-{
- if (unlikely(!new_size))
- return ZERO_SIZE_PTR;
-
- return __do_krealloc(p, new_size, flags);
-
-}
-EXPORT_SYMBOL(__krealloc);
-
-/**
- * krealloc - reallocate memory. The contents will remain unchanged.
- * @p: object to reallocate memory for.
- * @new_size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
- *
- * The contents of the object pointed to are preserved up to the
- * lesser of the new and old sizes. If @p is %NULL, krealloc()
- * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
- * %NULL pointer, the object pointed to is freed.
- */
-void *krealloc(const void *p, size_t new_size, gfp_t flags)
-{
- void *ret;
-
- if (unlikely(!new_size)) {
- kfree(p);
- return ZERO_SIZE_PTR;
- }
-
- ret = __do_krealloc(p, new_size, flags);
- if (ret && p != ret)
- kfree(p);
-
- return ret;
-}
-EXPORT_SYMBOL(krealloc);
-
-/**
- * kzfree - like kfree but zero memory
- * @p: object to free memory of
- *
- * The memory of the object @p points to is zeroed before freed.
- * If @p is %NULL, kzfree() does nothing.
- *
- * Note: this function zeroes the whole allocated buffer which can be a good
- * deal bigger than the requested buffer size passed to kmalloc(). So be
- * careful when using this function in performance sensitive code.
- */
-void kzfree(const void *p)
-{
- size_t ks;
- void *mem = (void *)p;
-
- if (unlikely(ZERO_OR_NULL_PTR(mem)))
- return;
- ks = ksize(mem);
- memset(mem, 0, ks);
- kfree(mem);
-}
-EXPORT_SYMBOL(kzfree);
-
/*
* strndup_user - duplicate an existing string from user space
* @s: The string to duplicate
@@ -504,11 +440,3 @@ out_mm:
out:
return res;
}
-
-/* Tracepoints definitions. */
-EXPORT_TRACEPOINT_SYMBOL(kmalloc);
-EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
-EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
-EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
-EXPORT_TRACEPOINT_SYMBOL(kfree);
-EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f64632b67196..2b0aa5486092 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1270,19 +1270,15 @@ void unmap_kernel_range(unsigned long addr, unsigned long size)
}
EXPORT_SYMBOL_GPL(unmap_kernel_range);
-int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
+int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages)
{
unsigned long addr = (unsigned long)area->addr;
unsigned long end = addr + get_vm_area_size(area);
int err;
- err = vmap_page_range(addr, end, prot, *pages);
- if (err > 0) {
- *pages += err;
- err = 0;
- }
+ err = vmap_page_range(addr, end, prot, pages);
- return err;
+ return err > 0 ? 0 : err;
}
EXPORT_SYMBOL_GPL(map_vm_area);
@@ -1548,7 +1544,7 @@ void *vmap(struct page **pages, unsigned int count,
if (!area)
return NULL;
- if (map_vm_area(area, prot, &pages)) {
+ if (map_vm_area(area, prot, pages)) {
vunmap(area->addr);
return NULL;
}
@@ -1566,7 +1562,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
const int order = 0;
struct page **pages;
unsigned int nr_pages, array_size, i;
- gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
+ const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
+ const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
array_size = (nr_pages * sizeof(struct page *));
@@ -1589,12 +1586,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
for (i = 0; i < area->nr_pages; i++) {
struct page *page;
- gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;
if (node == NUMA_NO_NODE)
- page = alloc_page(tmp_mask);
+ page = alloc_page(alloc_mask);
else
- page = alloc_pages_node(node, tmp_mask, order);
+ page = alloc_pages_node(node, alloc_mask, order);
if (unlikely(!page)) {
/* Successfully allocated i pages, free them in __vunmap() */
@@ -1602,9 +1598,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
goto fail;
}
area->pages[i] = page;
+ if (gfp_mask & __GFP_WAIT)
+ cond_resched();
}
- if (map_vm_area(area, prot, &pages))
+ if (map_vm_area(area, prot, pages))
goto fail;
return area->addr;
@@ -2690,14 +2688,14 @@ void get_vmalloc_info(struct vmalloc_info *vmi)
prev_end = VMALLOC_START;
- spin_lock(&vmap_area_lock);
+ rcu_read_lock();
if (list_empty(&vmap_area_list)) {
vmi->largest_chunk = VMALLOC_TOTAL;
goto out;
}
- list_for_each_entry(va, &vmap_area_list, list) {
+ list_for_each_entry_rcu(va, &vmap_area_list, list) {
unsigned long addr = va->va_start;
/*
@@ -2724,7 +2722,7 @@ void get_vmalloc_info(struct vmalloc_info *vmi)
vmi->largest_chunk = VMALLOC_END - prev_end;
out:
- spin_unlock(&vmap_area_lock);
+ rcu_read_unlock();
}
#endif
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0f16ffe8eb67..27432baea971 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -58,36 +58,28 @@
#define CREATE_TRACE_POINTS
#include <trace/events/vmscan.h>
-struct scan_control {
- /* Incremented by the number of inactive pages that were scanned */
- unsigned long nr_scanned;
-
- /* Number of pages freed so far during a call to shrink_zones() */
- unsigned long nr_reclaimed;
+/* Scan control flags */
+#define MAY_WRITEPAGE 0x1
+#define MAY_UNMAP 0x2
+#define MAY_SWAP 0x4
+#define MAY_SKIP_CONGESTION 0x8
+#define COMPACTION_READY 0x10
+struct scan_control {
/* How many pages shrink_list() should reclaim */
unsigned long nr_to_reclaim;
- unsigned long hibernation_mode;
-
/* This context's GFP mask */
gfp_t gfp_mask;
- int may_writepage;
-
- /* Can mapped pages be reclaimed? */
- int may_unmap;
-
- /* Can pages be swapped as part of reclaim? */
- int may_swap;
-
+ /* Allocation order */
int order;
- /* Scan (total_size >> priority) pages at once */
- int priority;
-
- /* anon vs. file LRUs scanning "ratio" */
- int swappiness;
+ /*
+ * Nodemask of nodes allowed by the caller. If NULL, all nodes
+ * are scanned.
+ */
+ nodemask_t *nodemask;
/*
* The memory cgroup that hit its limit and as a result is the
@@ -95,11 +87,17 @@ struct scan_control {
*/
struct mem_cgroup *target_mem_cgroup;
- /*
- * Nodemask of nodes allowed by the caller. If NULL, all nodes
- * are scanned.
- */
- nodemask_t *nodemask;
+ /* Scan (total_size >> priority) pages at once */
+ int priority;
+
+ /* Scan control flags; see above */
+ unsigned int flags;
+
+ /* Incremented by the number of inactive pages that were scanned */
+ unsigned long nr_scanned;
+
+ /* Number of pages freed so far during a call to shrink_zones() */
+ unsigned long nr_reclaimed;
};
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
@@ -136,7 +134,11 @@ struct scan_control {
* From 0 .. 100. Higher means more swappy.
*/
int vm_swappiness = 60;
-unsigned long vm_total_pages; /* The total number of pages which the VM controls */
+/*
+ * The total number of pages which are beyond the high watermark within all
+ * zones.
+ */
+unsigned long vm_total_pages;
static LIST_HEAD(shrinker_list);
static DECLARE_RWSEM(shrinker_rwsem);
@@ -169,7 +171,8 @@ static unsigned long zone_reclaimable_pages(struct zone *zone)
bool zone_reclaimable(struct zone *zone)
{
- return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
+ return zone_page_state(zone, NR_PAGES_SCANNED) <
+ zone_reclaimable_pages(zone) * 6;
}
static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
@@ -571,9 +574,10 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
if (PageSwapCache(page)) {
swp_entry_t swap = { .val = page_private(page) };
+ mem_cgroup_swapout(page, swap);
__delete_from_swap_cache(page);
spin_unlock_irq(&mapping->tree_lock);
- swapcache_free(swap, page);
+ swapcache_free(swap);
} else {
void (*freepage)(struct page *);
void *shadow = NULL;
@@ -594,7 +598,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
shadow = workingset_eviction(mapping, page);
__delete_from_page_cache(page, shadow);
spin_unlock_irq(&mapping->tree_lock);
- mem_cgroup_uncharge_cache_page(page);
if (freepage != NULL)
freepage(page);
@@ -816,7 +819,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
cond_resched();
- mem_cgroup_uncharge_start();
while (!list_empty(page_list)) {
struct address_space *mapping;
struct page *page;
@@ -840,7 +842,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
if (unlikely(!page_evictable(page)))
goto cull_mlocked;
- if (!sc->may_unmap && page_mapped(page))
+ if (!(sc->flags & MAY_UNMAP) && page_mapped(page))
goto keep_locked;
/* Double the slab pressure for mapped and swapcache pages */
@@ -1014,7 +1016,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
goto keep_locked;
if (!may_enter_fs)
goto keep_locked;
- if (!sc->may_writepage)
+ if (!(sc->flags & MAY_WRITEPAGE))
goto keep_locked;
/* Page is dirty, try to write it out here */
@@ -1127,11 +1129,12 @@ keep:
VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
}
+ mem_cgroup_uncharge_list(&free_pages);
free_hot_cold_page_list(&free_pages, true);
list_splice(&ret_pages, page_list);
count_vm_events(PGACTIVATE, pgactivate);
- mem_cgroup_uncharge_end();
+
*ret_nr_dirty += nr_dirty;
*ret_nr_congested += nr_congested;
*ret_nr_unqueued_dirty += nr_unqueued_dirty;
@@ -1146,7 +1149,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
.priority = DEF_PRIORITY,
- .may_unmap = 1,
+ .flags = MAY_UNMAP,
};
unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
struct page *page, *next;
@@ -1431,6 +1434,7 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
if (unlikely(PageCompound(page))) {
spin_unlock_irq(&zone->lru_lock);
+ mem_cgroup_uncharge(page);
(*get_compound_page_dtor(page))(page);
spin_lock_irq(&zone->lru_lock);
} else
@@ -1489,9 +1493,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
lru_add_drain();
- if (!sc->may_unmap)
+ if (!(sc->flags & MAY_UNMAP))
isolate_mode |= ISOLATE_UNMAPPED;
- if (!sc->may_writepage)
+ if (!(sc->flags & MAY_WRITEPAGE))
isolate_mode |= ISOLATE_CLEAN;
spin_lock_irq(&zone->lru_lock);
@@ -1503,7 +1507,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
if (global_reclaim(sc)) {
- zone->pages_scanned += nr_scanned;
+ __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
if (current_is_kswapd())
__count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned);
else
@@ -1538,6 +1542,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
spin_unlock_irq(&zone->lru_lock);
+ mem_cgroup_uncharge_list(&page_list);
free_hot_cold_page_list(&page_list, true);
/*
@@ -1593,7 +1598,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* is congested. Allow kswapd to continue until it starts encountering
* unqueued dirty pages or cycling through the LRU too quickly.
*/
- if (!sc->hibernation_mode && !current_is_kswapd() &&
+ if (!(sc->flags & MAY_SKIP_CONGESTION) && !current_is_kswapd() &&
current_may_throttle())
wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
@@ -1652,6 +1657,7 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
if (unlikely(PageCompound(page))) {
spin_unlock_irq(&zone->lru_lock);
+ mem_cgroup_uncharge(page);
(*get_compound_page_dtor(page))(page);
spin_lock_irq(&zone->lru_lock);
} else
@@ -1683,9 +1689,9 @@ static void shrink_active_list(unsigned long nr_to_scan,
lru_add_drain();
- if (!sc->may_unmap)
+ if (!(sc->flags & MAY_UNMAP))
isolate_mode |= ISOLATE_UNMAPPED;
- if (!sc->may_writepage)
+ if (!(sc->flags & MAY_WRITEPAGE))
isolate_mode |= ISOLATE_CLEAN;
spin_lock_irq(&zone->lru_lock);
@@ -1693,7 +1699,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
&nr_scanned, sc, isolate_mode, lru);
if (global_reclaim(sc))
- zone->pages_scanned += nr_scanned;
+ __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);
reclaim_stat->recent_scanned[file] += nr_taken;
@@ -1759,6 +1765,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&zone->lru_lock);
+ mem_cgroup_uncharge_list(&l_hold);
free_hot_cold_page_list(&l_hold, true);
}
@@ -1865,8 +1872,8 @@ enum scan_balance {
* nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan
* nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
*/
-static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
- unsigned long *nr)
+static void get_scan_count(struct lruvec *lruvec, int swappiness,
+ struct scan_control *sc, unsigned long *nr)
{
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
u64 fraction[2];
@@ -1897,7 +1904,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
force_scan = true;
/* If we have no swap space, do not bother scanning anon pages. */
- if (!sc->may_swap || (get_nr_swap_pages() <= 0)) {
+ if (!(sc->flags & MAY_SWAP) || (get_nr_swap_pages() <= 0)) {
scan_balance = SCAN_FILE;
goto out;
}
@@ -1909,7 +1916,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* using the memory controller's swap limit feature would be
* too expensive.
*/
- if (!global_reclaim(sc) && !sc->swappiness) {
+ if (!global_reclaim(sc) && !swappiness) {
scan_balance = SCAN_FILE;
goto out;
}
@@ -1919,7 +1926,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* system is close to OOM, scan both anon and file equally
* (unless the swappiness setting disagrees with swapping).
*/
- if (!sc->priority && sc->swappiness) {
+ if (!sc->priority && swappiness) {
scan_balance = SCAN_EQUAL;
goto out;
}
@@ -1962,7 +1969,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* With swappiness at 100, anonymous and file have the same priority.
* This scanning priority is essentially the inverse of IO cost.
*/
- anon_prio = sc->swappiness;
+ anon_prio = swappiness;
file_prio = 200 - anon_prio;
/*
@@ -2052,7 +2059,8 @@ out:
/*
* This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
*/
-static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
+ struct scan_control *sc)
{
unsigned long nr[NR_LRU_LISTS];
unsigned long targets[NR_LRU_LISTS];
@@ -2063,7 +2071,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
struct blk_plug plug;
bool scan_adjusted;
- get_scan_count(lruvec, sc, nr);
+ get_scan_count(lruvec, swappiness, sc, nr);
/* Record the original scan target for proportional adjustments later */
memcpy(targets, nr, sizeof(nr));
@@ -2241,9 +2249,10 @@ static inline bool should_continue_reclaim(struct zone *zone,
}
}
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
+static bool shrink_zone(struct zone *zone, struct scan_control *sc)
{
unsigned long nr_reclaimed, nr_scanned;
+ bool reclaimable = false;
do {
struct mem_cgroup *root = sc->target_mem_cgroup;
@@ -2259,11 +2268,12 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
memcg = mem_cgroup_iter(root, NULL, &reclaim);
do {
struct lruvec *lruvec;
+ int swappiness;
lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+ swappiness = mem_cgroup_swappiness(memcg);
- sc->swappiness = mem_cgroup_swappiness(memcg);
- shrink_lruvec(lruvec, sc);
+ shrink_lruvec(lruvec, swappiness, sc);
/*
* Direct reclaim and kswapd have to scan all memory
@@ -2287,20 +2297,21 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
sc->nr_scanned - nr_scanned,
sc->nr_reclaimed - nr_reclaimed);
+ if (sc->nr_reclaimed - nr_reclaimed)
+ reclaimable = true;
+
} while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
sc->nr_scanned - nr_scanned, sc));
+
+ return reclaimable;
}
/* Returns true if compaction should go ahead for a high-order request */
-static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
+static inline bool compaction_ready(struct zone *zone, int order)
{
unsigned long balance_gap, watermark;
bool watermark_ok;
- /* Do not consider compaction for orders reclaim is meant to satisfy */
- if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
- return false;
-
/*
* Compaction takes time to run and there are potentially other
* callers using the pages just freed. Continue reclaiming until
@@ -2309,18 +2320,18 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
*/
balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP(
zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO));
- watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
+ watermark = high_wmark_pages(zone) + balance_gap + (2UL << order);
watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
/*
* If compaction is deferred, reclaim up to a point where
* compaction will have a chance of success when re-enabled
*/
- if (compaction_deferred(zone, sc->order))
+ if (compaction_deferred(zone, order))
return watermark_ok;
/* If compaction is not ready to start, keep reclaiming */
- if (!compaction_suitable(zone, sc->order))
+ if (!compaction_suitable(zone, order))
return false;
return watermark_ok;
@@ -2342,10 +2353,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
* If a zone is deemed to be full of pinned pages then just give it a light
* scan then give up on it.
*
- * This function returns true if a zone is being reclaimed for a costly
- * high-order allocation and compaction is ready to begin. This indicates to
- * the caller that it should consider retrying the allocation instead of
- * further reclaim.
+ * Returns true if a zone was reclaimable.
*/
static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
{
@@ -2354,13 +2362,13 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
unsigned long nr_soft_reclaimed;
unsigned long nr_soft_scanned;
unsigned long lru_pages = 0;
- bool aborted_reclaim = false;
struct reclaim_state *reclaim_state = current->reclaim_state;
gfp_t orig_mask;
struct shrink_control shrink = {
.gfp_mask = sc->gfp_mask,
};
enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
+ bool reclaimable = false;
/*
* If the number of buffer_heads in the machine exceeds the maximum
@@ -2391,22 +2399,24 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
if (sc->priority != DEF_PRIORITY &&
!zone_reclaimable(zone))
continue; /* Let kswapd poll it */
- if (IS_ENABLED(CONFIG_COMPACTION)) {
- /*
- * If we already have plenty of memory free for
- * compaction in this zone, don't free any more.
- * Even though compaction is invoked for any
- * non-zero order, only frequent costly order
- * reclamation is disruptive enough to become a
- * noticeable problem, like transparent huge
- * page allocations.
- */
- if ((zonelist_zone_idx(z) <= requested_highidx)
- && compaction_ready(zone, sc)) {
- aborted_reclaim = true;
- continue;
- }
+
+ /*
+ * If we already have plenty of memory free for
+ * compaction in this zone, don't free any more.
+ * Even though compaction is invoked for any
+ * non-zero order, only frequent costly order
+ * reclamation is disruptive enough to become a
+ * noticeable problem, like transparent huge
+ * page allocations.
+ */
+ if (IS_ENABLED(CONFIG_COMPACTION) &&
+ sc->order > PAGE_ALLOC_COSTLY_ORDER &&
+ zonelist_zone_idx(z) <= requested_highidx &&
+ compaction_ready(zone, sc->order)) {
+ sc->flags |= COMPACTION_READY;
+ continue;
}
+
/*
* This steals pages from memory cgroups over softlimit
* and returns the number of reclaimed pages and
@@ -2419,10 +2429,17 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
&nr_soft_scanned);
sc->nr_reclaimed += nr_soft_reclaimed;
sc->nr_scanned += nr_soft_scanned;
+ if (nr_soft_reclaimed)
+ reclaimable = true;
/* need some check for avoid more shrink_zone() */
}
- shrink_zone(zone, sc);
+ if (shrink_zone(zone, sc))
+ reclaimable = true;
+
+ if (global_reclaim(sc) &&
+ !reclaimable && zone_reclaimable(zone))
+ reclaimable = true;
}
/*
@@ -2445,27 +2462,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
*/
sc->gfp_mask = orig_mask;
- return aborted_reclaim;
-}
-
-/* All zones in zonelist are unreclaimable? */
-static bool all_unreclaimable(struct zonelist *zonelist,
- struct scan_control *sc)
-{
- struct zoneref *z;
- struct zone *zone;
-
- for_each_zone_zonelist_nodemask(zone, z, zonelist,
- gfp_zone(sc->gfp_mask), sc->nodemask) {
- if (!populated_zone(zone))
- continue;
- if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
- continue;
- if (zone_reclaimable(zone))
- return false;
- }
-
- return true;
+ return reclaimable;
}
/*
@@ -2489,7 +2486,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
{
unsigned long total_scanned = 0;
unsigned long writeback_threshold;
- bool aborted_reclaim;
+ bool zones_reclaimable;
delayacct_freepages_start();
@@ -2500,18 +2497,21 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
sc->priority);
sc->nr_scanned = 0;
- aborted_reclaim = shrink_zones(zonelist, sc);
+ zones_reclaimable = shrink_zones(zonelist, sc);
total_scanned += sc->nr_scanned;
if (sc->nr_reclaimed >= sc->nr_to_reclaim)
- goto out;
+ break;
+
+ if (sc->flags & COMPACTION_READY)
+ break;
/*
* If we're getting trouble reclaiming, start doing
* writepage even in laptop mode.
*/
if (sc->priority < DEF_PRIORITY - 2)
- sc->may_writepage = 1;
+ sc->flags |= MAY_WRITEPAGE;
/*
* Try to write back as many pages as we just scanned. This
@@ -2524,30 +2524,21 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
if (total_scanned > writeback_threshold) {
wakeup_flusher_threads(laptop_mode ? 0 : total_scanned,
WB_REASON_TRY_TO_FREE_PAGES);
- sc->may_writepage = 1;
+ sc->flags |= MAY_WRITEPAGE;
}
- } while (--sc->priority >= 0 && !aborted_reclaim);
+ } while (--sc->priority >= 0);
-out:
delayacct_freepages_end();
if (sc->nr_reclaimed)
return sc->nr_reclaimed;
- /*
- * As hibernation is going on, kswapd is freezed so that it can't mark
- * the zone into all_unreclaimable. Thus bypassing all_unreclaimable
- * check.
- */
- if (oom_killer_disabled)
- return 0;
-
/* Aborted reclaim to try compaction? don't OOM, then */
- if (aborted_reclaim)
+ if (sc->flags & COMPACTION_READY)
return 1;
- /* top priority shrink_zones still had more to do? don't OOM, then */
- if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc))
+ /* Any of the zones still reclaimable? Don't OOM. */
+ if (zones_reclaimable)
return 1;
return 0;
@@ -2684,17 +2675,17 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
{
unsigned long nr_reclaimed;
struct scan_control sc = {
- .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
- .may_writepage = !laptop_mode,
.nr_to_reclaim = SWAP_CLUSTER_MAX,
- .may_unmap = 1,
- .may_swap = 1,
+ .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
.order = order,
- .priority = DEF_PRIORITY,
- .target_mem_cgroup = NULL,
.nodemask = nodemask,
+ .priority = DEF_PRIORITY,
+ .flags = MAY_UNMAP | MAY_SWAP,
};
+ if (!laptop_mode)
+ sc.flags |= MAY_WRITEPAGE;
+
/*
* Do not enter reclaim if fatal signal was delivered while throttled.
* 1 is returned so that the page allocator does not OOM kill at this
@@ -2704,7 +2695,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
return 1;
trace_mm_vmscan_direct_reclaim_begin(order,
- sc.may_writepage,
+ sc.flags & MAY_WRITEPAGE,
gfp_mask);
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
@@ -2722,23 +2713,22 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
unsigned long *nr_scanned)
{
struct scan_control sc = {
- .nr_scanned = 0,
.nr_to_reclaim = SWAP_CLUSTER_MAX,
- .may_writepage = !laptop_mode,
- .may_unmap = 1,
- .may_swap = !noswap,
- .order = 0,
- .priority = 0,
- .swappiness = mem_cgroup_swappiness(memcg),
+ .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
+ (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
.target_mem_cgroup = memcg,
+ .flags = MAY_UNMAP,
};
struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+ int swappiness = mem_cgroup_swappiness(memcg);
- sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
- (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
+ if (!laptop_mode)
+ sc.flags |= MAY_WRITEPAGE;
+ if (!noswap)
+ sc.flags |= MAY_SWAP;
trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
- sc.may_writepage,
+ sc.flags & MAY_WRITEPAGE,
sc.gfp_mask);
/*
@@ -2748,7 +2738,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
* will pick up pages from other mem cgroup's as well. We hack
* the priority and make it zero.
*/
- shrink_lruvec(lruvec, &sc);
+ shrink_lruvec(lruvec, swappiness, &sc);
trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
@@ -2764,18 +2754,19 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_reclaimed;
int nid;
struct scan_control sc = {
- .may_writepage = !laptop_mode,
- .may_unmap = 1,
- .may_swap = !noswap,
.nr_to_reclaim = SWAP_CLUSTER_MAX,
- .order = 0,
- .priority = DEF_PRIORITY,
- .target_mem_cgroup = memcg,
- .nodemask = NULL, /* we don't care the placement */
.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
- (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
+ (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
+ .target_mem_cgroup = memcg,
+ .priority = DEF_PRIORITY,
+ .flags = MAY_UNMAP,
};
+ if (!laptop_mode)
+ sc.flags |= MAY_WRITEPAGE;
+ if (!noswap)
+ sc.flags |= MAY_SWAP;
+
/*
* Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't
* take care of from where we get pages. So the node where we start the
@@ -2786,7 +2777,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
zonelist = NODE_DATA(nid)->node_zonelists;
trace_mm_vmscan_memcg_reclaim_begin(0,
- sc.may_writepage,
+ sc.flags & MAY_WRITEPAGE,
sc.gfp_mask);
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
@@ -3031,15 +3022,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
unsigned long nr_soft_scanned;
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
- .priority = DEF_PRIORITY,
- .may_unmap = 1,
- .may_swap = 1,
- .may_writepage = !laptop_mode,
.order = order,
- .target_mem_cgroup = NULL,
+ .priority = DEF_PRIORITY,
+ .flags = MAY_UNMAP | MAY_SWAP,
};
count_vm_event(PAGEOUTRUN);
+ if (!laptop_mode)
+ sc.flags |= MAY_WRITEPAGE;
+
do {
unsigned long lru_pages = 0;
unsigned long nr_attempted = 0;
@@ -3120,7 +3111,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
* even in laptop mode.
*/
if (sc.priority < DEF_PRIORITY - 2)
- sc.may_writepage = 1;
+ sc.flags |= MAY_WRITEPAGE;
/*
* Now scan the zone in the dma->highmem direction, stopping
@@ -3417,14 +3408,11 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
{
struct reclaim_state reclaim_state;
struct scan_control sc = {
- .gfp_mask = GFP_HIGHUSER_MOVABLE,
- .may_swap = 1,
- .may_unmap = 1,
- .may_writepage = 1,
.nr_to_reclaim = nr_to_reclaim,
- .hibernation_mode = 1,
- .order = 0,
+ .gfp_mask = GFP_HIGHUSER_MOVABLE,
.priority = DEF_PRIORITY,
+ .flags = MAY_WRITEPAGE | MAY_UNMAP | MAY_SWAP |
+ MAY_SKIP_CONGESTION,
};
struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
struct task_struct *p = current;
@@ -3604,19 +3592,22 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
struct task_struct *p = current;
struct reclaim_state reclaim_state;
struct scan_control sc = {
- .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
- .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
- .may_swap = 1,
.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
.gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
.order = order,
.priority = ZONE_RECLAIM_PRIORITY,
+ .flags = MAY_SWAP,
};
struct shrink_control shrink = {
.gfp_mask = sc.gfp_mask,
};
unsigned long nr_slab_pages0, nr_slab_pages1;
+ if (zone_reclaim_mode & RECLAIM_WRITE)
+ sc.flags |= MAY_WRITEPAGE;
+ if (zone_reclaim_mode & RECLAIM_SWAP)
+ sc.flags |= MAY_UNMAP;
+
cond_resched();
/*
* We need to be able to allocate from the reserves for RECLAIM_SWAP
diff --git a/mm/vmstat.c b/mm/vmstat.c
index b37bd49bfd55..c1cae63e79fb 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -7,6 +7,7 @@
* zoned VM statistics
* Copyright (C) 2006 Silicon Graphics, Inc.,
* Christoph Lameter <christoph@lameter.com>
+ * Copyright (C) 2008-2014 Christoph Lameter
*/
#include <linux/fs.h>
#include <linux/mm.h>
@@ -14,6 +15,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
+#include <linux/cpumask.h>
#include <linux/vmstat.h>
#include <linux/sched.h>
#include <linux/math64.h>
@@ -200,7 +202,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat,
continue;
threshold = (*calculate_pressure)(zone);
- for_each_possible_cpu(cpu)
+ for_each_online_cpu(cpu)
per_cpu_ptr(zone->pageset, cpu)->stat_threshold
= threshold;
}
@@ -419,13 +421,22 @@ void dec_zone_page_state(struct page *page, enum zone_stat_item item)
EXPORT_SYMBOL(dec_zone_page_state);
#endif
-static inline void fold_diff(int *diff)
+
+/*
+ * Fold a differential into the global counters.
+ * Returns the number of counters updated.
+ */
+static int fold_diff(int *diff)
{
int i;
+ int changes = 0;
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
- if (diff[i])
+ if (diff[i]) {
atomic_long_add(diff[i], &vm_stat[i]);
+ changes++;
+ }
+ return changes;
}
/*
@@ -441,12 +452,15 @@ static inline void fold_diff(int *diff)
* statistics in the remote zone struct as well as the global cachelines
* with the global counters. These could cause remote node cache line
* bouncing and will have to be only done when necessary.
+ *
+ * The function returns the number of global counters updated.
*/
-static void refresh_cpu_vm_stats(void)
+static int refresh_cpu_vm_stats(void)
{
struct zone *zone;
int i;
int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+ int changes = 0;
for_each_populated_zone(zone) {
struct per_cpu_pageset __percpu *p = zone->pageset;
@@ -486,15 +500,17 @@ static void refresh_cpu_vm_stats(void)
continue;
}
-
if (__this_cpu_dec_return(p->expire))
continue;
- if (__this_cpu_read(p->pcp.count))
+ if (__this_cpu_read(p->pcp.count)) {
drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
+ changes++;
+ }
#endif
}
- fold_diff(global_diff);
+ changes += fold_diff(global_diff);
+ return changes;
}
/*
@@ -763,6 +779,7 @@ const char * const vmstat_text[] = {
"nr_shmem",
"nr_dirtied",
"nr_written",
+ "nr_pages_scanned",
#ifdef CONFIG_NUMA
"numa_hit",
@@ -1067,7 +1084,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
min_wmark_pages(zone),
low_wmark_pages(zone),
high_wmark_pages(zone),
- zone->pages_scanned,
+ zone_page_state(zone, NR_PAGES_SCANNED),
zone->spanned_pages,
zone->present_pages,
zone->managed_pages);
@@ -1077,10 +1094,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
zone_page_state(zone, i));
seq_printf(m,
- "\n protection: (%lu",
+ "\n protection: (%ld",
zone->lowmem_reserve[0]);
for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
- seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
+ seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
seq_printf(m,
")"
"\n pagesets");
@@ -1228,20 +1245,106 @@ static const struct file_operations proc_vmstat_file_operations = {
#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;
+cpumask_var_t cpu_stat_off;
static void vmstat_update(struct work_struct *w)
{
- refresh_cpu_vm_stats();
- schedule_delayed_work(this_cpu_ptr(&vmstat_work),
+ if (refresh_cpu_vm_stats())
+ /*
+ * Counters were updated so we expect more updates
+ * to occur in the future. Keep on running the
+ * update worker thread.
+ */
+ schedule_delayed_work(this_cpu_ptr(&vmstat_work),
+ round_jiffies_relative(sysctl_stat_interval));
+ else {
+ /*
+ * We did not update any counters so the app may be in
+ * a mode where it does not cause counter updates.
+ * We may be uselessly running vmstat_update.
+ * Defer the checking for differentials to the
+ * shepherd thread on a different processor.
+ */
+ int r;
+ /*
+ * Shepherd work thread does not race since it never
+ * changes the bit if its zero but the cpu
+ * online / off line code may race if
+ * worker threads are still allowed during
+ * shutdown / startup.
+ */
+ r = cpumask_test_and_set_cpu(smp_processor_id(),
+ cpu_stat_off);
+ VM_BUG_ON(r);
+ }
+}
+
+/*
+ * Check if the diffs for a certain cpu indicate that
+ * an update is needed.
+ */
+static bool need_update(int cpu)
+{
+ struct zone *zone;
+
+ for_each_populated_zone(zone) {
+ struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
+
+ BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
+ /*
+ * The fast way of checking if there are any vmstat diffs.
+ * This works because the diffs are byte sized items.
+ */
+ if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
+ return true;
+
+ }
+ return false;
+}
+
+
+/*
+ * Shepherd worker thread that checks the
+ * differentials of processors that have their worker
+ * threads for vm statistics updates disabled because of
+ * inactivity.
+ */
+static void vmstat_shepherd(struct work_struct *w);
+
+static DECLARE_DELAYED_WORK(shepherd, vmstat_shepherd);
+
+static void vmstat_shepherd(struct work_struct *w)
+{
+ int cpu;
+
+ /* Check processors whose vmstat worker threads have been disabled */
+ for_each_cpu(cpu, cpu_stat_off)
+ if (need_update(cpu) &&
+ cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
+
+ schedule_delayed_work_on(cpu, &per_cpu(vmstat_work, cpu),
+ __round_jiffies_relative(sysctl_stat_interval, cpu));
+
+
+ schedule_delayed_work(&shepherd,
round_jiffies_relative(sysctl_stat_interval));
+
}
-static void start_cpu_timer(int cpu)
+static void __init start_shepherd_timer(void)
{
- struct delayed_work *work = &per_cpu(vmstat_work, cpu);
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
+ vmstat_update);
+
+ if (!alloc_cpumask_var(&cpu_stat_off, GFP_KERNEL))
+ BUG();
+ cpumask_copy(cpu_stat_off, cpu_online_mask);
- INIT_DEFERRABLE_WORK(work, vmstat_update);
- schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
+ schedule_delayed_work(&shepherd,
+ round_jiffies_relative(sysctl_stat_interval));
}
static void vmstat_cpu_dead(int node)
@@ -1272,17 +1375,17 @@ static int vmstat_cpuup_callback(struct notifier_block *nfb,
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
refresh_zone_stat_thresholds();
- start_cpu_timer(cpu);
node_set_state(cpu_to_node(cpu), N_CPU);
+ cpumask_set_cpu(cpu, cpu_stat_off);
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
- cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
- per_cpu(vmstat_work, cpu).work.func = NULL;
+ if (!cpumask_test_and_set_cpu(cpu, cpu_stat_off))
+ cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
- start_cpu_timer(cpu);
+ cpumask_set_cpu(cpu, cpu_stat_off);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
@@ -1302,15 +1405,10 @@ static struct notifier_block vmstat_notifier =
static int __init setup_vmstat(void)
{
#ifdef CONFIG_SMP
- int cpu;
-
cpu_notifier_register_begin();
__register_cpu_notifier(&vmstat_notifier);
- for_each_online_cpu(cpu) {
- start_cpu_timer(cpu);
- node_set_state(cpu_to_node(cpu), N_CPU);
- }
+ start_shepherd_timer();
cpu_notifier_register_done();
#endif
#ifdef CONFIG_PROC_FS
diff --git a/mm/zbud.c b/mm/zbud.c
index 01df13a7e2e1..a05790b1915e 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -51,6 +51,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/zbud.h>
+#include <linux/zpool.h>
/*****************
* Structures
@@ -113,6 +114,90 @@ struct zbud_header {
};
/*****************
+ * zpool
+ ****************/
+
+#ifdef CONFIG_ZPOOL
+
+static int zbud_zpool_evict(struct zbud_pool *pool, unsigned long handle)
+{
+ return zpool_evict(pool, handle);
+}
+
+static struct zbud_ops zbud_zpool_ops = {
+ .evict = zbud_zpool_evict
+};
+
+static void *zbud_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops)
+{
+ return zbud_create_pool(gfp, &zbud_zpool_ops);
+}
+
+static void zbud_zpool_destroy(void *pool)
+{
+ zbud_destroy_pool(pool);
+}
+
+static int zbud_zpool_malloc(void *pool, size_t size, gfp_t gfp,
+ unsigned long *handle)
+{
+ return zbud_alloc(pool, size, gfp, handle);
+}
+static void zbud_zpool_free(void *pool, unsigned long handle)
+{
+ zbud_free(pool, handle);
+}
+
+static int zbud_zpool_shrink(void *pool, unsigned int pages,
+ unsigned int *reclaimed)
+{
+ unsigned int total = 0;
+ int ret = -EINVAL;
+
+ while (total < pages) {
+ ret = zbud_reclaim_page(pool, 8);
+ if (ret < 0)
+ break;
+ total++;
+ }
+
+ if (reclaimed)
+ *reclaimed = total;
+
+ return ret;
+}
+
+static void *zbud_zpool_map(void *pool, unsigned long handle,
+ enum zpool_mapmode mm)
+{
+ return zbud_map(pool, handle);
+}
+static void zbud_zpool_unmap(void *pool, unsigned long handle)
+{
+ zbud_unmap(pool, handle);
+}
+
+static u64 zbud_zpool_total_size(void *pool)
+{
+ return zbud_get_pool_size(pool) * PAGE_SIZE;
+}
+
+static struct zpool_driver zbud_zpool_driver = {
+ .type = "zbud",
+ .owner = THIS_MODULE,
+ .create = zbud_zpool_create,
+ .destroy = zbud_zpool_destroy,
+ .malloc = zbud_zpool_malloc,
+ .free = zbud_zpool_free,
+ .shrink = zbud_zpool_shrink,
+ .map = zbud_zpool_map,
+ .unmap = zbud_zpool_unmap,
+ .total_size = zbud_zpool_total_size,
+};
+
+#endif /* CONFIG_ZPOOL */
+
+/*****************
* Helpers
*****************/
/* Just to make the code easier to read */
@@ -122,7 +207,7 @@ enum buddy {
};
/* Converts an allocation size in bytes to size in zbud chunks */
-static int size_to_chunks(int size)
+static int size_to_chunks(size_t size)
{
return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
}
@@ -247,7 +332,7 @@ void zbud_destroy_pool(struct zbud_pool *pool)
* gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
* a new page.
*/
-int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp,
+int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
unsigned long *handle)
{
int chunks, i, freechunks;
@@ -511,11 +596,20 @@ static int __init init_zbud(void)
/* Make sure the zbud header will fit in one chunk */
BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED);
pr_info("loaded\n");
+
+#ifdef CONFIG_ZPOOL
+ zpool_register_driver(&zbud_zpool_driver);
+#endif
+
return 0;
}
static void __exit exit_zbud(void)
{
+#ifdef CONFIG_ZPOOL
+ zpool_unregister_driver(&zbud_zpool_driver);
+#endif
+
pr_info("unloaded\n");
}
diff --git a/mm/zpool.c b/mm/zpool.c
new file mode 100644
index 000000000000..e40612a1df00
--- /dev/null
+++ b/mm/zpool.c
@@ -0,0 +1,364 @@
+/*
+ * zpool memory storage api
+ *
+ * Copyright (C) 2014 Dan Streetman
+ *
+ * This is a common frontend for memory storage pool implementations.
+ * Typically, this is used to store compressed memory.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/zpool.h>
+
+struct zpool {
+ char *type;
+
+ struct zpool_driver *driver;
+ void *pool;
+ struct zpool_ops *ops;
+
+ struct list_head list;
+};
+
+static LIST_HEAD(drivers_head);
+static DEFINE_SPINLOCK(drivers_lock);
+
+static LIST_HEAD(pools_head);
+static DEFINE_SPINLOCK(pools_lock);
+
+/**
+ * zpool_register_driver() - register a zpool implementation.
+ * @driver: driver to register
+ */
+void zpool_register_driver(struct zpool_driver *driver)
+{
+ spin_lock(&drivers_lock);
+ atomic_set(&driver->refcount, 0);
+ list_add(&driver->list, &drivers_head);
+ spin_unlock(&drivers_lock);
+}
+EXPORT_SYMBOL(zpool_register_driver);
+
+/**
+ * zpool_unregister_driver() - unregister a zpool implementation.
+ * @driver: driver to unregister.
+ *
+ * Module usage counting is used to prevent using a driver
+ * while/after unloading, so if this is called from module
+ * exit function, this should never fail; if called from
+ * other than the module exit function, and this returns
+ * failure, the driver is in use and must remain available.
+ */
+int zpool_unregister_driver(struct zpool_driver *driver)
+{
+ int ret = 0, refcount;
+
+ spin_lock(&drivers_lock);
+ refcount = atomic_read(&driver->refcount);
+ WARN_ON(refcount < 0);
+ if (refcount > 0)
+ ret = -EBUSY;
+ else
+ list_del(&driver->list);
+ spin_unlock(&drivers_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(zpool_unregister_driver);
+
+/**
+ * zpool_evict() - evict callback from a zpool implementation.
+ * @pool: pool to evict from.
+ * @handle: handle to evict.
+ *
+ * This can be used by zpool implementations to call the
+ * user's evict zpool_ops struct evict callback.
+ */
+int zpool_evict(void *pool, unsigned long handle)
+{
+ struct zpool *zpool;
+
+ spin_lock(&pools_lock);
+ list_for_each_entry(zpool, &pools_head, list) {
+ if (zpool->pool == pool) {
+ spin_unlock(&pools_lock);
+ if (!zpool->ops || !zpool->ops->evict)
+ return -EINVAL;
+ return zpool->ops->evict(zpool, handle);
+ }
+ }
+ spin_unlock(&pools_lock);
+
+ return -ENOENT;
+}
+EXPORT_SYMBOL(zpool_evict);
+
+static struct zpool_driver *zpool_get_driver(char *type)
+{
+ struct zpool_driver *driver;
+
+ spin_lock(&drivers_lock);
+ list_for_each_entry(driver, &drivers_head, list) {
+ if (!strcmp(driver->type, type)) {
+ bool got = try_module_get(driver->owner);
+
+ if (got)
+ atomic_inc(&driver->refcount);
+ spin_unlock(&drivers_lock);
+ return got ? driver : NULL;
+ }
+ }
+
+ spin_unlock(&drivers_lock);
+ return NULL;
+}
+
+static void zpool_put_driver(struct zpool_driver *driver)
+{
+ atomic_dec(&driver->refcount);
+ module_put(driver->owner);
+}
+
+/**
+ * zpool_create_pool() - Create a new zpool
+ * @type The type of the zpool to create (e.g. zbud, zsmalloc)
+ * @gfp The GFP flags to use when allocating the pool.
+ * @ops The optional ops callback.
+ *
+ * This creates a new zpool of the specified type. The gfp flags will be
+ * used when allocating memory, if the implementation supports it. If the
+ * ops param is NULL, then the created zpool will not be shrinkable.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: New zpool on success, NULL on failure.
+ */
+struct zpool *zpool_create_pool(char *type, gfp_t gfp, struct zpool_ops *ops)
+{
+ struct zpool_driver *driver;
+ struct zpool *zpool;
+
+ pr_info("creating pool type %s\n", type);
+
+ driver = zpool_get_driver(type);
+
+ if (!driver) {
+ request_module(type);
+ driver = zpool_get_driver(type);
+ }
+
+ if (!driver) {
+ pr_err("no driver for type %s\n", type);
+ return NULL;
+ }
+
+ zpool = kmalloc(sizeof(*zpool), gfp);
+ if (!zpool) {
+ pr_err("couldn't create zpool - out of memory\n");
+ zpool_put_driver(driver);
+ return NULL;
+ }
+
+ zpool->type = driver->type;
+ zpool->driver = driver;
+ zpool->pool = driver->create(gfp, ops);
+ zpool->ops = ops;
+
+ if (!zpool->pool) {
+ pr_err("couldn't create %s pool\n", type);
+ zpool_put_driver(driver);
+ kfree(zpool);
+ return NULL;
+ }
+
+ pr_info("created %s pool\n", type);
+
+ spin_lock(&pools_lock);
+ list_add(&zpool->list, &pools_head);
+ spin_unlock(&pools_lock);
+
+ return zpool;
+}
+
+/**
+ * zpool_destroy_pool() - Destroy a zpool
+ * @pool The zpool to destroy.
+ *
+ * Implementations must guarantee this to be thread-safe,
+ * however only when destroying different pools. The same
+ * pool should only be destroyed once, and should not be used
+ * after it is destroyed.
+ *
+ * This destroys an existing zpool. The zpool should not be in use.
+ */
+void zpool_destroy_pool(struct zpool *zpool)
+{
+ pr_info("destroying pool type %s\n", zpool->type);
+
+ spin_lock(&pools_lock);
+ list_del(&zpool->list);
+ spin_unlock(&pools_lock);
+ zpool->driver->destroy(zpool->pool);
+ zpool_put_driver(zpool->driver);
+ kfree(zpool);
+}
+
+/**
+ * zpool_get_type() - Get the type of the zpool
+ * @pool The zpool to check
+ *
+ * This returns the type of the pool.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: The type of zpool.
+ */
+char *zpool_get_type(struct zpool *zpool)
+{
+ return zpool->type;
+}
+
+/**
+ * zpool_malloc() - Allocate memory
+ * @pool The zpool to allocate from.
+ * @size The amount of memory to allocate.
+ * @gfp The GFP flags to use when allocating memory.
+ * @handle Pointer to the handle to set
+ *
+ * This allocates the requested amount of memory from the pool.
+ * The gfp flags will be used when allocating memory, if the
+ * implementation supports it. The provided @handle will be
+ * set to the allocated object handle.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: 0 on success, negative value on error.
+ */
+int zpool_malloc(struct zpool *zpool, size_t size, gfp_t gfp,
+ unsigned long *handle)
+{
+ return zpool->driver->malloc(zpool->pool, size, gfp, handle);
+}
+
+/**
+ * zpool_free() - Free previously allocated memory
+ * @pool The zpool that allocated the memory.
+ * @handle The handle to the memory to free.
+ *
+ * This frees previously allocated memory. This does not guarantee
+ * that the pool will actually free memory, only that the memory
+ * in the pool will become available for use by the pool.
+ *
+ * Implementations must guarantee this to be thread-safe,
+ * however only when freeing different handles. The same
+ * handle should only be freed once, and should not be used
+ * after freeing.
+ */
+void zpool_free(struct zpool *zpool, unsigned long handle)
+{
+ zpool->driver->free(zpool->pool, handle);
+}
+
+/**
+ * zpool_shrink() - Shrink the pool size
+ * @pool The zpool to shrink.
+ * @pages The number of pages to shrink the pool.
+ * @reclaimed The number of pages successfully evicted.
+ *
+ * This attempts to shrink the actual memory size of the pool
+ * by evicting currently used handle(s). If the pool was
+ * created with no zpool_ops, or the evict call fails for any
+ * of the handles, this will fail. If non-NULL, the @reclaimed
+ * parameter will be set to the number of pages reclaimed,
+ * which may be more than the number of pages requested.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: 0 on success, negative value on error/failure.
+ */
+int zpool_shrink(struct zpool *zpool, unsigned int pages,
+ unsigned int *reclaimed)
+{
+ return zpool->driver->shrink(zpool->pool, pages, reclaimed);
+}
+
+/**
+ * zpool_map_handle() - Map a previously allocated handle into memory
+ * @pool The zpool that the handle was allocated from
+ * @handle The handle to map
+ * @mm How the memory should be mapped
+ *
+ * This maps a previously allocated handle into memory. The @mm
+ * param indicates to the implementation how the memory will be
+ * used, i.e. read-only, write-only, read-write. If the
+ * implementation does not support it, the memory will be treated
+ * as read-write.
+ *
+ * This may hold locks, disable interrupts, and/or preemption,
+ * and the zpool_unmap_handle() must be called to undo those
+ * actions. The code that uses the mapped handle should complete
+ * its operatons on the mapped handle memory quickly and unmap
+ * as soon as possible. As the implementation may use per-cpu
+ * data, multiple handles should not be mapped concurrently on
+ * any cpu.
+ *
+ * Returns: A pointer to the handle's mapped memory area.
+ */
+void *zpool_map_handle(struct zpool *zpool, unsigned long handle,
+ enum zpool_mapmode mapmode)
+{
+ return zpool->driver->map(zpool->pool, handle, mapmode);
+}
+
+/**
+ * zpool_unmap_handle() - Unmap a previously mapped handle
+ * @pool The zpool that the handle was allocated from
+ * @handle The handle to unmap
+ *
+ * This unmaps a previously mapped handle. Any locks or other
+ * actions that the implementation took in zpool_map_handle()
+ * will be undone here. The memory area returned from
+ * zpool_map_handle() should no longer be used after this.
+ */
+void zpool_unmap_handle(struct zpool *zpool, unsigned long handle)
+{
+ zpool->driver->unmap(zpool->pool, handle);
+}
+
+/**
+ * zpool_get_total_size() - The total size of the pool
+ * @pool The zpool to check
+ *
+ * This returns the total size in bytes of the pool.
+ *
+ * Returns: Total size of the zpool in bytes.
+ */
+u64 zpool_get_total_size(struct zpool *zpool)
+{
+ return zpool->driver->total_size(zpool->pool);
+}
+
+static int __init init_zpool(void)
+{
+ pr_info("loaded\n");
+ return 0;
+}
+
+static void __exit exit_zpool(void)
+{
+ pr_info("unloaded\n");
+}
+
+module_init(init_zpool);
+module_exit(exit_zpool);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
+MODULE_DESCRIPTION("Common API for compressed memory storage");
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index fe78189624cf..4e2fc83cb394 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -92,6 +92,7 @@
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/zsmalloc.h>
+#include <linux/zpool.h>
/*
* This must be power of 2 and greater than of equal to sizeof(link_free).
@@ -240,6 +241,81 @@ struct mapping_area {
enum zs_mapmode vm_mm; /* mapping mode */
};
+/* zpool driver */
+
+#ifdef CONFIG_ZPOOL
+
+static void *zs_zpool_create(gfp_t gfp, struct zpool_ops *zpool_ops)
+{
+ return zs_create_pool(gfp);
+}
+
+static void zs_zpool_destroy(void *pool)
+{
+ zs_destroy_pool(pool);
+}
+
+static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp,
+ unsigned long *handle)
+{
+ *handle = zs_malloc(pool, size);
+ return *handle ? 0 : -1;
+}
+static void zs_zpool_free(void *pool, unsigned long handle)
+{
+ zs_free(pool, handle);
+}
+
+static int zs_zpool_shrink(void *pool, unsigned int pages,
+ unsigned int *reclaimed)
+{
+ return -EINVAL;
+}
+
+static void *zs_zpool_map(void *pool, unsigned long handle,
+ enum zpool_mapmode mm)
+{
+ enum zs_mapmode zs_mm;
+
+ switch (mm) {
+ case ZPOOL_MM_RO:
+ zs_mm = ZS_MM_RO;
+ break;
+ case ZPOOL_MM_WO:
+ zs_mm = ZS_MM_WO;
+ break;
+ case ZPOOL_MM_RW: /* fallthru */
+ default:
+ zs_mm = ZS_MM_RW;
+ break;
+ }
+
+ return zs_map_object(pool, handle, zs_mm);
+}
+static void zs_zpool_unmap(void *pool, unsigned long handle)
+{
+ zs_unmap_object(pool, handle);
+}
+
+static u64 zs_zpool_total_size(void *pool)
+{
+ return zs_get_total_size_bytes(pool);
+}
+
+static struct zpool_driver zs_zpool_driver = {
+ .type = "zsmalloc",
+ .owner = THIS_MODULE,
+ .create = zs_zpool_create,
+ .destroy = zs_zpool_destroy,
+ .malloc = zs_zpool_malloc,
+ .free = zs_zpool_free,
+ .shrink = zs_zpool_shrink,
+ .map = zs_zpool_map,
+ .unmap = zs_zpool_unmap,
+ .total_size = zs_zpool_total_size,
+};
+
+#endif /* CONFIG_ZPOOL */
/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
@@ -690,7 +766,7 @@ static inline void __zs_cpu_down(struct mapping_area *area)
static inline void *__zs_map_object(struct mapping_area *area,
struct page *pages[2], int off, int size)
{
- BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages));
+ BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages));
area->vm_addr = area->vm->addr;
return area->vm_addr + off;
}
@@ -814,6 +890,10 @@ static void zs_exit(void)
{
int cpu;
+#ifdef CONFIG_ZPOOL
+ zpool_unregister_driver(&zs_zpool_driver);
+#endif
+
cpu_notifier_register_begin();
for_each_online_cpu(cpu)
@@ -840,6 +920,10 @@ static int zs_init(void)
cpu_notifier_register_done();
+#ifdef CONFIG_ZPOOL
+ zpool_register_driver(&zs_zpool_driver);
+#endif
+
return 0;
fail:
zs_exit();
diff --git a/mm/zswap.c b/mm/zswap.c
index 008388fe7b0f..ea064c1a09ba 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -34,7 +34,7 @@
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/mempool.h>
-#include <linux/zbud.h>
+#include <linux/zpool.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
@@ -45,8 +45,8 @@
/*********************************
* statistics
**********************************/
-/* Number of memory pages used by the compressed pool */
-static u64 zswap_pool_pages;
+/* Total bytes used by the compressed storage */
+static u64 zswap_pool_total_size;
/* The number of compressed pages currently stored in zswap */
static atomic_t zswap_stored_pages = ATOMIC_INIT(0);
@@ -89,8 +89,13 @@ static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent,
zswap_max_pool_percent, uint, 0644);
-/* zbud_pool is shared by all of zswap backend */
-static struct zbud_pool *zswap_pool;
+/* Compressed storage to use */
+#define ZSWAP_ZPOOL_DEFAULT "zbud"
+static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
+module_param_named(zpool, zswap_zpool_type, charp, 0444);
+
+/* zpool is shared by all of zswap backend */
+static struct zpool *zswap_pool;
/*********************************
* compression functions
@@ -168,7 +173,7 @@ static void zswap_comp_exit(void)
* be held while changing the refcount. Since the lock must
* be held, there is no reason to also make refcount atomic.
* offset - the swap offset for the entry. Index into the red-black tree.
- * handle - zbud allocation handle that stores the compressed page data
+ * handle - zpool allocation handle that stores the compressed page data
* length - the length in bytes of the compressed page data. Needed during
* decompression
*/
@@ -207,7 +212,7 @@ static int zswap_entry_cache_create(void)
return zswap_entry_cache == NULL;
}
-static void zswap_entry_cache_destory(void)
+static void __init zswap_entry_cache_destroy(void)
{
kmem_cache_destroy(zswap_entry_cache);
}
@@ -284,15 +289,15 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
}
/*
- * Carries out the common pattern of freeing and entry's zbud allocation,
+ * Carries out the common pattern of freeing and entry's zpool allocation,
* freeing the entry itself, and decrementing the number of stored pages.
*/
static void zswap_free_entry(struct zswap_entry *entry)
{
- zbud_free(zswap_pool, entry->handle);
+ zpool_free(zswap_pool, entry->handle);
zswap_entry_cache_free(entry);
atomic_dec(&zswap_stored_pages);
- zswap_pool_pages = zbud_get_pool_size(zswap_pool);
+ zswap_pool_total_size = zpool_get_total_size(zswap_pool);
}
/* caller must hold the tree lock */
@@ -409,7 +414,7 @@ cleanup:
static bool zswap_is_full(void)
{
return totalram_pages * zswap_max_pool_percent / 100 <
- zswap_pool_pages;
+ DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}
/*********************************
@@ -502,7 +507,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
* add_to_swap_cache() doesn't return -EEXIST, so we can safely
* clear SWAP_HAS_CACHE flag.
*/
- swapcache_free(entry, NULL);
+ swapcache_free(entry);
} while (err != -ENOMEM);
if (new_page)
@@ -525,7 +530,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
* the swap cache, the compressed version stored by zswap can be
* freed.
*/
-static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
+static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
{
struct zswap_header *zhdr;
swp_entry_t swpentry;
@@ -541,9 +546,9 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
};
/* extract swpentry from data */
- zhdr = zbud_map(pool, handle);
+ zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
swpentry = zhdr->swpentry; /* here */
- zbud_unmap(pool, handle);
+ zpool_unmap_handle(pool, handle);
tree = zswap_trees[swp_type(swpentry)];
offset = swp_offset(swpentry);
@@ -573,13 +578,13 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
case ZSWAP_SWAPCACHE_NEW: /* page is locked */
/* decompress */
dlen = PAGE_SIZE;
- src = (u8 *)zbud_map(zswap_pool, entry->handle) +
- sizeof(struct zswap_header);
+ src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
+ ZPOOL_MM_RO) + sizeof(struct zswap_header);
dst = kmap_atomic(page);
ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
entry->length, dst, &dlen);
kunmap_atomic(dst);
- zbud_unmap(zswap_pool, entry->handle);
+ zpool_unmap_handle(zswap_pool, entry->handle);
BUG_ON(ret);
BUG_ON(dlen != PAGE_SIZE);
@@ -652,7 +657,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
/* reclaim space if needed */
if (zswap_is_full()) {
zswap_pool_limit_hit++;
- if (zbud_reclaim_page(zswap_pool, 8)) {
+ if (zpool_shrink(zswap_pool, 1, NULL)) {
zswap_reject_reclaim_fail++;
ret = -ENOMEM;
goto reject;
@@ -679,7 +684,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
/* store */
len = dlen + sizeof(struct zswap_header);
- ret = zbud_alloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
+ ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
&handle);
if (ret == -ENOSPC) {
zswap_reject_compress_poor++;
@@ -689,11 +694,11 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
zswap_reject_alloc_fail++;
goto freepage;
}
- zhdr = zbud_map(zswap_pool, handle);
+ zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW);
zhdr->swpentry = swp_entry(type, offset);
buf = (u8 *)(zhdr + 1);
memcpy(buf, dst, dlen);
- zbud_unmap(zswap_pool, handle);
+ zpool_unmap_handle(zswap_pool, handle);
put_cpu_var(zswap_dstmem);
/* populate entry */
@@ -716,7 +721,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
/* update stats */
atomic_inc(&zswap_stored_pages);
- zswap_pool_pages = zbud_get_pool_size(zswap_pool);
+ zswap_pool_total_size = zpool_get_total_size(zswap_pool);
return 0;
@@ -752,13 +757,13 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
/* decompress */
dlen = PAGE_SIZE;
- src = (u8 *)zbud_map(zswap_pool, entry->handle) +
- sizeof(struct zswap_header);
+ src = (u8 *)zpool_map_handle(zswap_pool, entry->handle,
+ ZPOOL_MM_RO) + sizeof(struct zswap_header);
dst = kmap_atomic(page);
ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
dst, &dlen);
kunmap_atomic(dst);
- zbud_unmap(zswap_pool, entry->handle);
+ zpool_unmap_handle(zswap_pool, entry->handle);
BUG_ON(ret);
spin_lock(&tree->lock);
@@ -811,7 +816,7 @@ static void zswap_frontswap_invalidate_area(unsigned type)
zswap_trees[type] = NULL;
}
-static struct zbud_ops zswap_zbud_ops = {
+static struct zpool_ops zswap_zpool_ops = {
.evict = zswap_writeback_entry
};
@@ -869,8 +874,8 @@ static int __init zswap_debugfs_init(void)
zswap_debugfs_root, &zswap_written_back_pages);
debugfs_create_u64("duplicate_entry", S_IRUGO,
zswap_debugfs_root, &zswap_duplicate_entry);
- debugfs_create_u64("pool_pages", S_IRUGO,
- zswap_debugfs_root, &zswap_pool_pages);
+ debugfs_create_u64("pool_total_size", S_IRUGO,
+ zswap_debugfs_root, &zswap_pool_total_size);
debugfs_create_atomic_t("stored_pages", S_IRUGO,
zswap_debugfs_root, &zswap_stored_pages);
@@ -895,16 +900,26 @@ static void __exit zswap_debugfs_exit(void) { }
**********************************/
static int __init init_zswap(void)
{
+ gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
+
if (!zswap_enabled)
return 0;
pr_info("loading zswap\n");
- zswap_pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
+ zswap_pool = zpool_create_pool(zswap_zpool_type, gfp, &zswap_zpool_ops);
+ if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
+ pr_info("%s zpool not available\n", zswap_zpool_type);
+ zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
+ zswap_pool = zpool_create_pool(zswap_zpool_type, gfp,
+ &zswap_zpool_ops);
+ }
if (!zswap_pool) {
- pr_err("zbud pool creation failed\n");
+ pr_err("%s zpool not available\n", zswap_zpool_type);
+ pr_err("zpool creation failed\n");
goto error;
}
+ pr_info("using %s pool\n", zswap_zpool_type);
if (zswap_entry_cache_create()) {
pr_err("entry cache creation failed\n");
@@ -926,9 +941,9 @@ static int __init init_zswap(void)
pcpufail:
zswap_comp_exit();
compfail:
- zswap_entry_cache_destory();
+ zswap_entry_cache_destroy();
cachefail:
- zbud_destroy_pool(zswap_pool);
+ zpool_destroy_pool(zswap_pool);
error:
return -ENOMEM;
}
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index f14e54a05691..16dd40910c65 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -184,7 +184,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
/* Reached the end of the list, so insert after 'frag_entry_curr'. */
if (likely(frag_entry_curr)) {
- hlist_add_after(&frag_entry_curr->list, &frag_entry_new->list);
+ hlist_add_behind(&frag_entry_new->list, &frag_entry_curr->list);
chain->size += skb->len - hdr_size;
chain->timestamp = jiffies;
ret = true;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index b4845f4b2bb4..7751c92c8c57 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1174,7 +1174,7 @@ static void br_multicast_add_router(struct net_bridge *br,
}
if (slot)
- hlist_add_after_rcu(slot, &port->rlist);
+ hlist_add_behind_rcu(&port->rlist, slot);
else
hlist_add_head_rcu(&port->rlist, &br->router_list);
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5afeb5aa4c7c..e9cb2588e416 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -940,7 +940,7 @@ static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
last = li;
}
if (last)
- hlist_add_after_rcu(&last->hlist, &new->hlist);
+ hlist_add_behind_rcu(&new->hlist, &last->hlist);
else
hlist_add_before_rcu(&new->hlist, &li->hlist);
}
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 731e1e1722d9..fd0dc47f471d 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -277,7 +277,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
last = p;
}
if (last)
- hlist_add_after_rcu(&last->list, &newp->list);
+ hlist_add_behind_rcu(&newp->list, &last->list);
else
hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
out:
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 0525d78ba328..beeed602aeb3 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -389,7 +389,7 @@ redo:
if (h != h0)
continue;
hlist_del(&pol->bydst);
- hlist_add_after(entry0, &pol->bydst);
+ hlist_add_behind(&pol->bydst, entry0);
}
entry0 = &pol->bydst;
}
@@ -654,7 +654,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
break;
}
if (newpos)
- hlist_add_after(newpos, &policy->bydst);
+ hlist_add_behind(&policy->bydst, newpos);
else
hlist_add_head(&policy->bydst, chain);
xfrm_pol_hold(policy);
diff --git a/scripts/.gitignore b/scripts/.gitignore
index fb070fa1038f..5ecfe93f2028 100644
--- a/scripts/.gitignore
+++ b/scripts/.gitignore
@@ -4,7 +4,6 @@
conmakehash
kallsyms
pnmtologo
-bin2c
unifdef
ihex2fw
recordmcount
diff --git a/scripts/Makefile b/scripts/Makefile
index 890df5c6adfb..72902b5f2721 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -13,7 +13,6 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include
hostprogs-$(CONFIG_KALLSYMS) += kallsyms
hostprogs-$(CONFIG_LOGO) += pnmtologo
hostprogs-$(CONFIG_VT) += conmakehash
-hostprogs-$(CONFIG_IKCONFIG) += bin2c
hostprogs-$(BUILD_C_RECORDMCOUNT) += recordmcount
hostprogs-$(CONFIG_BUILDTIME_EXTABLE_SORT) += sortextable
hostprogs-$(CONFIG_ASN1) += asn1_compiler
diff --git a/scripts/basic/.gitignore b/scripts/basic/.gitignore
index a776371a3502..9528ec9e5adc 100644
--- a/scripts/basic/.gitignore
+++ b/scripts/basic/.gitignore
@@ -1 +1,2 @@
fixdep
+bin2c
diff --git a/scripts/basic/Makefile b/scripts/basic/Makefile
index 4fcef87bb875..ec10d9345bc2 100644
--- a/scripts/basic/Makefile
+++ b/scripts/basic/Makefile
@@ -9,6 +9,7 @@
# fixdep: Used to generate dependency information during build process
hostprogs-y := fixdep
+hostprogs-$(CONFIG_BUILD_BIN2C) += bin2c
always := $(hostprogs-y)
# fixdep is needed to compile other host programs
diff --git a/scripts/bin2c.c b/scripts/basic/bin2c.c
index 96dd2bcbb407..af187e695345 100644
--- a/scripts/bin2c.c
+++ b/scripts/basic/bin2c.c
@@ -11,7 +11,7 @@
int main(int argc, char *argv[])
{
- int ch, total=0;
+ int ch, total = 0;
if (argc > 1)
printf("const char %s[] %s=\n",
@@ -19,10 +19,9 @@ int main(int argc, char *argv[])
do {
printf("\t\"");
- while ((ch = getchar()) != EOF)
- {
+ while ((ch = getchar()) != EOF) {
total++;
- printf("\\x%02x",ch);
+ printf("\\x%02x", ch);
if (total % 16 == 0)
break;
}
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 182be0f12407..dc72a9b3172d 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -435,7 +435,7 @@ sub build_types {
}x;
$Type = qr{
$NonptrType
- (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*|\[\])+|(?:\s*\[\s*\])+)?
+ (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*\s*(?:const\s*)?|\[\])+|(?:\s*\[\s*\])+)?
(?:\s+$Inline|\s+$Modifier)*
}x;
$Declare = qr{(?:$Storage\s+(?:$Inline\s+)?)?$Type};
@@ -550,11 +550,43 @@ sub seed_camelcase_includes {
}
}
+sub git_commit_info {
+ my ($commit, $id, $desc) = @_;
+
+ return ($id, $desc) if ((which("git") eq "") || !(-e ".git"));
+
+ my $output = `git log --no-color --format='%H %s' -1 $commit 2>&1`;
+ $output =~ s/^\s*//gm;
+ my @lines = split("\n", $output);
+
+ if ($lines[0] =~ /^error: short SHA1 $commit is ambiguous\./) {
+# Maybe one day convert this block of bash into something that returns
+# all matching commit ids, but it's very slow...
+#
+# echo "checking commits $1..."
+# git rev-list --remotes | grep -i "^$1" |
+# while read line ; do
+# git log --format='%H %s' -1 $line |
+# echo "commit $(cut -c 1-12,41-)"
+# done
+ } elsif ($lines[0] =~ /^fatal: ambiguous argument '$commit': unknown revision or path not in the working tree\./) {
+ } else {
+ $id = substr($lines[0], 0, 12);
+ $desc = substr($lines[0], 41);
+ }
+
+ return ($id, $desc);
+}
+
$chk_signoff = 0 if ($file);
my @rawlines = ();
my @lines = ();
my @fixed = ();
+my @fixed_inserted = ();
+my @fixed_deleted = ();
+my $fixlinenr = -1;
+
my $vname;
for my $filename (@ARGV) {
my $FILE;
@@ -583,6 +615,9 @@ for my $filename (@ARGV) {
@rawlines = ();
@lines = ();
@fixed = ();
+ @fixed_inserted = ();
+ @fixed_deleted = ();
+ $fixlinenr = -1;
}
exit($exit);
@@ -674,6 +709,18 @@ sub format_email {
return $formatted_email;
}
+sub which {
+ my ($bin) = @_;
+
+ foreach my $path (split(/:/, $ENV{PATH})) {
+ if (-e "$path/$bin") {
+ return "$path/$bin";
+ }
+ }
+
+ return "";
+}
+
sub which_conf {
my ($conf) = @_;
@@ -1483,6 +1530,90 @@ sub report_dump {
our @report;
}
+sub fixup_current_range {
+ my ($lineRef, $offset, $length) = @_;
+
+ if ($$lineRef =~ /^\@\@ -\d+,\d+ \+(\d+),(\d+) \@\@/) {
+ my $o = $1;
+ my $l = $2;
+ my $no = $o + $offset;
+ my $nl = $l + $length;
+ $$lineRef =~ s/\+$o,$l \@\@/\+$no,$nl \@\@/;
+ }
+}
+
+sub fix_inserted_deleted_lines {
+ my ($linesRef, $insertedRef, $deletedRef) = @_;
+
+ my $range_last_linenr = 0;
+ my $delta_offset = 0;
+
+ my $old_linenr = 0;
+ my $new_linenr = 0;
+
+ my $next_insert = 0;
+ my $next_delete = 0;
+
+ my @lines = ();
+
+ my $inserted = @{$insertedRef}[$next_insert++];
+ my $deleted = @{$deletedRef}[$next_delete++];
+
+ foreach my $old_line (@{$linesRef}) {
+ my $save_line = 1;
+ my $line = $old_line; #don't modify the array
+ if ($line =~ /^(?:\+\+\+\|\-\-\-)\s+\S+/) { #new filename
+ $delta_offset = 0;
+ } elsif ($line =~ /^\@\@ -\d+,\d+ \+\d+,\d+ \@\@/) { #new hunk
+ $range_last_linenr = $new_linenr;
+ fixup_current_range(\$line, $delta_offset, 0);
+ }
+
+ while (defined($deleted) && ${$deleted}{'LINENR'} == $old_linenr) {
+ $deleted = @{$deletedRef}[$next_delete++];
+ $save_line = 0;
+ fixup_current_range(\$lines[$range_last_linenr], $delta_offset--, -1);
+ }
+
+ while (defined($inserted) && ${$inserted}{'LINENR'} == $old_linenr) {
+ push(@lines, ${$inserted}{'LINE'});
+ $inserted = @{$insertedRef}[$next_insert++];
+ $new_linenr++;
+ fixup_current_range(\$lines[$range_last_linenr], $delta_offset++, 1);
+ }
+
+ if ($save_line) {
+ push(@lines, $line);
+ $new_linenr++;
+ }
+
+ $old_linenr++;
+ }
+
+ return @lines;
+}
+
+sub fix_insert_line {
+ my ($linenr, $line) = @_;
+
+ my $inserted = {
+ LINENR => $linenr,
+ LINE => $line,
+ };
+ push(@fixed_inserted, $inserted);
+}
+
+sub fix_delete_line {
+ my ($linenr, $line) = @_;
+
+ my $deleted = {
+ LINENR => $linenr,
+ LINE => $line,
+ };
+
+ push(@fixed_deleted, $deleted);
+}
+
sub ERROR {
my ($type, $msg) = @_;
@@ -1637,11 +1768,13 @@ sub process {
my $signoff = 0;
my $is_patch = 0;
- my $in_header_lines = 1;
+ my $in_header_lines = $file ? 0 : 1;
my $in_commit_log = 0; #Scanning lines before patch
-
+ my $reported_maintainer_file = 0;
my $non_utf8_charset = 0;
+ my $last_blank_line = 0;
+
our @report = ();
our $cnt_lines = 0;
our $cnt_error = 0;
@@ -1759,8 +1892,10 @@ sub process {
$realcnt = 0;
$linenr = 0;
+ $fixlinenr = -1;
foreach my $line (@lines) {
$linenr++;
+ $fixlinenr++;
my $sline = $line; #copy of $line
$sline =~ s/$;/ /g; #with comments as spaces
@@ -1891,7 +2026,7 @@ sub process {
if (WARN("BAD_SIGN_OFF",
"Do not use whitespace before $ucfirst_sign_off\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =
+ $fixed[$fixlinenr] =
"$ucfirst_sign_off $email";
}
}
@@ -1899,7 +2034,7 @@ sub process {
if (WARN("BAD_SIGN_OFF",
"'$ucfirst_sign_off' is the preferred signature form\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =
+ $fixed[$fixlinenr] =
"$ucfirst_sign_off $email";
}
@@ -1908,7 +2043,7 @@ sub process {
if (WARN("BAD_SIGN_OFF",
"Use a single space after $ucfirst_sign_off\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =
+ $fixed[$fixlinenr] =
"$ucfirst_sign_off $email";
}
}
@@ -1956,6 +2091,31 @@ sub process {
"Remove Gerrit Change-Id's before submitting upstream.\n" . $herecurr);
}
+# Check for improperly formed commit descriptions
+ if ($in_commit_log &&
+ $line =~ /\bcommit\s+[0-9a-f]{5,}/i &&
+ $line !~ /\b[Cc]ommit [0-9a-f]{12,16} \("/) {
+ $line =~ /\b(c)ommit\s+([0-9a-f]{5,})/i;
+ my $init_char = $1;
+ my $orig_commit = lc($2);
+ my $id = '01234567890ab';
+ my $desc = 'commit description';
+ ($id, $desc) = git_commit_info($orig_commit, $id, $desc);
+ ERROR("GIT_COMMIT_ID",
+ "Please use 12 to 16 chars for the git commit ID like: '${init_char}ommit $id (\"$desc\")'\n" . $herecurr);
+ }
+
+# Check for added, moved or deleted files
+ if (!$reported_maintainer_file && !$in_commit_log &&
+ ($line =~ /^(?:new|deleted) file mode\s*\d+\s*$/ ||
+ $line =~ /^rename (?:from|to) [\w\/\.\-]+\s*$/ ||
+ ($line =~ /\{\s*([\w\/\.\-]*)\s*\=\>\s*([\w\/\.\-]*)\s*\}/ &&
+ (defined($1) || defined($2))))) {
+ $reported_maintainer_file = 1;
+ WARN("FILE_PATH_CHANGES",
+ "added, moved or deleted file(s), does MAINTAINERS need updating?\n" . $herecurr);
+ }
+
# Check for wrappage within a valid hunk of the file
if ($realcnt != 0 && $line !~ m{^(?:\+|-| |\\ No newline|$)}) {
ERROR("CORRUPTED_PATCH",
@@ -1993,7 +2153,8 @@ sub process {
# Check if it's the start of a commit log
# (not a header line and we haven't seen the patch filename)
if ($in_header_lines && $realfile =~ /^$/ &&
- $rawline !~ /^(commit\b|from\b|[\w-]+:).+$/i) {
+ !($rawline =~ /^\s+\S/ ||
+ $rawline =~ /^(commit\b|from\b|[\w-]+:).*$/i)) {
$in_header_lines = 0;
$in_commit_log = 1;
}
@@ -2021,14 +2182,14 @@ sub process {
if (ERROR("DOS_LINE_ENDINGS",
"DOS line endings\n" . $herevet) &&
$fix) {
- $fixed[$linenr - 1] =~ s/[\s\015]+$//;
+ $fixed[$fixlinenr] =~ s/[\s\015]+$//;
}
} elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) {
my $herevet = "$here\n" . cat_vet($rawline) . "\n";
if (ERROR("TRAILING_WHITESPACE",
"trailing whitespace\n" . $herevet) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\s+$//;
+ $fixed[$fixlinenr] =~ s/\s+$//;
}
$rpt_cleaners = 1;
@@ -2049,7 +2210,7 @@ sub process {
# Only applies when adding the entry originally, after that we do not have
# sufficient context to determine whether it is indeed long enough.
if ($realfile =~ /Kconfig/ &&
- $line =~ /.\s*config\s+/) {
+ $line =~ /^\+\s*config\s+/) {
my $length = 0;
my $cnt = $realcnt;
my $ln = $linenr + 1;
@@ -2062,10 +2223,11 @@ sub process {
$is_end = $lines[$ln - 1] =~ /^\+/;
next if ($f =~ /^-/);
+ last if (!$file && $f =~ /^\@\@/);
- if ($lines[$ln - 1] =~ /.\s*(?:bool|tristate)\s*\"/) {
+ if ($lines[$ln - 1] =~ /^\+\s*(?:bool|tristate)\s*\"/) {
$is_start = 1;
- } elsif ($lines[$ln - 1] =~ /.\s*(?:---)?help(?:---)?$/) {
+ } elsif ($lines[$ln - 1] =~ /^\+\s*(?:---)?help(?:---)?$/) {
$length = -1;
}
@@ -2166,7 +2328,7 @@ sub process {
if (WARN("QUOTED_WHITESPACE_BEFORE_NEWLINE",
"unnecessary whitespace before a quoted newline\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/^(\+.*\".*)\s+\\n/$1\\n/;
+ $fixed[$fixlinenr] =~ s/^(\+.*\".*)\s+\\n/$1\\n/;
}
}
@@ -2203,7 +2365,7 @@ sub process {
if (ERROR("CODE_INDENT",
"code indent should use tabs where possible\n" . $herevet) &&
$fix) {
- $fixed[$linenr - 1] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
+ $fixed[$fixlinenr] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
}
}
@@ -2213,9 +2375,9 @@ sub process {
if (WARN("SPACE_BEFORE_TAB",
"please, no space before tabs\n" . $herevet) &&
$fix) {
- while ($fixed[$linenr - 1] =~
+ while ($fixed[$fixlinenr] =~
s/(^\+.*) {8,8}+\t/$1\t\t/) {}
- while ($fixed[$linenr - 1] =~
+ while ($fixed[$fixlinenr] =~
s/(^\+.*) +\t/$1\t/) {}
}
}
@@ -2249,19 +2411,19 @@ sub process {
if (CHK("PARENTHESIS_ALIGNMENT",
"Alignment should match open parenthesis\n" . $hereprev) &&
$fix && $line =~ /^\+/) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/^\+[ \t]*/\+$goodtabindent/;
}
}
}
}
- if ($line =~ /^\+.*\*[ \t]*\)[ \t]+(?!$Assignment|$Arithmetic)/) {
+ if ($line =~ /^\+.*\(\s*$Type\s*\)[ \t]+(?!$Assignment|$Arithmetic)/) {
if (CHK("SPACING",
- "No space is necessary after a cast\n" . $hereprev) &&
+ "No space is necessary after a cast\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
- s/^(\+.*\*[ \t]*\))[ \t]+/$1/;
+ $fixed[$fixlinenr] =~
+ s/(\(\s*$Type\s*\))[ \t]+/$1/;
}
}
@@ -2291,10 +2453,44 @@ sub process {
"networking block comments put the trailing */ on a separate line\n" . $herecurr);
}
+# check for missing blank lines after struct/union declarations
+# with exceptions for various attributes and macros
+ if ($prevline =~ /^[\+ ]};?\s*$/ &&
+ $line =~ /^\+/ &&
+ !($line =~ /^\+\s*$/ ||
+ $line =~ /^\+\s*EXPORT_SYMBOL/ ||
+ $line =~ /^\+\s*MODULE_/i ||
+ $line =~ /^\+\s*\#\s*(?:end|elif|else)/ ||
+ $line =~ /^\+[a-z_]*init/ ||
+ $line =~ /^\+\s*(?:static\s+)?[A-Z_]*ATTR/ ||
+ $line =~ /^\+\s*DECLARE/ ||
+ $line =~ /^\+\s*__setup/)) {
+ if (CHK("LINE_SPACING",
+ "Please use a blank line after function/struct/union/enum declarations\n" . $hereprev) &&
+ $fix) {
+ fix_insert_line($fixlinenr, "\+");
+ }
+ }
+
+# check for multiple consecutive blank lines
+ if ($prevline =~ /^[\+ ]\s*$/ &&
+ $line =~ /^\+\s*$/ &&
+ $last_blank_line != ($linenr - 1)) {
+ if (CHK("LINE_SPACING",
+ "Please don't use multiple blank lines\n" . $hereprev) &&
+ $fix) {
+ fix_delete_line($fixlinenr, $rawline);
+ }
+
+ $last_blank_line = $linenr;
+ }
+
# check for missing blank lines after declarations
if ($sline =~ /^\+\s+\S/ && #Not at char 1
# actual declarations
($prevline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+ # function pointer declarations
+ $prevline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
# foo bar; where foo is some local typedef or #define
$prevline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
# known declaration macros
@@ -2307,6 +2503,8 @@ sub process {
$prevline =~ /(?:\{\s*|\\)$/) &&
# looks like a declaration
!($sline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+ # function pointer declarations
+ $sline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
# foo bar; where foo is some local typedef or #define
$sline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
# known declaration macros
@@ -2321,8 +2519,11 @@ sub process {
$sline =~ /^\+\s+\(?\s*(?:$Compare|$Assignment|$Operators)/) &&
# indentation of previous and current line are the same
(($prevline =~ /\+(\s+)\S/) && $sline =~ /^\+$1\S/)) {
- WARN("SPACING",
- "Missing a blank line after declarations\n" . $hereprev);
+ if (WARN("LINE_SPACING",
+ "Missing a blank line after declarations\n" . $hereprev) &&
+ $fix) {
+ fix_insert_line($fixlinenr, "\+");
+ }
}
# check for spaces at the beginning of a line.
@@ -2335,13 +2536,33 @@ sub process {
if (WARN("LEADING_SPACE",
"please, no spaces at the start of a line\n" . $herevet) &&
$fix) {
- $fixed[$linenr - 1] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
+ $fixed[$fixlinenr] =~ s/^\+([ \t]+)/"\+" . tabify($1)/e;
}
}
# check we are in a valid C source file if not then ignore this hunk
next if ($realfile !~ /\.(h|c)$/);
+# check indentation of any line with a bare else
+# if the previous line is a break or return and is indented 1 tab more...
+ if ($sline =~ /^\+([\t]+)(?:}[ \t]*)?else(?:[ \t]*{)?\s*$/) {
+ my $tabs = length($1) + 1;
+ if ($prevline =~ /^\+\t{$tabs,$tabs}(?:break|return)\b/) {
+ WARN("UNNECESSARY_ELSE",
+ "else is not generally useful after a break or return\n" . $hereprev);
+ }
+ }
+
+# check indentation of a line with a break;
+# if the previous line is a goto or return and is indented the same # of tabs
+ if ($sline =~ /^\+([\t]+)break\s*;\s*$/) {
+ my $tabs = $1;
+ if ($prevline =~ /^\+$tabs(?:goto|return)\b/) {
+ WARN("UNNECESSARY_BREAK",
+ "break is not useful after a goto or return\n" . $hereprev);
+ }
+ }
+
# discourage the addition of CONFIG_EXPERIMENTAL in #if(def).
if ($line =~ /^\+\s*\#\s*if.*\bCONFIG_EXPERIMENTAL\b/) {
WARN("CONFIG_EXPERIMENTAL",
@@ -2477,7 +2698,7 @@ sub process {
# if/while/etc brace do not go on next line, unless defining a do while loop,
# or if that brace on the next line is for something else
- if ($line =~ /(.*)\b((?:if|while|for|switch)\s*\(|do\b|else\b)/ && $line !~ /^.\s*\#/) {
+ if ($line =~ /(.*)\b((?:if|while|for|switch|(?:[a-z_]+|)for_each[a-z_]+)\s*\(|do\b|else\b)/ && $line !~ /^.\s*\#/) {
my $pre_ctx = "$1$2";
my ($level, @ctx) = ctx_statement_level($linenr, $realcnt, 0);
@@ -2504,7 +2725,7 @@ sub process {
#print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n";
#print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n";
- if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln -1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) {
+ if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln - 1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) {
ERROR("OPEN_BRACE",
"that open brace { should be on the previous line\n" .
"$here\n$ctx\n$rawlines[$ctx_ln - 1]\n");
@@ -2523,7 +2744,7 @@ sub process {
}
# Check relative indent for conditionals and blocks.
- if ($line =~ /\b(?:(?:if|while|for)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) {
+ if ($line =~ /\b(?:(?:if|while|for|(?:[a-z_]+|)for_each[a-z_]+)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) {
($stat, $cond, $line_nr_next, $remain_next, $off_next) =
ctx_statement_block($linenr, $realcnt, 0)
if (!defined $stat);
@@ -2654,8 +2875,18 @@ sub process {
# check for initialisation to aggregates open brace on the next line
if ($line =~ /^.\s*{/ &&
$prevline =~ /(?:^|[^=])=\s*$/) {
- ERROR("OPEN_BRACE",
- "that open brace { should be on the previous line\n" . $hereprev);
+ if (ERROR("OPEN_BRACE",
+ "that open brace { should be on the previous line\n" . $hereprev) &&
+ $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
+ fix_delete_line($fixlinenr - 1, $prevrawline);
+ fix_delete_line($fixlinenr, $rawline);
+ my $fixedline = $prevrawline;
+ $fixedline =~ s/\s*=\s*$/ = {/;
+ fix_insert_line($fixlinenr, $fixedline);
+ $fixedline = $line;
+ $fixedline =~ s/^(.\s*){\s*/$1/;
+ fix_insert_line($fixlinenr, $fixedline);
+ }
}
#
@@ -2680,10 +2911,10 @@ sub process {
if (ERROR("C99_COMMENTS",
"do not use C99 // comments\n" . $herecurr) &&
$fix) {
- my $line = $fixed[$linenr - 1];
+ my $line = $fixed[$fixlinenr];
if ($line =~ /\/\/(.*)$/) {
my $comment = trim($1);
- $fixed[$linenr - 1] =~ s@\/\/(.*)$@/\* $comment \*/@;
+ $fixed[$fixlinenr] =~ s@\/\/(.*)$@/\* $comment \*/@;
}
}
}
@@ -2742,7 +2973,7 @@ sub process {
"do not initialise globals to 0 or NULL\n" .
$herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/;
+ $fixed[$fixlinenr] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/;
}
}
# check for static initialisers.
@@ -2751,7 +2982,7 @@ sub process {
"do not initialise statics to 0 or NULL\n" .
$herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/;
+ $fixed[$fixlinenr] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/;
}
}
@@ -2781,7 +3012,7 @@ sub process {
if (ERROR("FUNCTION_WITHOUT_ARGS",
"Bad function definition - $1() should probably be $1(void)\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/(\b($Type)\s+($Ident))\s*\(\s*\)/$2 $3(void)/;
+ $fixed[$fixlinenr] =~ s/(\b($Type)\s+($Ident))\s*\(\s*\)/$2 $3(void)/;
}
}
@@ -2790,7 +3021,7 @@ sub process {
if (WARN("DEFINE_PCI_DEVICE_TABLE",
"Prefer struct pci_device_id over deprecated DEFINE_PCI_DEVICE_TABLE\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\b(?:static\s+|)DEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=\s*/static const struct pci_device_id $1\[\] = /;
+ $fixed[$fixlinenr] =~ s/\b(?:static\s+|)DEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=\s*/static const struct pci_device_id $1\[\] = /;
}
}
@@ -2827,7 +3058,7 @@ sub process {
my $sub_from = $ident;
my $sub_to = $ident;
$sub_to =~ s/\Q$from\E/$to/;
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s@\Q$sub_from\E@$sub_to@;
}
}
@@ -2855,7 +3086,7 @@ sub process {
my $sub_from = $match;
my $sub_to = $match;
$sub_to =~ s/\Q$from\E/$to/;
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s@\Q$sub_from\E@$sub_to@;
}
}
@@ -2917,7 +3148,7 @@ sub process {
if (WARN("PREFER_PR_LEVEL",
"Prefer pr_warn(... to pr_warning(...\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/\bpr_warning\b/pr_warn/;
}
}
@@ -2933,17 +3164,40 @@ sub process {
# function brace can't be on same line, except for #defines of do while,
# or if closed on same line
- if (($line=~/$Type\s*$Ident\(.*\).*\s{/) and
+ if (($line=~/$Type\s*$Ident\(.*\).*\s*{/) and
!($line=~/\#\s*define.*do\s{/) and !($line=~/}/)) {
- ERROR("OPEN_BRACE",
- "open brace '{' following function declarations go on the next line\n" . $herecurr);
+ if (ERROR("OPEN_BRACE",
+ "open brace '{' following function declarations go on the next line\n" . $herecurr) &&
+ $fix) {
+ fix_delete_line($fixlinenr, $rawline);
+ my $fixed_line = $rawline;
+ $fixed_line =~ /(^..*$Type\s*$Ident\(.*\)\s*){(.*)$/;
+ my $line1 = $1;
+ my $line2 = $2;
+ fix_insert_line($fixlinenr, ltrim($line1));
+ fix_insert_line($fixlinenr, "\+{");
+ if ($line2 !~ /^\s*$/) {
+ fix_insert_line($fixlinenr, "\+\t" . trim($line2));
+ }
+ }
}
# open braces for enum, union and struct go on the same line.
if ($line =~ /^.\s*{/ &&
$prevline =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?\s*$/) {
- ERROR("OPEN_BRACE",
- "open brace '{' following $1 go on the same line\n" . $hereprev);
+ if (ERROR("OPEN_BRACE",
+ "open brace '{' following $1 go on the same line\n" . $hereprev) &&
+ $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
+ fix_delete_line($fixlinenr - 1, $prevrawline);
+ fix_delete_line($fixlinenr, $rawline);
+ my $fixedline = rtrim($prevrawline) . " {";
+ fix_insert_line($fixlinenr, $fixedline);
+ $fixedline = $rawline;
+ $fixedline =~ s/^(.\s*){\s*/$1\t/;
+ if ($fixedline !~ /^\+\s*$/) {
+ fix_insert_line($fixlinenr, $fixedline);
+ }
+ }
}
# missing space after union, struct or enum definition
@@ -2951,7 +3205,7 @@ sub process {
if (WARN("SPACING",
"missing space after $1 definition\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/^(.\s*(?:typedef\s+)?(?:enum|union|struct)(?:\s+$Ident){1,2})([=\{])/$1 $2/;
}
}
@@ -3021,7 +3275,7 @@ sub process {
}
if (show_type("SPACING") && $fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/^(.\s*)$Declare\s*\(\s*\*\s*$Ident\s*\)\s*\(/$1 . $declare . $post_declare_space . '(*' . $funcname . ')('/ex;
}
}
@@ -3038,7 +3292,7 @@ sub process {
if (ERROR("BRACKET_SPACE",
"space prohibited before open square bracket '['\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/^(\+.*?)\s+\[/$1\[/;
}
}
@@ -3073,7 +3327,7 @@ sub process {
if (WARN("SPACING",
"space prohibited between function name and open parenthesis '('\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/\b$name\s+\(/$name\(/;
}
}
@@ -3341,8 +3595,8 @@ sub process {
$fixed_line = $fixed_line . $fix_elements[$#elements];
}
- if ($fix && $line_fixed && $fixed_line ne $fixed[$linenr - 1]) {
- $fixed[$linenr - 1] = $fixed_line;
+ if ($fix && $line_fixed && $fixed_line ne $fixed[$fixlinenr]) {
+ $fixed[$fixlinenr] = $fixed_line;
}
@@ -3353,7 +3607,7 @@ sub process {
if (WARN("SPACING",
"space prohibited before semicolon\n" . $herecurr) &&
$fix) {
- 1 while $fixed[$linenr - 1] =~
+ 1 while $fixed[$fixlinenr] =~
s/^(\+.*\S)\s+;/$1;/;
}
}
@@ -3386,7 +3640,7 @@ sub process {
if (ERROR("SPACING",
"space required before the open brace '{'\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/^(\+.*(?:do|\))){/$1 {/;
+ $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\))){/$1 {/;
}
}
@@ -3404,7 +3658,7 @@ sub process {
if (ERROR("SPACING",
"space required after that close brace '}'\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/}((?!(?:,|;|\)))\S)/} $1/;
}
}
@@ -3414,7 +3668,7 @@ sub process {
if (ERROR("SPACING",
"space prohibited after that open square bracket '['\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/\[\s+/\[/;
}
}
@@ -3422,7 +3676,7 @@ sub process {
if (ERROR("SPACING",
"space prohibited before that close square bracket ']'\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/\s+\]/\]/;
}
}
@@ -3433,7 +3687,7 @@ sub process {
if (ERROR("SPACING",
"space prohibited after that open parenthesis '('\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/\(\s+/\(/;
}
}
@@ -3443,18 +3697,27 @@ sub process {
if (ERROR("SPACING",
"space prohibited before that close parenthesis ')'\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ print("fixlinenr: <$fixlinenr> fixed[fixlinenr]: <$fixed[$fixlinenr]>\n");
+ $fixed[$fixlinenr] =~
s/\s+\)/\)/;
}
}
+# check unnecessary parentheses around addressof/dereference single $Lvals
+# ie: &(foo->bar) should be &foo->bar and *(foo->bar) should be *foo->bar
+
+ while ($line =~ /(?:[^&]&\s*|\*)\(\s*($Ident\s*(?:$Member\s*)+)\s*\)/g) {
+ CHK("UNNECESSARY_PARENTHESES",
+ "Unnecessary parentheses around $1\n" . $herecurr);
+ }
+
#goto labels aren't indented, allow a single space however
if ($line=~/^.\s+[A-Za-z\d_]+:(?![0-9]+)/ and
!($line=~/^. [A-Za-z\d_]+:/) and !($line=~/^.\s+default:/)) {
if (WARN("INDENTED_LABEL",
"labels should not be indented\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/^(.)\s+/$1/;
}
}
@@ -3516,7 +3779,7 @@ sub process {
if (ERROR("SPACING",
"space required before the open parenthesis '('\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/\b(if|while|for|switch)\(/$1 \(/;
}
}
@@ -3606,7 +3869,7 @@ sub process {
# if should not continue a brace
if ($line =~ /}\s*if\b/) {
ERROR("TRAILING_STATEMENTS",
- "trailing statements should be on next line\n" .
+ "trailing statements should be on next line (or did you mean 'else if'?)\n" .
$herecurr);
}
# case and default should not have general statements after them
@@ -3622,14 +3885,26 @@ sub process {
# Check for }<nl>else {, these must be at the same
# indent level to be relevant to each other.
- if ($prevline=~/}\s*$/ and $line=~/^.\s*else\s*/ and
- $previndent == $indent) {
- ERROR("ELSE_AFTER_BRACE",
- "else should follow close brace '}'\n" . $hereprev);
+ if ($prevline=~/}\s*$/ and $line=~/^.\s*else\s*/ &&
+ $previndent == $indent) {
+ if (ERROR("ELSE_AFTER_BRACE",
+ "else should follow close brace '}'\n" . $hereprev) &&
+ $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
+ fix_delete_line($fixlinenr - 1, $prevrawline);
+ fix_delete_line($fixlinenr, $rawline);
+ my $fixedline = $prevrawline;
+ $fixedline =~ s/}\s*$//;
+ if ($fixedline !~ /^\+\s*$/) {
+ fix_insert_line($fixlinenr, $fixedline);
+ }
+ $fixedline = $rawline;
+ $fixedline =~ s/^(.\s*)else/$1} else/;
+ fix_insert_line($fixlinenr, $fixedline);
+ }
}
- if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ and
- $previndent == $indent) {
+ if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ &&
+ $previndent == $indent) {
my ($s, $c) = ctx_statement_block($linenr, $realcnt, 0);
# Find out what is on the end of the line after the
@@ -3638,8 +3913,18 @@ sub process {
$s =~ s/\n.*//g;
if ($s =~ /^\s*;/) {
- ERROR("WHILE_AFTER_BRACE",
- "while should follow close brace '}'\n" . $hereprev);
+ if (ERROR("WHILE_AFTER_BRACE",
+ "while should follow close brace '}'\n" . $hereprev) &&
+ $fix && $prevline =~ /^\+/ && $line =~ /^\+/) {
+ fix_delete_line($fixlinenr - 1, $prevrawline);
+ fix_delete_line($fixlinenr, $rawline);
+ my $fixedline = $prevrawline;
+ my $trailing = $rawline;
+ $trailing =~ s/^\+//;
+ $trailing = trim($trailing);
+ $fixedline =~ s/}\s*$/} $trailing/;
+ fix_insert_line($fixlinenr, $fixedline);
+ }
}
}
@@ -3653,7 +3938,7 @@ sub process {
"Avoid gcc v4.3+ binary constant extension: <$var>\n" . $herecurr) &&
$fix) {
my $hexval = sprintf("0x%x", oct($var));
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/\b$var\b/$hexval/;
}
}
@@ -3689,7 +3974,7 @@ sub process {
if (WARN("WHITESPACE_AFTER_LINE_CONTINUATION",
"Whitespace after \\ makes next lines useless\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\s+$//;
+ $fixed[$fixlinenr] =~ s/\s+$//;
}
}
@@ -3762,7 +4047,7 @@ sub process {
$dstat !~ /^(?:$Ident|-?$Constant),$/ && # 10, // foo(),
$dstat !~ /^(?:$Ident|-?$Constant);$/ && # foo();
$dstat !~ /^[!~-]?(?:$Lval|$Constant)$/ && # 10 // foo() // !foo // ~foo // -foo // foo->bar // foo.bar->baz
- $dstat !~ /^'X'$/ && # character constants
+ $dstat !~ /^'X'$/ && $dstat !~ /^'XX'$/ && # character constants
$dstat !~ /$exceptions/ &&
$dstat !~ /^\.$Ident\s*=/ && # .foo =
$dstat !~ /^(?:\#\s*$Ident|\#\s*$Constant)\s*$/ && # stringification #foo
@@ -4014,6 +4299,23 @@ sub process {
}
}
+# check for unnecessary "Out of Memory" messages
+ if ($line =~ /^\+.*\b$logFunctions\s*\(/ &&
+ $prevline =~ /^[ \+]\s*if\s*\(\s*(\!\s*|NULL\s*==\s*)?($Lval)(\s*==\s*NULL\s*)?\s*\)/ &&
+ (defined $1 || defined $3) &&
+ $linenr > 3) {
+ my $testval = $2;
+ my $testline = $lines[$linenr - 3];
+
+ my ($s, $c) = ctx_statement_block($linenr - 3, $realcnt, 0);
+# print("line: <$line>\nprevline: <$prevline>\ns: <$s>\nc: <$c>\n\n\n");
+
+ if ($c =~ /(?:^|\n)[ \+]\s*(?:$Type\s*)?\Q$testval\E\s*=\s*(?:\([^\)]*\)\s*)?\s*(?:devm_)?(?:[kv][czm]alloc(?:_node|_array)?\b|kstrdup|(?:dev_)?alloc_skb)/) {
+ WARN("OOM_MESSAGE",
+ "Possible unnecessary 'out of memory' message\n" . $hereprev);
+ }
+ }
+
# check for bad placement of section $InitAttribute (e.g.: __initdata)
if ($line =~ /(\b$InitAttribute\b)/) {
my $attr = $1;
@@ -4027,7 +4329,7 @@ sub process {
WARN("MISPLACED_INIT",
"$attr should be placed after $var\n" . $herecurr))) &&
$fix) {
- $fixed[$linenr - 1] =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*([=;])\s*/"$1" . trim(string_find_replace($2, "\\s*$attr\\s*", " ")) . " " . trim(string_find_replace($3, "\\s*$attr\\s*", "")) . " $attr" . ("$4" eq ";" ? ";" : " = ")/e;
+ $fixed[$fixlinenr] =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*([=;])\s*/"$1" . trim(string_find_replace($2, "\\s*$attr\\s*", " ")) . " " . trim(string_find_replace($3, "\\s*$attr\\s*", "")) . " $attr" . ("$4" eq ";" ? ";" : " = ")/e;
}
}
}
@@ -4041,7 +4343,7 @@ sub process {
if (ERROR("INIT_ATTRIBUTE",
"Use of const init definition must use ${attr_prefix}initconst\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/$InitAttributeData/${attr_prefix}initconst/;
}
}
@@ -4052,12 +4354,12 @@ sub process {
if (ERROR("INIT_ATTRIBUTE",
"Use of $attr requires a separate use of const\n" . $herecurr) &&
$fix) {
- my $lead = $fixed[$linenr - 1] =~
+ my $lead = $fixed[$fixlinenr] =~
/(^\+\s*(?:static\s+))/;
$lead = rtrim($1);
$lead = "$lead " if ($lead !~ /^\+$/);
$lead = "${lead}const ";
- $fixed[$linenr - 1] =~ s/(^\+\s*(?:static\s+))/$lead/;
+ $fixed[$fixlinenr] =~ s/(^\+\s*(?:static\s+))/$lead/;
}
}
@@ -4070,7 +4372,7 @@ sub process {
if (WARN("CONSTANT_CONVERSION",
"$constant_func should be $func\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\b$constant_func\b/$func/g;
+ $fixed[$fixlinenr] =~ s/\b$constant_func\b/$func/g;
}
}
@@ -4120,7 +4422,7 @@ sub process {
if (ERROR("SPACING",
"exactly one space required after that #$1\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~
+ $fixed[$fixlinenr] =~
s/^(.\s*\#\s*(ifdef|ifndef|elif))\s{2,}/$1 /;
}
@@ -4168,7 +4470,7 @@ sub process {
if (WARN("INLINE",
"plain inline is preferred over $1\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\b(__inline__|__inline)\b/inline/;
+ $fixed[$fixlinenr] =~ s/\b(__inline__|__inline)\b/inline/;
}
}
@@ -4193,7 +4495,7 @@ sub process {
if (WARN("PREFER_PRINTF",
"__printf(string-index, first-to-check) is preferred over __attribute__((format(printf, string-index, first-to-check)))\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)\)\s*\)\s*\)/"__printf(" . trim($1) . ")"/ex;
+ $fixed[$fixlinenr] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)\)\s*\)\s*\)/"__printf(" . trim($1) . ")"/ex;
}
}
@@ -4204,7 +4506,7 @@ sub process {
if (WARN("PREFER_SCANF",
"__scanf(string-index, first-to-check) is preferred over __attribute__((format(scanf, string-index, first-to-check)))\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\s*,\s*(.*)\)\s*\)\s*\)/"__scanf(" . trim($1) . ")"/ex;
+ $fixed[$fixlinenr] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\s*,\s*(.*)\)\s*\)\s*\)/"__scanf(" . trim($1) . ")"/ex;
}
}
@@ -4219,7 +4521,7 @@ sub process {
if (WARN("SIZEOF_PARENTHESIS",
"sizeof $1 should be sizeof($1)\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/"sizeof(" . trim($1) . ")"/ex;
+ $fixed[$fixlinenr] =~ s/\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/"sizeof(" . trim($1) . ")"/ex;
}
}
@@ -4242,7 +4544,7 @@ sub process {
if (WARN("PREFER_SEQ_PUTS",
"Prefer seq_puts to seq_printf\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\bseq_printf\b/seq_puts/;
+ $fixed[$fixlinenr] =~ s/\bseq_printf\b/seq_puts/;
}
}
}
@@ -4271,7 +4573,7 @@ sub process {
if (WARN("PREFER_ETHER_ADDR_COPY",
"Prefer ether_addr_copy() over memcpy() if the Ethernet addresses are __aligned(2)\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/ether_addr_copy($2, $7)/;
+ $fixed[$fixlinenr] =~ s/\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/ether_addr_copy($2, $7)/;
}
}
@@ -4359,7 +4661,7 @@ sub process {
if (CHK("AVOID_EXTERNS",
"extern prototypes should be avoided in .h files\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/(.*)\bextern\b\s*(.*)/$1$2/;
+ $fixed[$fixlinenr] =~ s/(.*)\bextern\b\s*(.*)/$1$2/;
}
}
@@ -4419,23 +4721,24 @@ sub process {
# check for k[mz]alloc with multiplies that could be kmalloc_array/kcalloc
if ($^V && $^V ge 5.10.0 &&
- $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/) {
+ $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)\s*,/) {
my $oldfunc = $3;
my $a1 = $4;
my $a2 = $10;
my $newfunc = "kmalloc_array";
$newfunc = "kcalloc" if ($oldfunc eq "kzalloc");
- if ($a1 =~ /^sizeof\s*\S/ || $a2 =~ /^sizeof\s*\S/) {
+ my $r1 = $a1;
+ my $r2 = $a2;
+ if ($a1 =~ /^sizeof\s*\S/) {
+ $r1 = $a2;
+ $r2 = $a1;
+ }
+ if ($r1 !~ /^sizeof\b/ && $r2 =~ /^sizeof\s*\S/ &&
+ !($r1 =~ /^$Constant$/ || $r1 =~ /^[A-Z_][A-Z0-9_]*$/)) {
if (WARN("ALLOC_WITH_MULTIPLY",
"Prefer $newfunc over $oldfunc with multiply\n" . $herecurr) &&
$fix) {
- my $r1 = $a1;
- my $r2 = $a2;
- if ($a1 =~ /^sizeof\s*\S/) {
- $r1 = $a2;
- $r2 = $a1;
- }
- $fixed[$linenr - 1] =~ s/\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/$1 . ' = ' . "$newfunc(" . trim($r1) . ', ' . trim($r2)/e;
+ $fixed[$fixlinenr] =~ s/\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/$1 . ' = ' . "$newfunc(" . trim($r1) . ', ' . trim($r2)/e;
}
}
@@ -4459,7 +4762,7 @@ sub process {
if (WARN("ONE_SEMICOLON",
"Statements terminations use 1 semicolon\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/(\s*;\s*){2,}$/;/g;
+ $fixed[$fixlinenr] =~ s/(\s*;\s*){2,}$/;/g;
}
}
@@ -4507,7 +4810,7 @@ sub process {
if (WARN("USE_FUNC",
"__func__ should be used instead of gcc specific __FUNCTION__\n" . $herecurr) &&
$fix) {
- $fixed[$linenr - 1] =~ s/\b__FUNCTION__\b/__func__/g;
+ $fixed[$fixlinenr] =~ s/\b__FUNCTION__\b/__func__/g;
}
}
@@ -4750,12 +5053,16 @@ sub process {
hash_show_words(\%use_type, "Used");
hash_show_words(\%ignore_type, "Ignored");
- if ($clean == 0 && $fix && "@rawlines" ne "@fixed") {
+ if ($clean == 0 && $fix &&
+ ("@rawlines" ne "@fixed" ||
+ $#fixed_inserted >= 0 || $#fixed_deleted >= 0)) {
my $newfile = $filename;
$newfile .= ".EXPERIMENTAL-checkpatch-fixes" if (!$fix_inplace);
my $linecount = 0;
my $f;
+ @fixed = fix_inserted_deleted_lines(\@fixed, \@fixed_inserted, \@fixed_deleted);
+
open($f, '>', $newfile)
or die "$P: Can't open $newfile for write\n";
foreach my $fixed_line (@fixed) {
@@ -4763,7 +5070,7 @@ sub process {
if ($file) {
if ($linecount > 3) {
$fixed_line =~ s/^\+//;
- print $f $fixed_line. "\n";
+ print $f $fixed_line . "\n";
}
} else {
print $f $fixed_line . "\n";
diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl
index c05d586b1fee..899b4230320e 100755
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -52,14 +52,12 @@ my (@stack, $re, $dre, $x, $xs, $funcre);
#8000008a: 20 1d sub sp,4
#80000ca8: fa cd 05 b0 sub sp,sp,1456
$re = qr/^.*sub.*sp.*,([0-9]{1,8})/o;
- } elsif ($arch =~ /^i[3456]86$/) {
+ } elsif ($arch =~ /^x86(_64)?$/ || $arch =~ /^i[3456]86$/) {
#c0105234: 81 ec ac 05 00 00 sub $0x5ac,%esp
- $re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%esp$/o;
- $dre = qr/^.*[as][du][db] (%.*),\%esp$/o;
- } elsif ($arch eq 'x86_64') {
- # 2f60: 48 81 ec e8 05 00 00 sub $0x5e8,%rsp
- $re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%rsp$/o;
- $dre = qr/^.*[as][du][db] (\%.*),\%rsp$/o;
+ # or
+ # 2f60: 48 81 ec e8 05 00 00 sub $0x5e8,%rsp
+ $re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%(e|r)sp$/o;
+ $dre = qr/^.*[as][du][db] (%.*),\%(e|r)sp$/o;
} elsif ($arch eq 'ia64') {
#e0000000044011fc: 01 0f fc 8c adds r12=-384,r12
$re = qr/.*adds.*r12=-(([0-9]{2}|[3-9])[0-9]{2}),r12/o;
diff --git a/scripts/coccinelle/free/ifnullfree.cocci b/scripts/coccinelle/free/ifnullfree.cocci
new file mode 100644
index 000000000000..9321fe328a97
--- /dev/null
+++ b/scripts/coccinelle/free/ifnullfree.cocci
@@ -0,0 +1,54 @@
+/// NULL check before some freeing functions is not needed.
+///
+/// Based on checkpatch warning
+/// "kfree(NULL) is safe this check is probably not required"
+/// and kfreeaddr.cocci by Julia Lawall.
+///
+// Copyright: (C) 2014 Fabian Frederick. GPLv2.
+// Comments: -
+// Options: --no-includes --include-headers
+
+virtual patch
+virtual org
+virtual report
+virtual context
+
+@r2 depends on patch@
+expression E;
+@@
+- if (E)
+(
+- kfree(E);
++ kfree(E);
+|
+- debugfs_remove(E);
++ debugfs_remove(E);
+|
+- debugfs_remove_recursive(E);
++ debugfs_remove_recursive(E);
+|
+- usb_free_urb(E);
++ usb_free_urb(E);
+)
+
+@r depends on context || report || org @
+expression E;
+position p;
+@@
+
+* if (E)
+* \(kfree@p\|debugfs_remove@p\|debugfs_remove_recursive@p\|usb_free_urb\)(E);
+
+@script:python depends on org@
+p << r.p;
+@@
+
+cocci.print_main("NULL check before that freeing function is not needed", p)
+
+@script:python depends on report@
+p << r.p;
+@@
+
+msg = "WARNING: NULL check before freeing functions like kfree, debugfs_remove, debugfs_remove_recursive or usb_free_urb is not needed. Maybe consider reorganizing relevant code to avoid passing NULL values."
+coccilib.report.print_report(p[0], msg)
+
diff --git a/scripts/tags.sh b/scripts/tags.sh
index e6b011fe1d0d..cbfd269a6011 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -168,6 +168,7 @@ exuberant()
--extra=+f --c-kinds=+px \
--regex-asm='/^(ENTRY|_GLOBAL)\(([^)]*)\).*/\2/' \
--regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/' \
+ --regex-c='/^COMPAT_SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/compat_sys_\1/' \
--regex-c++='/^TRACE_EVENT\(([^,)]*).*/trace_\1/' \
--regex-c++='/^DEFINE_EVENT\([^,)]*, *([^,)]*).*/trace_\1/' \
--regex-c++='/PAGEFLAG\(([^,)]*).*/Page\1/' \
@@ -231,6 +232,7 @@ emacs()
all_target_sources | xargs $1 -a \
--regex='/^\(ENTRY\|_GLOBAL\)(\([^)]*\)).*/\2/' \
--regex='/^SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/sys_\1/' \
+ --regex='/^COMPAT_SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/compat_sys_\1/' \
--regex='/^TRACE_EVENT(\([^,)]*\).*/trace_\1/' \
--regex='/^DEFINE_EVENT([^,)]*, *\([^,)]*\).*/trace_\1/' \
--regex='/PAGEFLAG(\([^,)]*\).*/Page\1/' \